- ...or maybe even just coworkers. Ya know the ones who like to {likeTo()}?{' '}
+ {titlePrefix()} better with friends ... or maybe even just coworkers. Ya know the ones who like to{' '}
+ {likeTo()}?{' '}
{preflight?.email_service_available && (
Enter their email below and we'll send them a custom invite link. Invites expire after 3
diff --git a/frontend/src/scenes/onboarding/OnboardingProductConfiguration.tsx b/frontend/src/scenes/onboarding/OnboardingProductConfiguration.tsx
index 7899d14ee5c0b..8698b6bf64dbd 100644
--- a/frontend/src/scenes/onboarding/OnboardingProductConfiguration.tsx
+++ b/frontend/src/scenes/onboarding/OnboardingProductConfiguration.tsx
@@ -1,11 +1,44 @@
import { LemonDivider, LemonSelect, LemonSwitch } from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
-import React, { useEffect } from 'react'
+import React, { useEffect, useRef } from 'react'
+import { pluginsLogic } from 'scenes/plugins/pluginsLogic'
import { OnboardingStepKey } from './onboardingLogic'
import { onboardingProductConfigurationLogic, ProductConfigOption } from './onboardingProductConfigurationLogic'
import { OnboardingStep } from './OnboardingStep'
+type ConfigType = 'toggle' | 'select'
+type PluginType = 'plugin'
+type ConfigOption =
+ | {
+ title: string
+ description?: string
+ type: ConfigType
+ selectOptions?: { label: string; value: string | number }[]
+ value: boolean | string | number
+ onChange: (newValue: boolean | string | number) => void
+ }
+ | {
+ title: string
+ description?: string
+ type: PluginType
+ value: boolean
+ onChange: (newValue: boolean) => void
+ }
+
+interface PluginContent {
+ title: string
+ description: string
+}
+type PluginContentMapping = Record<string, PluginContent>
+const pluginContentMapping: PluginContentMapping = {
+ GeoIP: {
+ title: 'Capture location information',
+ description:
+ 'Enrich PostHog events and persons with IP location data. This is useful for understanding where your users are coming from. This setting can be found under the data pipelines apps.',
+ },
+}
+
export const OnboardingProductConfiguration = ({
stepKey = OnboardingStepKey.PRODUCT_CONFIGURATION,
options,
@@ -14,53 +47,95 @@ export const OnboardingProductConfiguration = ({
options: ProductConfigOption[]
}): JSX.Element | null => {
const { configOptions } = useValues(onboardingProductConfigurationLogic)
+ const { defaultEnabledPlugins } = useValues(pluginsLogic)
const { setConfigOptions, saveConfiguration } = useActions(onboardingProductConfigurationLogic)
+ const { toggleEnabled } = useActions(pluginsLogic)
+
+ const configOptionsRef = useRef(configOptions)
+
+ useEffect(() => {
+ configOptionsRef.current = configOptions
+ }, [configOptions])
+
useEffect(() => {
setConfigOptions(options)
}, [])
- return configOptions ? (
+ const combinedList: ConfigOption[] = [
+ ...configOptions.map((option) => ({
+ title: option.title,
+ description: option.description,
+ type: option.type as ConfigType,
+ selectOptions: option.selectOptions,
+ value: option.value,
+ onChange: (newValue: boolean | string | number) => {
+ // Use the current value from the ref to ensure that onChange always accesses
+ // the latest state of configOptions, preventing the closure from using stale data.
+ const updatedConfigOptions = configOptionsRef.current.map((o) =>
+ o.teamProperty === option.teamProperty ? { ...o, value: newValue } : o
+ )
+ setConfigOptions(updatedConfigOptions)
+ },
+ })),
+ ...defaultEnabledPlugins.map((plugin) => {
+ const pluginContent = pluginContentMapping[plugin.name]
+ return {
+ title: pluginContent?.title || plugin.name,
+ description: pluginContent?.description || plugin.description,
+ type: 'plugin' as PluginType,
+ value: plugin.pluginConfig?.enabled || false,
+ onChange: (newValue: boolean) => {
+ toggleEnabled({
+ id: plugin.pluginConfig?.id,
+ enabled: newValue,
+ })
+ },
+ }
+ }),
+ ]
+
+ return combinedList.length > 0 ? (
- Options
- {configOptions?.map((option: ProductConfigOption, idx) => (
-
-
-
-
- {option.type == 'toggle' ? (
-
{
- setConfigOptions(
- configOptions.map((o) =>
- o.teamProperty === option.teamProperty ? { ...o, value: checked } : o
- )
- )
- }}
- className="self-center"
- fullWidth={true}
- checked={option.value || false}
- />
- ) : (
-
-
{
- setConfigOptions(
- configOptions.map((o) =>
- o.teamProperty === option.teamProperty ? { ...o, value: v } : o
- )
- )
- }}
- options={option.selectOptions || []}
- value={option.value}
- />
+
+
Options
+ {combinedList.map((item, idx) => (
+
+
+
+
+
+
{item.description}
+
+
+ {item.type === 'toggle' ? (
+
+ ) : item.type === 'plugin' ? (
+
+ ) : (
+
+
+
+ )}
- )}
-
{option.description}
-
-
- ))}
+
+
+ ))}
+
) : null
}
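
The ref mirror in this hunk (`configOptionsRef` kept in sync via `useEffect`) is the standard React fix for stale closures: the `onChange` handlers in `combinedList` are built during one render but may fire several renders later. A minimal standalone sketch of the pattern, with illustrative names that are not part of this PR:

import React, { useEffect, useRef, useState } from 'react'

// Keep a ref pointing at the latest value so long-lived callbacks can read
// current state instead of the value they closed over.
function useLatest<T>(value: T): React.MutableRefObject<T> {
    const ref = useRef(value)
    useEffect(() => {
        ref.current = value
    }, [value])
    return ref
}

function Counter(): JSX.Element {
    const [count, setCount] = useState(0)
    const countRef = useLatest(count)

    useEffect(() => {
        // Created once on mount, yet always logs the current count,
        // because it reads through the ref rather than the closure.
        const id = setInterval(() => console.log('count is', countRef.current), 1000)
        return () => clearInterval(id)
    }, [])

    return <button onClick={() => setCount((c) => c + 1)}>{count}</button>
}
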
diff --git a/frontend/src/scenes/onboarding/OnboardingProductIntroduction.tsx b/frontend/src/scenes/onboarding/OnboardingProductIntroduction.tsx
index 03eaec32cd9b8..5402f8fe7d728 100644
--- a/frontend/src/scenes/onboarding/OnboardingProductIntroduction.tsx
+++ b/frontend/src/scenes/onboarding/OnboardingProductIntroduction.tsx
@@ -8,6 +8,7 @@ import React from 'react'
import { convertLargeNumberToWords } from 'scenes/billing/billing-utils'
import { billingProductLogic } from 'scenes/billing/billingProductLogic'
import { ProductPricingModal } from 'scenes/billing/ProductPricingModal'
+import { preflightLogic } from 'scenes/PreflightCheck/preflightLogic'
import { getProductIcon } from 'scenes/products/Products'
import { userLogic } from 'scenes/userLogic'
@@ -149,8 +150,8 @@ const PricingSection = ({ product }: { product: BillingProductV2Type }): JSX.Ele
Or, stay on our generous free plan if you'd like - you still get{' '}
{convertLargeNumberToWords(
- currentAndUpgradePlans.currentPlan.free_allocation ||
- currentAndUpgradePlans.downgradePlan.free_allocation ||
+ currentAndUpgradePlans.currentPlan?.free_allocation ||
+ currentAndUpgradePlans.downgradePlan?.free_allocation ||
0,
null
)}{' '}
@@ -183,6 +184,7 @@ const PricingSection = ({ product }: { product: BillingProductV2Type }): JSX.Ele
export function OnboardingProductIntroduction({ stepKey }: { stepKey: OnboardingStepKey }): JSX.Element | null {
const { product } = useValues(onboardingLogic)
+ const { isCloudOrDev } = useValues(preflightLogic)
const websiteSlug: Partial<Record<ProductKey, string>> = {
[ProductKey.SESSION_REPLAY]: 'session-replay',
[ProductKey.FEATURE_FLAGS]: 'feature-flags',
@@ -198,7 +200,9 @@ export function OnboardingProductIntroduction({ stepKey }: { stepKey: Onboarding
-
-
-
+ {isCloudOrDev && (
+
-
+ )}
diff --git a/frontend/src/scenes/onboarding/OnboardingStep.tsx b/frontend/src/scenes/onboarding/OnboardingStep.tsx
index 9a380cc5f678a..c4cd544e7a38d 100644
--- a/frontend/src/scenes/onboarding/OnboardingStep.tsx
+++ b/frontend/src/scenes/onboarding/OnboardingStep.tsx
@@ -41,15 +41,11 @@ export const OnboardingStep = ({
return (
<>
-
-
- {title || stepKeyToTitle(currentOnboardingStep?.props.stepKey)}
-
-
+
+
{onboardingStepKeys.map((stepName, idx) => {
return (
@@ -76,6 +72,9 @@ export const OnboardingStep = ({
)
})}
+
+ {title || stepKeyToTitle(currentOnboardingStep?.props.stepKey)}
+
diff --git a/frontend/src/scenes/onboarding/onboardingLogic.tsx b/frontend/src/scenes/onboarding/onboardingLogic.tsx
index bb1c625f78a38..f6b597b74414a 100644
--- a/frontend/src/scenes/onboarding/onboardingLogic.tsx
+++ b/frontend/src/scenes/onboarding/onboardingLogic.tsx
@@ -4,6 +4,7 @@ import { FEATURE_FLAGS } from 'lib/constants'
import { featureFlagLogic, FeatureFlagsSet } from 'lib/logic/featureFlagLogic'
import { eventUsageLogic } from 'lib/utils/eventUsageLogic'
import { billingLogic } from 'scenes/billing/billingLogic'
+import { preflightLogic } from 'scenes/PreflightCheck/preflightLogic'
import { Scene } from 'scenes/sceneTypes'
import { teamLogic } from 'scenes/teamLogic'
import { urls } from 'scenes/urls'
@@ -92,6 +93,8 @@ export const onboardingLogic = kea<onboardingLogicType>([
['featureFlags'],
userLogic,
['user'],
+ preflightLogic,
+ ['isCloudOrDev'],
],
actions: [billingLogic, ['loadBillingSuccess'], teamLogic, ['updateCurrentTeam', 'updateCurrentTeamSuccess']],
}),
@@ -200,8 +203,11 @@ export const onboardingLogic = kea<onboardingLogicType>([
},
],
shouldShowBillingStep: [
- (s) => [s.product, s.subscribedDuringOnboarding],
- (product: BillingProductV2Type | null, subscribedDuringOnboarding: boolean) => {
+ (s) => [s.product, s.subscribedDuringOnboarding, s.isCloudOrDev],
+ (product: BillingProductV2Type | null, subscribedDuringOnboarding: boolean, isCloudOrDev) => {
+ if (!isCloudOrDev) {
+ return false
+ }
const hasAllAddons = product?.addons?.every((addon) => addon.subscribed)
return !product?.subscribed || !hasAllAddons || subscribedDuringOnboarding
},
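
Read as a pure function, the updated selector says: self-hosted instances never see the billing step; on cloud (or dev) it shows whenever the product isn't fully subscribed (including all addons) or the user subscribed during onboarding. A sketch with simplified types (`ProductLike` stands in for `BillingProductV2Type`):

interface ProductLike {
    subscribed?: boolean
    addons?: { subscribed?: boolean }[]
}

function shouldShowBillingStep(
    product: ProductLike | null,
    subscribedDuringOnboarding: boolean,
    isCloudOrDev: boolean
): boolean {
    if (!isCloudOrDev) {
        return false // self-hosted: no billing step
    }
    const hasAllAddons = product?.addons?.every((addon) => addon.subscribed)
    return !product?.subscribed || !hasAllAddons || subscribedDuringOnboarding
}
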
diff --git a/frontend/src/scenes/onboarding/sdks/SDKSnippet.tsx b/frontend/src/scenes/onboarding/sdks/SDKSnippet.tsx
index 1a3a90c87a3eb..094d5d31497fc 100644
--- a/frontend/src/scenes/onboarding/sdks/SDKSnippet.tsx
+++ b/frontend/src/scenes/onboarding/sdks/SDKSnippet.tsx
@@ -11,7 +11,7 @@ export const SDKSnippet = ({ sdk, sdkInstructions }: { sdk: SDK; sdkInstructions
Read the docs
- {sdkInstructions()}
+
{sdkInstructions()}
)
}
diff --git a/frontend/src/scenes/onboarding/sdks/SDKs.tsx b/frontend/src/scenes/onboarding/sdks/SDKs.tsx
index 89b424fd9bae7..d885e94ea6fcf 100644
--- a/frontend/src/scenes/onboarding/sdks/SDKs.tsx
+++ b/frontend/src/scenes/onboarding/sdks/SDKs.tsx
@@ -98,7 +98,7 @@ export function SDKs({
}
>
-
+
- const setAsPathStart = (): void => setFilter({ startPoint: pageUrl(node) })
- const setAsPathEnd = (): void => setFilter({ endPoint: pageUrl(node) })
+ const nodeName = pageUrl(node)
+ const isPath = nodeName.includes('/')
+
+ const setAsPathStart = (): void => setFilter({ startPoint: nodeName })
+ const setAsPathEnd = (): void => setFilter({ endPoint: nodeName })
const excludePathItem = (): void => {
setFilter({ excludeEvents: [...(filter.excludeEvents || []), pageUrl(node, false)] })
}
@@ -42,15 +45,17 @@ export function PathNodeCardButton({
viewPathToFunnel(node)
}
const copyName = (): void => {
- void copyToClipboard(pageUrl(node)).then(captureException)
+ void copyToClipboard(nodeName).then(captureException)
}
const openModal = (): void => openPersonsModal({ path_end_key: name })
+ const isTruncatedPath = name.slice(1) === '_...'
+
return (
-
+
{`0${name[0]}`}
- {pageUrl(node, true)}
+ {pageUrl(node, isPath)}
@@ -59,6 +64,11 @@ export function PathNodeCardButton({
}
+ disabledReason={
+ isTruncatedPath
+ ? 'Multiple paths truncated and combined for efficiency during querying. No further analysis possible.'
+ : undefined
+ }
dropdown={{
overlay: (
<>
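
For context on `isTruncatedPath`: path node names carry a step prefix (e.g. `1_/home`), and a truncated aggregate node's name is literally `<step>_...`, which is what `name.slice(1) === '_...'` detects. A hedged sketch of the check (note it assumes a single-character step prefix):

// Mirrors the check above: everything after the first character must be '_...'.
const isTruncatedPath = (name: string): boolean => name.slice(1) === '_...'

isTruncatedPath('2_...') // true  -> dropdown button disabled with a reason
isTruncatedPath('2_/pricing') // false -> full analysis options available
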
diff --git a/frontend/src/scenes/persons/PersonsTable.tsx b/frontend/src/scenes/persons/PersonsTable.tsx
deleted file mode 100644
index 05ecbccc5233b..0000000000000
--- a/frontend/src/scenes/persons/PersonsTable.tsx
+++ /dev/null
@@ -1,134 +0,0 @@
-import { IconTrash } from '@posthog/icons'
-import { LemonButton } from '@posthog/lemon-ui'
-import { useActions } from 'kea'
-import { CopyToClipboardInline } from 'lib/components/CopyToClipboard'
-import { PropertiesTable } from 'lib/components/PropertiesTable'
-import { TZLabel } from 'lib/components/TZLabel'
-import { LemonTable, LemonTableColumn, LemonTableColumns } from 'lib/lemon-ui/LemonTable'
-import { PersonDeleteModal } from 'scenes/persons/PersonDeleteModal'
-import { personDeleteModalLogic } from 'scenes/persons/personDeleteModalLogic'
-import { personsLogic } from 'scenes/persons/personsLogic'
-
-import { PersonType, PropertyDefinitionType } from '~/types'
-
-import { PersonDisplay } from './PersonDisplay'
-
-interface PersonsTableType {
- people: PersonType[]
- loading?: boolean
- hasPrevious?: boolean
- hasNext?: boolean
- loadPrevious?: () => void
- loadNext?: () => void
- compact?: boolean
- extraColumns?: LemonTableColumns<PersonType>
- emptyState?: JSX.Element
-}
-
-export function PersonsTable({
- people,
- loading = false,
- hasPrevious,
- hasNext,
- loadPrevious,
- loadNext,
- compact,
- extraColumns,
- emptyState,
-}: PersonsTableType): JSX.Element {
- const { showPersonDeleteModal } = useActions(personDeleteModalLogic)
- const { loadPersons } = useActions(personsLogic)
-
- const columns: LemonTableColumns<PersonType> = [
- {
- title: 'Person',
- key: 'person',
- render: function Render(_, person: PersonType) {
- return
- },
- },
- ...(!compact
- ? ([
- {
- title: 'ID',
- key: 'id',
- render: function Render(_, person: PersonType) {
- return (
-
- {person.distinct_ids.length && (
-
- {person.distinct_ids[0]}
-
- )}
-
- )
- },
- },
- {
- title: 'First seen',
- dataIndex: 'created_at',
- render: function Render(created_at: PersonType['created_at']) {
- return created_at ? <TZLabel time={created_at} /> : <></>
- },
- },
- {
- render: function Render(_, person: PersonType) {
- return (
- <LemonButton
- onClick={() => showPersonDeleteModal(person, () => loadPersons())}
- icon={<IconTrash />}
- status="danger"
- size="small"
- />
- )
- },
- },
- ] as Array<LemonTableColumn<PersonType, keyof PersonType | undefined>>)
- : []),
- ...(extraColumns || []),
- ]
-
- return (
- <>
- {
- loadNext?.()
- window.scrollTo(0, 0)
- }
- : undefined,
- onBackward: hasPrevious
- ? () => {
- loadPrevious?.()
- window.scrollTo(0, 0)
- }
- : undefined,
- }}
- expandable={{
- expandedRowRender: function RenderPropertiesTable({ properties }) {
- return Object.keys(properties).length ? (
-
- ) : (
- 'This person has no properties.'
- )
- },
- }}
- dataSource={people}
- emptyState={emptyState ? emptyState : 'No persons'}
- nouns={['person', 'persons']}
- />
-
- >
- )
-}
diff --git a/frontend/src/scenes/plugins/edit/PluginDrawer.tsx b/frontend/src/scenes/plugins/edit/PluginDrawer.tsx
index 2543867ce3c28..6313663808f93 100644
--- a/frontend/src/scenes/plugins/edit/PluginDrawer.tsx
+++ b/frontend/src/scenes/plugins/edit/PluginDrawer.tsx
@@ -118,7 +118,7 @@ export function PluginDrawer(): JSX.Element {
{endWithPunctation(editingPlugin.description)}
-
+
{editingPlugin.url && (
diff --git a/frontend/src/scenes/plugins/edit/UploadField.tsx b/frontend/src/scenes/plugins/edit/UploadField.tsx
index 16b27528532e8..6c4a90720a11d 100644
--- a/frontend/src/scenes/plugins/edit/UploadField.tsx
+++ b/frontend/src/scenes/plugins/edit/UploadField.tsx
@@ -2,12 +2,15 @@ import { LemonFileInput } from '@posthog/lemon-ui'
export function UploadField({ value, onChange }: { value?: File; onChange?: (file: File) => void }): JSX.Element {
return (
-
onChange?.(files[0])}
- value={value?.size ? [value] : []}
- showUploadedFiles={false}
- />
+ <>
+ {value?.name ? Selected file: {value.name} : null}
+ onChange?.(files[0])}
+ value={value?.size ? [value] : []}
+ showUploadedFiles={false}
+ />
+ >
)
}
diff --git a/frontend/src/scenes/plugins/pluginsLogic.ts b/frontend/src/scenes/plugins/pluginsLogic.ts
index e4ce68b032669..4fef7a7e51d7e 100644
--- a/frontend/src/scenes/plugins/pluginsLogic.ts
+++ b/frontend/src/scenes/plugins/pluginsLogic.ts
@@ -574,6 +574,15 @@ export const pluginsLogic = kea<pluginsLogicType>([
)
},
],
+ defaultEnabledPlugins: [
+ (s) => [s.filteredEnabledPlugins, s.filteredDisabledPlugins],
+ (filteredEnabledPlugins, filteredDisabledPlugins) => {
+ const defaultEnabledPluginIds = ['GeoIP']
+ return filteredEnabledPlugins
+ .concat(filteredDisabledPlugins)
+ .filter((plugin) => defaultEnabledPluginIds.includes(plugin.name))
+ },
+ ],
pluginUrlToMaintainer: [
(s) => [s.repository],
(repository) => {
diff --git a/frontend/src/scenes/project/CreateProjectModal.tsx b/frontend/src/scenes/project/CreateProjectModal.tsx
index b1212bf53aeea..eeba0420612fb 100644
--- a/frontend/src/scenes/project/CreateProjectModal.tsx
+++ b/frontend/src/scenes/project/CreateProjectModal.tsx
@@ -57,9 +57,9 @@ export function CreateProjectModal({
Learn more in PostHog Docs.
- {currentOrganization?.teams?.some((team) => team.name === 'Default Project') && (
+ {currentOrganization?.teams?.some((team) => team.name.toLowerCase() === 'default project') && (
- Bonus tip: You can always rename your "Default Project".
+ Bonus tip: You can always rename your "Default project".
)}
>
diff --git a/frontend/src/scenes/saved-insights/SavedInsights.tsx b/frontend/src/scenes/saved-insights/SavedInsights.tsx
index efe58a71c0fb6..301c7c2c4e2d9 100644
--- a/frontend/src/scenes/saved-insights/SavedInsights.tsx
+++ b/frontend/src/scenes/saved-insights/SavedInsights.tsx
@@ -67,13 +67,13 @@ export interface InsightTypeMetadata {
export const INSIGHT_TYPES_METADATA: Record<InsightType, InsightTypeMetadata> = {
[InsightType.TRENDS]: {
name: 'Trends',
- description: 'Visualize and break down how actions or events vary over time.',
+ description: 'Visualize and break down how actions or events vary over time.',
icon: IconTrends,
inMenu: true,
},
[InsightType.FUNNELS]: {
name: 'Funnel',
- description: 'Discover how many users complete or drop out of a sequence of actions.',
+ description: 'Discover how many users complete or drop out of a sequence of actions.',
icon: IconFunnels,
inMenu: true,
},
@@ -85,19 +85,19 @@ export const INSIGHT_TYPES_METADATA: Record<InsightType, InsightTypeMetadata> =
},
[InsightType.PATHS]: {
name: 'Paths',
- description: 'Trace the journeys users take within your product and where they drop off.',
+ description: 'Trace the journeys users take within your product and where they drop off.',
icon: IconUserPaths,
inMenu: true,
},
[InsightType.STICKINESS]: {
name: 'Stickiness',
- description: 'See what keeps users coming back by viewing the interval between repeated actions.',
+ description: 'See what keeps users coming back by viewing the interval between repeated actions.',
icon: IconStickiness,
inMenu: true,
},
[InsightType.LIFECYCLE]: {
name: 'Lifecycle',
- description: 'Understand growth by breaking down new, resurrected, returning and dormant users.',
+ description: 'Understand growth by breaking down new, resurrected, returning and dormant users.',
icon: IconLifecycle,
inMenu: true,
},
@@ -118,13 +118,13 @@ export const INSIGHT_TYPES_METADATA: Record<InsightType, InsightTypeMetadata> =
export const QUERY_TYPES_METADATA: Record<NodeKind, InsightTypeMetadata> = {
[NodeKind.TrendsQuery]: {
name: 'Trends',
- description: 'Visualize and break down how actions or events vary over time',
+ description: 'Visualize and break down how actions or events vary over time',
icon: IconTrends,
inMenu: true,
},
[NodeKind.FunnelsQuery]: {
name: 'Funnel',
- description: 'Discover how many users complete or drop out of a sequence of actions',
+ description: 'Discover how many users complete or drop out of a sequence of actions',
icon: IconFunnels,
inMenu: true,
},
@@ -136,22 +136,28 @@ export const QUERY_TYPES_METADATA: Record<NodeKind, InsightTypeMetadata> = {
},
[NodeKind.PathsQuery]: {
name: 'Paths',
- description: 'Trace the journeys users take within your product and where they drop off',
+ description: 'Trace the journeys users take within your product and where they drop off',
icon: IconUserPaths,
inMenu: true,
},
[NodeKind.StickinessQuery]: {
name: 'Stickiness',
- description: 'See what keeps users coming back by viewing the interval between repeated actions',
+ description: 'See what keeps users coming back by viewing the interval between repeated actions',
icon: IconStickiness,
inMenu: true,
},
[NodeKind.LifecycleQuery]: {
name: 'Lifecycle',
- description: 'Understand growth by breaking down new, resurrected, returning and dormant users',
+ description: 'Understand growth by breaking down new, resurrected, returning and dormant users',
icon: IconLifecycle,
inMenu: true,
},
+ [NodeKind.FunnelCorrelationQuery]: {
+ name: 'Funnel Correlation',
+ description: 'See which events or properties correlate to a funnel result',
+ icon: IconPerson,
+ inMenu: false,
+ },
[NodeKind.EventsNode]: {
name: 'Events',
description: 'List and explore events',
@@ -200,6 +206,18 @@ export const QUERY_TYPES_METADATA: Record<NodeKind, InsightTypeMetadata> = {
icon: IconPerson,
inMenu: false,
},
+ [NodeKind.FunnelsActorsQuery]: {
+ name: 'Persons',
+ description: 'List of persons matching specified conditions, derived from an insight',
+ icon: IconPerson,
+ inMenu: false,
+ },
+ [NodeKind.FunnelCorrelationActorsQuery]: {
+ name: 'Persons',
+ description: 'List of persons matching specified conditions, derived from an insight',
+ icon: IconPerson,
+ inMenu: false,
+ },
[NodeKind.DataTableNode]: {
name: 'Data table',
description: 'Slice and dice your data in a table',
diff --git a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx
index eeec9bb20d642..6ff2bd792ab72 100644
--- a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx
+++ b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx
@@ -3,6 +3,8 @@ import { LemonButton, LemonCollapse, Spinner } from '@posthog/lemon-ui'
import { useActions, useValues } from 'kea'
import { urls } from 'scenes/urls'
+import { ErrorClusterSample } from '~/types'
+
import { sessionRecordingErrorsLogic } from './sessionRecordingErrorsLogic'
export function SessionRecordingErrors(): JSX.Element {
@@ -45,11 +47,11 @@ const ErrorPanelHeader = ({
}: {
occurrenceCount: number
sessionCount: number
- example: { session_id: string; message: string }
+ example: ErrorClusterSample
}): JSX.Element => {
return (
-
{example.message}
+
{example.input}
{occurrenceCount} occurrences / {sessionCount} sessions
@@ -62,12 +64,12 @@ const ErrorPanelHeader = ({
)
}
-const ErrorPanelContent = ({ samples }: { samples: { session_id: string; message: string }[] }): JSX.Element => {
+const ErrorPanelContent = ({ samples }: { samples: ErrorClusterSample[] }): JSX.Element => {
return (
{samples.map((error) => (
-
{error.message}
+
{error.input}
Watch recording
diff --git a/frontend/src/scenes/session-recordings/filters/DurationTypeSelect.tsx b/frontend/src/scenes/session-recordings/filters/DurationTypeSelect.tsx
index e6585bf4306d4..09c4517b1c4d4 100644
--- a/frontend/src/scenes/session-recordings/filters/DurationTypeSelect.tsx
+++ b/frontend/src/scenes/session-recordings/filters/DurationTypeSelect.tsx
@@ -1,5 +1,5 @@
import { LemonSelect } from '@posthog/lemon-ui'
-import { posthog } from 'posthog-js'
+import posthog from 'posthog-js'
import { DurationType } from '~/types'
diff --git a/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingDataLogic.test.ts.snap b/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingDataLogic.test.ts.snap
index 5f38f5bd4636d..1941ebe103f3a 100644
--- a/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingDataLogic.test.ts.snap
+++ b/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingDataLogic.test.ts.snap
@@ -8,6 +8,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"href": "http://localhost:3000/",
"width": 2560,
},
+ "seen": 501012029800196,
"timestamp": 1682952380877,
"type": 4,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -575,6 +576,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"type": 0,
},
},
+ "seen": 8726461620907292,
"timestamp": 1682952380882,
"type": 2,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -591,6 +593,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 8584030046878700,
"timestamp": 1682952383040,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -603,6 +606,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 1618.84765625,
"y": 299.01953125,
},
+ "seen": 2483146755249437,
"timestamp": 1682952383262,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -615,6 +619,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 1618.84765625,
"y": 299.01953125,
},
+ "seen": 5263782251753321,
"timestamp": 1682952383263,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -628,6 +633,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 1618,
"y": 299,
},
+ "seen": 5653094638019907,
"timestamp": 1682952383264,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -650,6 +656,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 17060931257009,
"timestamp": 1682952383543,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -690,6 +697,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 4557187183717243,
"timestamp": 1682952384050,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -702,6 +710,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.6875,
},
+ "seen": 8414661306677587,
"timestamp": 1682952384230,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -712,6 +721,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 2,
"type": 5,
},
+ "seen": 5691729520402348,
"timestamp": 1682952384231,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -724,6 +734,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.5546875,
},
+ "seen": 5750299534503778,
"timestamp": 1682952384310,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -737,6 +748,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729,
"y": 124,
},
+ "seen": 8129303588045202,
"timestamp": 1682952384313,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -749,6 +761,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 5621966732519651,
"timestamp": 1682952384447,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -761,6 +774,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 7326349637770734,
"timestamp": 1682952384460,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -774,6 +788,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729,
"y": 124,
},
+ "seen": 7654897841603800,
"timestamp": 1682952384463,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -786,6 +801,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729,
"y": 124,
},
+ "seen": 6693982823133270,
"timestamp": 1682952384464,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -820,6 +836,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 7596817111670310,
"timestamp": 1682952384555,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -832,6 +849,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 8853173800988607,
"timestamp": 1682952384559,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -844,6 +862,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 4271865787519747,
"timestamp": 1682952384675,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -857,6 +876,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729,
"y": 124,
},
+ "seen": 1674109008838975,
"timestamp": 1682952384676,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -869,6 +889,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 3659459423609925,
"timestamp": 1682952384709,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -881,6 +902,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 3319121651411945,
"timestamp": 1682952384810,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -894,6 +916,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 729,
"y": 124,
},
+ "seen": 6290996447334721,
"timestamp": 1682952384811,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -910,6 +933,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 6289157423077161,
"timestamp": 1682952385058,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -926,6 +950,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 5317150202362897,
"timestamp": 1682952385562,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -936,6 +961,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 2,
"type": 6,
},
+ "seen": 3091503570476556,
"timestamp": 1682952385719,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -946,6 +972,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 4,
"width": 2560,
},
+ "seen": 2584127757761616,
"timestamp": 1682952385738,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -962,6 +989,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 1875201057400002,
"timestamp": 1682952386063,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -984,6 +1012,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 6218302554450745,
"timestamp": 1682952386571,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -994,6 +1023,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"href": "http://localhost:3000/",
"width": 2560,
},
+ "seen": 5644389944173152,
"timestamp": 1682952388104,
"type": 4,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1348,6 +1378,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"type": 0,
},
},
+ "seen": 3138372698016266,
"timestamp": 1682952388106,
"type": 2,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1369,6 +1400,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 0,
"texts": [],
},
+ "seen": 2717748427947278,
"timestamp": 1682952388108,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1697,6 +1729,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 0,
"texts": [],
},
+ "seen": 7280424110324570,
"timestamp": 1682952388117,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1778,6 +1811,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 0,
"texts": [],
},
+ "seen": 6919367212959915,
"timestamp": 1682952388132,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1794,6 +1828,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 76365872556484,
"timestamp": 1682952388659,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1828,6 +1863,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 395714907704044,
"timestamp": 1682952389163,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1880,6 +1916,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
],
"source": 1,
},
+ "seen": 8416592866543045,
"timestamp": 1682952389668,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1892,6 +1929,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 3965731883593926,
"timestamp": 1682952389698,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1902,6 +1940,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 2,
"type": 5,
},
+ "seen": 4164163026072593,
"timestamp": 1682952389699,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1914,6 +1953,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 6098664365675746,
"timestamp": 1682952389798,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1927,6 +1967,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852,
"y": 133,
},
+ "seen": 5762389377540625,
"timestamp": 1682952389798,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1939,6 +1980,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 8669833091819132,
"timestamp": 1682952389943,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1951,6 +1993,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 7827498949680603,
"timestamp": 1682952390043,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1964,6 +2007,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852,
"y": 133,
},
+ "seen": 7198721964954330,
"timestamp": 1682952390044,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1976,6 +2020,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852,
"y": 133,
},
+ "seen": 6166726337775078,
"timestamp": 1682952390047,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1988,6 +2033,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 8681720216534728,
"timestamp": 1682952390112,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2000,6 +2046,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 4356985098874230,
"timestamp": 1682952390243,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2013,6 +2060,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"x": 852,
"y": 133,
},
+ "seen": 1432285913547783,
"timestamp": 1682952390244,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2023,6 +2071,7 @@ exports[`sessionRecordingDataLogic prepareRecordingSnapshots should match snapsh
"source": 2,
"type": 6,
},
+ "seen": 4703642245972871,
"timestamp": 1682952392745,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
diff --git a/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingPlayerLogic.test.ts.snap b/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingPlayerLogic.test.ts.snap
index 7e1b84825e474..155bfdc8c631f 100644
--- a/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingPlayerLogic.test.ts.snap
+++ b/frontend/src/scenes/session-recordings/player/__snapshots__/sessionRecordingPlayerLogic.test.ts.snap
@@ -86,6 +86,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"href": "http://localhost:3000/",
"width": 2560,
},
+ "seen": 501012029800196,
"timestamp": 1682952380877,
"type": 4,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -653,6 +654,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"type": 0,
},
},
+ "seen": 8726461620907292,
"timestamp": 1682952380882,
"type": 2,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -669,6 +671,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 8584030046878700,
"timestamp": 1682952383040,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -681,6 +684,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 1618.84765625,
"y": 299.01953125,
},
+ "seen": 2483146755249437,
"timestamp": 1682952383262,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -693,6 +697,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 1618.84765625,
"y": 299.01953125,
},
+ "seen": 5263782251753321,
"timestamp": 1682952383263,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -706,6 +711,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 1618,
"y": 299,
},
+ "seen": 5653094638019907,
"timestamp": 1682952383264,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -728,6 +734,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 17060931257009,
"timestamp": 1682952383543,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -768,6 +775,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 4557187183717243,
"timestamp": 1682952384050,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -780,6 +788,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.6875,
},
+ "seen": 8414661306677587,
"timestamp": 1682952384230,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -790,6 +799,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 2,
"type": 5,
},
+ "seen": 5691729520402348,
"timestamp": 1682952384231,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -802,6 +812,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.5546875,
},
+ "seen": 5750299534503778,
"timestamp": 1682952384310,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -815,6 +826,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729,
"y": 124,
},
+ "seen": 8129303588045202,
"timestamp": 1682952384313,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -827,6 +839,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 5621966732519651,
"timestamp": 1682952384447,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -839,6 +852,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 7326349637770734,
"timestamp": 1682952384460,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -852,6 +866,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729,
"y": 124,
},
+ "seen": 7654897841603800,
"timestamp": 1682952384463,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -864,6 +879,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729,
"y": 124,
},
+ "seen": 6693982823133270,
"timestamp": 1682952384464,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -898,6 +914,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 7596817111670310,
"timestamp": 1682952384555,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -910,6 +927,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 8853173800988607,
"timestamp": 1682952384559,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -922,6 +940,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 4271865787519747,
"timestamp": 1682952384675,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -935,6 +954,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729,
"y": 124,
},
+ "seen": 1674109008838975,
"timestamp": 1682952384676,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -947,6 +967,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 3659459423609925,
"timestamp": 1682952384709,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -959,6 +980,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729.30859375,
"y": 124.0546875,
},
+ "seen": 3319121651411945,
"timestamp": 1682952384810,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -972,6 +994,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 729,
"y": 124,
},
+ "seen": 6290996447334721,
"timestamp": 1682952384811,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -988,6 +1011,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 6289157423077161,
"timestamp": 1682952385058,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -1004,6 +1028,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 5317150202362897,
"timestamp": 1682952385562,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -1014,6 +1039,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 2,
"type": 6,
},
+ "seen": 3091503570476556,
"timestamp": 1682952385719,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -1024,6 +1050,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 4,
"width": 2560,
},
+ "seen": 2584127757761616,
"timestamp": 1682952385738,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -1040,6 +1067,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 1875201057400002,
"timestamp": 1682952386063,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -1062,6 +1090,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 6218302554450745,
"timestamp": 1682952386571,
"type": 3,
"windowId": "187d7c761a0525d-05f175487d4b65-1d525634-384000-187d7c761a149d0",
@@ -1074,6 +1103,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"href": "http://localhost:3000/",
"width": 2560,
},
+ "seen": 5644389944173152,
"timestamp": 1682952388104,
"type": 4,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1428,6 +1458,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"type": 0,
},
},
+ "seen": 3138372698016266,
"timestamp": 1682952388106,
"type": 2,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1449,6 +1480,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 0,
"texts": [],
},
+ "seen": 2717748427947278,
"timestamp": 1682952388108,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1777,6 +1809,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 0,
"texts": [],
},
+ "seen": 7280424110324570,
"timestamp": 1682952388117,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1858,6 +1891,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 0,
"texts": [],
},
+ "seen": 6919367212959915,
"timestamp": 1682952388132,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1874,6 +1908,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 76365872556484,
"timestamp": 1682952388659,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1908,6 +1943,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 395714907704044,
"timestamp": 1682952389163,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1960,6 +1996,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
],
"source": 1,
},
+ "seen": 8416592866543045,
"timestamp": 1682952389668,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1972,6 +2009,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 3965731883593926,
"timestamp": 1682952389698,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1982,6 +2020,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 2,
"type": 5,
},
+ "seen": 4164163026072593,
"timestamp": 1682952389699,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -1994,6 +2033,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 6098664365675746,
"timestamp": 1682952389798,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2007,6 +2047,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852,
"y": 133,
},
+ "seen": 5762389377540625,
"timestamp": 1682952389798,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2019,6 +2060,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 8669833091819132,
"timestamp": 1682952389943,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2031,6 +2073,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 7827498949680603,
"timestamp": 1682952390043,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2044,6 +2087,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852,
"y": 133,
},
+ "seen": 7198721964954330,
"timestamp": 1682952390044,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2056,6 +2100,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852,
"y": 133,
},
+ "seen": 6166726337775078,
"timestamp": 1682952390047,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2068,6 +2113,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 8681720216534728,
"timestamp": 1682952390112,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2080,6 +2126,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852.7421875,
"y": 133.1640625,
},
+ "seen": 4356985098874230,
"timestamp": 1682952390243,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2093,6 +2140,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"x": 852,
"y": 133,
},
+ "seen": 1432285913547783,
"timestamp": 1682952390244,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
@@ -2103,6 +2151,7 @@ exports[`sessionRecordingPlayerLogic loading session core loads metadata and sna
"source": 2,
"type": 6,
},
+ "seen": 4703642245972871,
"timestamp": 1682952392745,
"type": 3,
"windowId": "187d7c77dfe1d45-08bdcaf91135a2-1d525634-384000-187d7c77dff39a6",
diff --git a/frontend/src/scenes/session-recordings/player/inspector/playerInspectorLogic.ts b/frontend/src/scenes/session-recordings/player/inspector/playerInspectorLogic.ts
index f1415221ba327..09ebe7988bc3e 100644
--- a/frontend/src/scenes/session-recordings/player/inspector/playerInspectorLogic.ts
+++ b/frontend/src/scenes/session-recordings/player/inspector/playerInspectorLogic.ts
@@ -604,7 +604,15 @@ export const playerInspectorLogic = kea<playerInspectorLogicType>([
// always show offline status changes
if (item.type === 'offline-status' || item.type === 'browser-visibility') {
- include = true
+ include =
+ tab === SessionRecordingPlayerTab.DOCTOR ||
+ !!(
+ miniFiltersByKey['performance-all']?.enabled ||
+ miniFiltersByKey['all-everything']?.enabled ||
+ miniFiltersByKey['all-automatic']?.enabled ||
+ miniFiltersByKey['console-all']?.enabled ||
+ miniFiltersByKey['events-all']?.enabled
+ )
}
if (item.type === SessionRecordingPlayerTab.DOCTOR && tab === SessionRecordingPlayerTab.DOCTOR) {
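
The effect of this hunk is that offline-status and browser-visibility items are no longer shown unconditionally (the "always show" comment above it predates the change): they now appear on the Doctor tab, or when one of the broad mini-filters is enabled. As a standalone predicate, with simplified types and 'doctor' standing in for SessionRecordingPlayerTab.DOCTOR:

type MiniFiltersByKey = Partial<Record<string, { enabled: boolean }>>

function includeStatusItem(tab: string, miniFiltersByKey: MiniFiltersByKey): boolean {
    const broadFilters = ['performance-all', 'all-everything', 'all-automatic', 'console-all', 'events-all']
    return tab === 'doctor' || broadFilters.some((key) => !!miniFiltersByKey[key]?.enabled)
}
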
diff --git a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts
index e8eabbdb645d1..8d630aa10acbb 100644
--- a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts
+++ b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts
@@ -125,6 +125,27 @@ const getHrefFromSnapshot = (snapshot: RecordingSnapshot): string | undefined =>
return (snapshot.data as any)?.href || (snapshot.data as any)?.payload?.href
}
+/*
+ cyrb53 (c) 2018 bryc (github.com/bryc)
+ License: Public domain. Attribution appreciated.
+ A fast and simple 53-bit string hash function with decent collision resistance.
+ Largely inspired by MurmurHash2/3, but with a focus on speed/simplicity.
+*/
+const cyrb53 = function (str: string, seed = 0): number {
+ let h1 = 0xdeadbeef ^ seed,
+ h2 = 0x41c6ce57 ^ seed
+ for (let i = 0, ch; i < str.length; i++) {
+ ch = str.charCodeAt(i)
+ h1 = Math.imul(h1 ^ ch, 2654435761)
+ h2 = Math.imul(h2 ^ ch, 1597334677)
+ }
+ h1 = Math.imul(h1 ^ (h1 >>> 16), 2246822507)
+ h1 ^= Math.imul(h2 ^ (h2 >>> 13), 3266489909)
+ h2 = Math.imul(h2 ^ (h2 >>> 16), 2246822507)
+ h2 ^= Math.imul(h1 ^ (h1 >>> 13), 3266489909)
+ return 4294967296 * (2097151 & h2) + (h1 >>> 0)
+}
+
export const deduplicateSnapshots = (
newSnapshots?: RecordingSnapshot[],
existingSnapshots?: RecordingSnapshot[]
@@ -141,7 +162,11 @@ export const deduplicateSnapshots = (
// we can see duplicates that only differ by delay - these still count as duplicates
// even though the delay would hide that
const { delay: _delay, ...delayFreeSnapshot } = snapshot
- const key = JSON.stringify(delayFreeSnapshot)
+ // we check each item multiple times as new snapshots come in
+ // so store the computed value on the object to save recalculating it so much
+ const key = (snapshot as any).seen || cyrb53(JSON.stringify(delayFreeSnapshot))
+ ;(snapshot as any).seen = key
+
if (seenHashes.has(key)) {
return false
} else {
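
Taken together, the two hunks above mean: hash each snapshot once with cyrb53, cache the 53-bit result on the object as `seen`, and reuse it on every later deduplication pass instead of re-stringifying. A hedged sketch of that flow, with a simplified snapshot shape and `cyrb53` as defined in the hunk above:

interface SnapshotLike {
    timestamp: number
    data: unknown
    delay?: number
    seen?: number // cached cyrb53 hash, written on first visit
}

function dedupe(snapshots: SnapshotLike[]): SnapshotLike[] {
    const seenHashes = new Set<number>()
    return snapshots.filter((snapshot) => {
        // Exclude delay (and the cache itself) so two snapshots differing
        // only by delay still count as duplicates.
        const { delay: _delay, seen: _seen, ...hashable } = snapshot
        const key = snapshot.seen ?? cyrb53(JSON.stringify(hashable))
        snapshot.seen = key // later passes skip the stringify + hash entirely
        if (seenHashes.has(key)) {
            return false
        }
        seenHashes.add(key)
        return true
    })
}
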
diff --git a/frontend/src/scenes/settings/SettingsMap.tsx b/frontend/src/scenes/settings/SettingsMap.tsx
index 8ffcf95b94082..c24e8930cf3ec 100644
--- a/frontend/src/scenes/settings/SettingsMap.tsx
+++ b/frontend/src/scenes/settings/SettingsMap.tsx
@@ -25,10 +25,10 @@ import {
WebSnippet,
} from './project/ProjectSettings'
import {
+ ReplayAISettings,
ReplayAuthorizedDomains,
ReplayCostControl,
ReplayGeneral,
- ReplaySummarySettings,
} from './project/SessionRecordingSettings'
import { SettingPersonsOnEvents } from './project/SettingPersonsOnEvents'
import { SlackIntegration } from './project/SlackIntegration'
@@ -86,7 +86,7 @@ export const SettingsMap: SettingSection[] = [
},
{
id: 'exception-autocapture',
- title: 'Exception Autocapture',
+ title: 'Exception autocapture',
component: ,
flag: 'EXCEPTION_AUTOCAPTURE',
},
@@ -101,11 +101,11 @@ export const SettingsMap: SettingSection[] = [
{
level: 'project',
id: 'project-product-analytics',
- title: 'Product Analytics',
+ title: 'Product analytics',
settings: [
{
id: 'date-and-time',
- title: 'Date & Time',
+ title: 'Date & time',
component: ,
},
{
@@ -130,12 +130,12 @@ export const SettingsMap: SettingSection[] = [
},
{
id: 'datacapture',
- title: 'IP Data capture configuration',
+ title: 'IP data capture configuration',
component: ,
},
{
id: 'group-analytics',
- title: 'Group Analytics',
+ title: 'Group analytics',
component: ,
},
{
@@ -149,16 +149,16 @@ export const SettingsMap: SettingSection[] = [
{
level: 'project',
id: 'project-replay',
- title: 'Session Replay',
+ title: 'Session replay',
settings: [
{
id: 'replay',
- title: 'Session Replay',
+ title: 'Session replay',
component: <ReplayGeneral />,
},
{
id: 'replay-authorized-domains',
- title: 'Authorized Domains for Replay',
+ title: 'Authorized domains for replay',
component: <ReplayAuthorizedDomains />,
},
{
@@ -174,8 +174,8 @@ export const SettingsMap: SettingSection[] = [
},
{
id: 'replay-ai-config',
- title: 'AI Recording Summary',
- component: <ReplaySummarySettings />,
+ title: 'AI recording summary',
+ component: <ReplayAISettings />,
flag: 'AI_SESSION_PERMISSIONS',
},
],
@@ -200,7 +200,7 @@ export const SettingsMap: SettingSection[] = [
settings: [
{
id: 'authorized-toolbar-urls',
- title: 'Authorized Toolbar URLs',
+ title: 'Authorized toolbar URLs',
component: ,
},
],
@@ -229,7 +229,7 @@ export const SettingsMap: SettingSection[] = [
settings: [
{
id: 'project-rbac',
- title: 'Access Control',
+ title: 'Access control',
component: ,
},
],
@@ -267,12 +267,12 @@ export const SettingsMap: SettingSection[] = [
settings: [
{
id: 'invites',
- title: 'Pending Invites',
+ title: 'Pending invites',
component: ,
},
{
id: 'members',
- title: 'Members',
+ title: 'Organization members',
component: ,
},
{
@@ -285,7 +285,7 @@ export const SettingsMap: SettingSection[] = [
{
level: 'organization',
id: 'organization-authentication',
- title: 'Authentication Domains & SSO',
+ title: 'Authentication domains & SSO',
settings: [
{
id: 'authentication-domains',
@@ -346,7 +346,7 @@ export const SettingsMap: SettingSection[] = [
{
level: 'user',
id: 'user-api-keys',
- title: 'Personal API Keys',
+ title: 'Personal API keys',
settings: [
{
id: 'personal-api-keys',
@@ -372,7 +372,7 @@ export const SettingsMap: SettingSection[] = [
},
{
id: 'optout',
- title: 'Anonymize Data Collection',
+ title: 'Anonymize data collection',
component: ,
},
],
diff --git a/frontend/src/scenes/settings/organization/InviteModal.tsx b/frontend/src/scenes/settings/organization/InviteModal.tsx
index 67ead188a7595..cc92623f946fe 100644
--- a/frontend/src/scenes/settings/organization/InviteModal.tsx
+++ b/frontend/src/scenes/settings/organization/InviteModal.tsx
@@ -236,15 +236,15 @@ export function InviteModal({ isOpen, onClose }: { isOpen: boolean; onClose: ()
description={
preflight?.email_service_available ? (
- Invite others to your project to collaborate together in PostHog. An invite is specific to
- an email address and expires after 3 days. Name can be provided for the team member's
+ Invite others to your organization to collaborate together in PostHog. An invite is specific
+ to an email address and expires after 3 days. Name can be provided for the team member's
convenience.
) : (
This PostHog instance isn't configured to send emails. In the meantime, you can generate a
link for each team member you want to invite. You can always invite others at a later time.{' '}
- Make sure you share links with the project members you want to invite.
+ Make sure you share links with the organization members you want to invite.
)
}
diff --git a/frontend/src/scenes/settings/organization/Members.tsx b/frontend/src/scenes/settings/organization/Members.tsx
index a3a6175b672ac..e5be1e2b290dc 100644
--- a/frontend/src/scenes/settings/organization/Members.tsx
+++ b/frontend/src/scenes/settings/organization/Members.tsx
@@ -11,7 +11,7 @@ import { LemonTable, LemonTableColumns } from 'lib/lemon-ui/LemonTable'
import { LemonTag } from 'lib/lemon-ui/LemonTag/LemonTag'
import { ProfilePicture } from 'lib/lemon-ui/ProfilePicture'
import { Tooltip } from 'lib/lemon-ui/Tooltip'
-import { fullName } from 'lib/utils'
+import { capitalizeFirstLetter, fullName } from 'lib/utils'
import {
getReasonForAccessLevelChangeProhibition,
membershipLevelToName,
@@ -195,9 +195,7 @@ export function Members(): JSX.Element | null {
render: function LevelRender(_, member) {
return (
- {member.level === OrganizationMembershipLevel.Owner
- ? 'Organization owner'
- : `Project ${membershipLevelToName.get(member.level) ?? `unknown (${member.level})`}`}
+ {capitalizeFirstLetter(membershipLevelToName.get(member.level) ?? `unknown (${member.level})`)}
)
},
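Aside from the diff: the rewritten LevelRender above collapses the owner special case into a single map lookup with a capitalized fallback. A minimal sketch of that pattern, using hypothetical stand-ins for PostHog's membershipLevelToName and capitalizeFirstLetter (the numeric levels are illustrative):

    // Hypothetical stand-ins for membershipLevelToName and capitalizeFirstLetter.
    const levelToName = new Map<number, string>([
        [1, 'member'],
        [8, 'admin'],
        [15, 'owner'],
    ])

    function capitalizeFirst(s: string): string {
        return s.charAt(0).toUpperCase() + s.slice(1)
    }

    // Unknown levels degrade to a diagnostic label instead of throwing.
    function renderLevel(level: number): string {
        return capitalizeFirst(levelToName.get(level) ?? `unknown (${level})`)
    }

    console.log(renderLevel(15)) // "Owner"
    console.log(renderLevel(3)) // "Unknown (3)"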
diff --git a/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx b/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx
index 02fa32655da03..4776999cc047c 100644
--- a/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx
+++ b/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx
@@ -2,6 +2,7 @@ import { IconPlus } from '@posthog/icons'
import {
LemonBanner,
LemonButton,
+ LemonDialog,
LemonSegmentedButton,
LemonSegmentedButtonOption,
LemonSelect,
@@ -22,6 +23,7 @@ import { FEATURE_FLAGS, SESSION_REPLAY_MINIMUM_DURATION_OPTIONS } from 'lib/cons
import { IconCancel, IconSelectEvents } from 'lib/lemon-ui/icons'
import { LemonLabel } from 'lib/lemon-ui/LemonLabel/LemonLabel'
import { featureFlagLogic as enabledFlagsLogic } from 'lib/logic/featureFlagLogic'
+import { objectsEqual } from 'lib/utils'
import { sessionReplayLinkedFlagLogic } from 'scenes/settings/project/sessionReplayLinkedFlagLogic'
import { teamLogic } from 'scenes/teamLogic'
import { userLogic } from 'scenes/userLogic'
@@ -96,6 +98,26 @@ function CanvasCaptureSettings(): JSX.Element | null {
)
}
+function PayloadWarning(): JSX.Element {
+ return (
+ <>
+ <p>
+ We automatically scrub some sensitive information from network headers and request and response bodies.
+ </p>{' '}
+ <p>
+ If they could contain sensitive data, you should provide a function to mask the data when you initialise
+ PostHog.{' '}
+ Learn how to mask header and body values in our docs
+ </p>
+ </>
+ )
+}
+
function NetworkCaptureSettings(): JSX.Element {
const { updateCurrentTeam } = useActions(teamLogic)
const { currentTeam } = useValues(teamLogic)
@@ -128,15 +150,7 @@ function NetworkCaptureSettings(): JSX.Element {
- We automatically scrub some sensitive information from network headers, but if your request or
- response payloads could contain sensitive data, you can provide a function to mask the data when you
- initialise PostHog.{' '}
-
- Learn how to mask header and payload values in our docs
-
+ <PayloadWarning />
{
- updateCurrentTeam({
- session_recording_network_payload_capture_config: {
- ...currentTeam?.session_recording_network_payload_capture_config,
- recordBody: checked,
- },
- })
+ if (checked) {
+ LemonDialog.open({
+ maxWidth: '650px',
+ title: 'Network body capture',
+ description: <PayloadWarning />,
+ primaryButton: {
+ 'data-attr': 'network-payload-capture-accept-warning-and-enable',
+ children: 'Enable body capture',
+ onClick: () => {
+ updateCurrentTeam({
+ session_recording_network_payload_capture_config: {
+ ...currentTeam?.session_recording_network_payload_capture_config,
+ recordBody: true,
+ },
+ })
+ },
+ },
+ })
+ } else {
+ updateCurrentTeam({
+ session_recording_network_payload_capture_config: {
+ ...currentTeam?.session_recording_network_payload_capture_config,
+ recordBody: false,
+ },
+ })
+ }
}}
label="Capture body"
bordered
@@ -473,7 +507,7 @@ export function ReplayCostControl(): JSX.Element | null {
) : null
}
-export function ReplaySummarySettings(): JSX.Element | null {
+export function ReplayAISettings(): JSX.Element | null {
const { updateCurrentTeam } = useActions(teamLogic)
const { currentTeam } = useValues(teamLogic)
@@ -498,18 +532,19 @@ export function ReplaySummarySettings(): JSX.Element | null {
})
}
+ const { opt_in: _discardCurrentOptIn, ...currentComparable } = currentConfig
+ const { opt_in: _discardDefaultOptIn, ...defaultComparable } = defaultConfig
+
return (
-
- updateSummaryConfig(defaultConfig)}>
- Reset to default
-
-
- We use several machine learning technologies to process sessions. Some of those are hosted by Open
- AI. No data is sent to OpenAI without an explicit instruction to do so. If we do send data we only
- send the data selected below. sData submitted is not used to train Open AI's models
+ We use several machine learning technologies to process sessions. Some of those are powered by{' '}
+
+ OpenAI
+
+ . No data is sent to OpenAI without an explicit instruction to do so. If we do send data we only
+ send the data selected below. Data submitted is not used to train OpenAI's models
{currentConfig.opt_in && (
<>
+ {!objectsEqual(currentComparable, defaultComparable) && (
+
+ <LemonButton
+ type="secondary"
+ onClick={() => updateSummaryConfig({ ...defaultConfig, opt_in: true })}
+ >
+ Reset config to default
+ </LemonButton>
+
+ )}
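The reset button above only renders when the saved config actually differs from the default, and opt_in is destructured out on both sides first so merely toggling the feature never counts as a customization. A sketch of that comparison, assuming a simplified config shape (JSON.stringify stands in for the objectsEqual deep-equality helper and assumes stable key order):

    type SummaryConfig = { opt_in: boolean; included_events?: string[]; preferred_events?: string[] } // simplified, assumed shape

    function differsFromDefault(current: SummaryConfig, defaults: SummaryConfig): boolean {
        // Discard opt_in on both sides before comparing, mirroring the diff.
        const { opt_in: _currentOptIn, ...currentComparable } = current
        const { opt_in: _defaultOptIn, ...defaultComparable } = defaults
        return JSON.stringify(currentComparable) !== JSON.stringify(defaultComparable)
    }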
diff --git a/frontend/src/scenes/teamActivityDescriber.tsx b/frontend/src/scenes/teamActivityDescriber.tsx
index a58fd574c3ebd..a4b2da80d5191 100644
--- a/frontend/src/scenes/teamActivityDescriber.tsx
+++ b/frontend/src/scenes/teamActivityDescriber.tsx
@@ -9,7 +9,7 @@ import {
} from 'lib/components/ActivityLog/humanizeActivity'
import { SentenceList } from 'lib/components/ActivityLog/SentenceList'
import { Link } from 'lib/lemon-ui/Link'
-import { pluralize } from 'lib/utils'
+import { isObject, pluralize } from 'lib/utils'
import { urls } from 'scenes/urls'
import { ActivityScope, TeamType } from '~/types'
@@ -101,9 +101,28 @@ const teamActionsMapping: Record<
}
},
session_recording_network_payload_capture_config(change: ActivityChange | undefined): ChangeMapping | null {
- return {
- description: [<>{change?.after ? 'enabled' : 'disabled'} network payload capture in session replay</>],
+ const payloadBefore = isObject(change?.before) ? change?.before.recordBody : !!change?.before
+ const payloadAfter = isObject(change?.after) ? change?.after.recordBody : !!change?.after
+ const payloadChanged = payloadBefore !== payloadAfter
+
+ const headersBefore = isObject(change?.before) ? change?.before.recordHeaders : !!change?.before
+ const headersAfter = isObject(change?.after) ? change?.after.recordHeaders : !!change?.after
+ const headersChanged = headersBefore !== headersAfter
+
+ const descriptions = []
+ if (payloadChanged) {
+ descriptions.push(<>{payloadAfter ? 'enabled' : 'disabled'} network body capture in session replay</>)
}
+
+ if (headersChanged) {
+ descriptions.push(<>{headersAfter ? 'enabled' : 'disabled'} network headers capture in session replay</>)
+ }
+
+ return descriptions.length
+ ? {
+ description: descriptions,
+ }
+ : null
},
session_recording_opt_in(change: ActivityChange | undefined): ChangeMapping | null {
return { description: [<>{change?.after ? 'enabled' : 'disabled'} session recording</>] }
@@ -168,7 +187,19 @@ const teamActionsMapping: Record<
timezone: () => null,
surveys_opt_in: () => null,
week_start_day: () => null,
- extra_settings: () => null,
+ extra_settings: (change: ActivityChange | undefined): ChangeMapping | null => {
+ const after = change?.after
+ if (typeof after !== 'object') {
+ return null
+ }
+ const descriptions = []
+ for (const key in after) {
+ if (key === 'poe_v2_enabled') {
descriptions.push(<>{after[key] ? 'enabled' : 'disabled'} Person on Events (v2)</>)
+ }
+ }
+ return { description: descriptions }
+ },
has_completed_onboarding_for: () => null,
// should never come from the backend
created_at: () => null,
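The describer change above has to cope with two generations of activity-log payloads: older entries stored session_recording_network_payload_capture_config as a bare boolean, newer ones as an object with recordBody/recordHeaders. A sketch of that normalisation, with a simplified config type:

    type CaptureConfig = { recordBody?: boolean; recordHeaders?: boolean }

    function isObject(x: unknown): x is CaptureConfig {
        return typeof x === 'object' && x !== null
    }

    // Legacy boolean entries mean "everything on/off"; object entries are per capture type.
    function bodyCaptureEnabled(value: boolean | CaptureConfig | undefined): boolean {
        return isObject(value) ? !!value.recordBody : !!value
    }

    console.log(bodyCaptureEnabled(true)) // true (legacy boolean form)
    console.log(bodyCaptureEnabled({ recordBody: false, recordHeaders: true })) // false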
diff --git a/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts b/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts
index df5270fb2c2dd..dbe81266fb7db 100644
--- a/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts
+++ b/frontend/src/scenes/trends/persons-modal/personsModalLogic.ts
@@ -14,6 +14,8 @@ import { query as performQuery } from '~/queries/query'
import {
ActorsQuery,
DataTableNode,
+ FunnelCorrelationActorsQuery,
+ FunnelsActorsQuery,
InsightActorsQuery,
InsightActorsQueryOptions,
InsightActorsQueryOptionsResponse,
@@ -35,7 +37,7 @@ import type { personsModalLogicType } from './personsModalLogicType'
const RESULTS_PER_PAGE = 100
export interface PersonModalLogicProps {
- query?: InsightActorsQuery | null
+ query?: InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery | null
url?: string | null
additionalSelect?: Partial>
orderBy?: string[]
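Widening the query prop to a union means consumers can no longer assume an InsightActorsQuery; they narrow on the node's kind discriminant instead. A sketch with heavily simplified stand-in node types (the real query nodes carry many more fields):

    type InsightActorsQuery = { kind: 'InsightActorsQuery'; source: unknown }
    type FunnelsActorsQuery = { kind: 'FunnelsActorsQuery'; funnelStep?: number }
    type FunnelCorrelationActorsQuery = { kind: 'FunnelCorrelationActorsQuery' }
    type ModalQuery = InsightActorsQuery | FunnelsActorsQuery | FunnelCorrelationActorsQuery

    function describeQuery(query: ModalQuery): string {
        switch (query.kind) {
            case 'FunnelsActorsQuery':
                return `funnel actors at step ${query.funnelStep ?? 1}`
            case 'FunnelCorrelationActorsQuery':
                return 'funnel correlation actors'
            default:
                return 'insight actors'
        }
    }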
diff --git a/frontend/src/styles/global.scss b/frontend/src/styles/global.scss
index 5fcef01ceee1e..37ea604986d6a 100644
--- a/frontend/src/styles/global.scss
+++ b/frontend/src/styles/global.scss
@@ -506,11 +506,6 @@ body {
fill: var(--border-3000);
}
- .ant-table-tbody > tr.ant-table-row:hover > td,
- .ant-table-tbody > tr > td.ant-table-cell-row-hover {
- background-color: var(--mid);
- }
-
@include dark-mode-3000-variables;
}
@@ -685,45 +680,6 @@ body {
max-width: 350px;
}
- .ant-table-thead > tr > th,
- .ant-table-small .ant-table-thead > tr > th {
- background: var(--mid);
- }
-
- .ant-table-tbody > tr > td {
- border-bottom-color: var(--border);
- }
-
- .ant-table-tbody > tr.ant-table-placeholder:hover > td {
- background: inherit;
- }
-
- .ant-table {
- color: var(--text-3000);
- }
-
- .ant-pagination-item-active {
- border-color: var(--link);
-
- & a {
- color: var(--link);
- }
-
- & a:hover {
- color: var(--link);
- }
- }
-
- .ant-pagination-item:hover {
- border-color: var(--link);
- }
-
- .ant-pagination-item:hover a,
- .ant-pagination-prev:hover .ant-pagination-item-link,
- .ant-pagination-next:hover .ant-pagination-item-link {
- color: var(--link);
- }
-
@include common-variables;
}
diff --git a/frontend/src/toolbar/flags/FlagsToolbarMenu.tsx b/frontend/src/toolbar/flags/FlagsToolbarMenu.tsx
index 2ffc0cd9666b3..99efa265d6050 100644
--- a/frontend/src/toolbar/flags/FlagsToolbarMenu.tsx
+++ b/frontend/src/toolbar/flags/FlagsToolbarMenu.tsx
@@ -16,11 +16,18 @@ import { toolbarConfigLogic } from '~/toolbar/toolbarConfigLogic'
export const FlagsToolbarMenu = (): JSX.Element => {
const { searchTerm, filteredFlags, userFlagsLoading } = useValues(flagsToolbarLogic)
- const { setSearchTerm, setOverriddenUserFlag, deleteOverriddenUserFlag, getUserFlags, checkLocalOverrides } =
- useActions(flagsToolbarLogic)
- const { apiURL } = useValues(toolbarConfigLogic)
+ const {
+ setSearchTerm,
+ setOverriddenUserFlag,
+ deleteOverriddenUserFlag,
+ getUserFlags,
+ checkLocalOverrides,
+ setFeatureFlagValueFromPostHogClient,
+ } = useActions(flagsToolbarLogic)
+ const { apiURL, posthog: posthogClient } = useValues(toolbarConfigLogic)
useEffect(() => {
+ posthogClient?.onFeatureFlags(setFeatureFlagValueFromPostHogClient)
getUserFlags()
checkLocalOverrides()
}, [])
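The new useEffect line subscribes the toolbar to the embedding page's own posthog-js client: onFeatureFlags fires whenever flags load or change, passing the active keys and a map of their values. A sketch of the same wiring outside the toolbar (the key and host are placeholders):

    import posthog from 'posthog-js'

    posthog.init('phc_placeholder_key', { api_host: 'https://us.i.posthog.com' })

    // Fires on every flag load/refresh with the active keys and their values,
    // the same two arguments the toolbar forwards to setFeatureFlagValueFromPostHogClient.
    posthog.onFeatureFlags((flags: string[], variants: Record<string, string | boolean>) => {
        console.log('active flags:', flags, 'values:', variants)
    })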
diff --git a/frontend/src/toolbar/flags/flagsToolbarLogic.test.ts b/frontend/src/toolbar/flags/flagsToolbarLogic.test.ts
index 5ac9502c27d75..046b91867d386 100644
--- a/frontend/src/toolbar/flags/flagsToolbarLogic.test.ts
+++ b/frontend/src/toolbar/flags/flagsToolbarLogic.test.ts
@@ -50,6 +50,30 @@ describe('toolbar featureFlagsLogic', () => {
})
})
+ it('uses posthog client values if present', async () => {
+ const flags = {
+ 'flag 1': false,
+ 'flag 2': true,
+ 'flag 3': 'value',
+ }
+ await expectLogic(logic, () => {
+ logic.actions.setFeatureFlagValueFromPostHogClient(Object.keys(flags), flags)
+ }).toMatchValues({
+ userFlags: featureFlags,
+ searchTerm: '',
+ filteredFlags: [
+ { currentValue: false, hasOverride: false, hasVariants: false, feature_flag: { key: 'flag 1' } },
+ { currentValue: true, hasOverride: false, hasVariants: false, feature_flag: { key: 'flag 2' } },
+ {
+ currentValue: 'value',
+ hasOverride: false,
+ hasVariants: false,
+ feature_flag: { key: 'flag 3', name: 'mentions 2' },
+ },
+ ],
+ })
+ })
+
it('can filter the flags', async () => {
await expectLogic(logic, () => {
logic.actions.setSearchTerm('2')
diff --git a/frontend/src/toolbar/flags/flagsToolbarLogic.ts b/frontend/src/toolbar/flags/flagsToolbarLogic.ts
index ba385dc1c2a19..60c1f568f45a6 100644
--- a/frontend/src/toolbar/flags/flagsToolbarLogic.ts
+++ b/frontend/src/toolbar/flags/flagsToolbarLogic.ts
@@ -18,6 +18,10 @@ export const flagsToolbarLogic = kea<flagsToolbarLogicType>([
})),
actions({
getUserFlags: true,
+ setFeatureFlagValueFromPostHogClient: (flags: string[], variants: Record<string, string | boolean>) => ({
+ flags,
+ variants,
+ }),
setOverriddenUserFlag: (flagKey: string, overrideValue: string | boolean) => ({ flagKey, overrideValue }),
deleteOverriddenUserFlag: (flagKey: string) => ({ flagKey }),
setSearchTerm: (searchTerm: string) => ({ searchTerm }),
@@ -63,16 +67,26 @@ export const flagsToolbarLogic = kea([
setSearchTerm: (_, { searchTerm }) => searchTerm,
},
],
+ posthogClientFlagValues: [
+ {} as Record<string, string | boolean>,
+ {
+ setFeatureFlagValueFromPostHogClient: (_, { variants }) => {
+ return variants
+ },
+ },
+ ],
}),
selectors({
userFlagsWithOverrideInfo: [
- (s) => [s.userFlags, s.localOverrides],
- (userFlags, localOverrides) => {
+ (s) => [s.userFlags, s.localOverrides, s.posthogClientFlagValues],
+ (userFlags, localOverrides, posthogClientFlagValues) => {
return userFlags.map((flag) => {
const hasVariants = (flag.feature_flag.filters?.multivariate?.variants?.length || 0) > 0
const currentValue =
- flag.feature_flag.key in localOverrides ? localOverrides[flag.feature_flag.key] : flag.value
+ flag.feature_flag.key in localOverrides
+ ? localOverrides[flag.feature_flag.key]
+ : posthogClientFlagValues[flag.feature_flag.key] ?? flag.value
return {
...flag,
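The updated selector gives each flag a three-level value precedence: a toolbar-local override wins, then the value reported by the page's posthog-js client, then the server-fetched value. Distilled into a standalone function:

    type FlagValue = string | boolean

    function currentFlagValue(
        key: string,
        localOverrides: Record<string, FlagValue>,
        clientValues: Record<string, FlagValue>,
        serverValue: FlagValue
    ): FlagValue {
        if (key in localOverrides) {
            return localOverrides[key] // an explicit toolbar override always wins
        }
        return clientValues[key] ?? serverValue // then the live client value, then the server value
    }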
diff --git a/frontend/src/toolbar/index.tsx b/frontend/src/toolbar/index.tsx
index 66f36bd4f45ad..a5bdb7923fa1c 100644
--- a/frontend/src/toolbar/index.tsx
+++ b/frontend/src/toolbar/index.tsx
@@ -1,7 +1,7 @@
import '~/styles'
import './styles.scss'
-import { PostHog } from 'posthog-js'
+import type { PostHog } from 'posthog-js'
import { createRoot } from 'react-dom/client'
import { initKea } from '~/initKea'
diff --git a/frontend/src/types.ts b/frontend/src/types.ts
index 662f83942c1b6..9a9535ab5d6a0 100644
--- a/frontend/src/types.ts
+++ b/frontend/src/types.ts
@@ -20,7 +20,7 @@ import {
} from 'lib/constants'
import { Dayjs, dayjs } from 'lib/dayjs'
import { PopoverProps } from 'lib/lemon-ui/Popover/Popover'
-import { PostHog } from 'posthog-js'
+import type { PostHog } from 'posthog-js'
import { Layout } from 'react-grid-layout'
import { LogLevel } from 'rrweb'
import { BehavioralFilterKey, BehavioralFilterType } from 'scenes/cohorts/CohortFilters/types'
@@ -896,9 +896,11 @@ export interface SessionRecordingsResponse {
has_next: boolean
}
+export type ErrorClusterSample = { session_id: string; input: string }
+
type ErrorCluster = {
cluster: number
- samples: { session_id: string; message: string }[]
+ samples: ErrorClusterSample[]
occurrences: number
unique_sessions: number
}
@@ -942,6 +944,7 @@ export interface ActionFilter extends EntityFilter {
export interface DataWarehouseFilter extends ActionFilter {
id_field: string
timestamp_field: string
+ distinct_id_field: string
table_name: string
}
@@ -1470,7 +1473,7 @@ export interface BillingV2PlanType {
note: string | null
unit: string | null
product_key: ProductKeyUnion
- current_plan?: any
+ current_plan?: boolean | null
tiers?: BillingV2TierType[] | null
unit_amount_usd?: string
included_if?: 'no_active_subscription' | 'has_subscription' | null
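Both files above switch the PostHog import to import type, which TypeScript erases at compile time: the type information stays available, but no posthog-js code is pulled into the emitted bundle through these modules. A sketch of the distinction (ToolbarHandle is a hypothetical consumer):

    // Erased on compilation: no runtime require/import of posthog-js is emitted.
    import type { PostHog } from 'posthog-js'

    export interface ToolbarHandle {
        posthog?: PostHog // type-only usage adds no bundle weight
    }

    // By contrast, a value import would keep posthog-js in the bundle:
    // import posthog from 'posthog-js'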
diff --git a/frontend/utils.mjs b/frontend/utils.mjs
index 16ad8b984c51e..e4e9bbd5d9c70 100644
--- a/frontend/utils.mjs
+++ b/frontend/utils.mjs
@@ -194,15 +194,21 @@ function getChunks(result) {
}
export async function buildInParallel(configs, { onBuildStart, onBuildComplete } = {}) {
- await Promise.all(
- configs.map((config) =>
- buildOrWatch({
- ...config,
- onBuildStart,
- onBuildComplete,
- })
+ try {
+ await Promise.all(
+ configs.map((config) =>
+ buildOrWatch({
+ ...config,
+ onBuildStart,
+ onBuildComplete,
+ })
+ )
)
- )
+ } catch (e) {
+ if (!isDev) {
+ process.exit(1)
+ }
+ }
if (!isDev) {
process.exit(0)
@@ -338,7 +344,11 @@ export async function buildOrWatch(config) {
...buildResult.metafile,
}
} catch (e) {
- log({ success: false, name, time })
+ if (isDev) {
+ log({ success: false, name, time })
+ } else {
+ throw e
+ }
}
}
diff --git a/latest_migrations.manifest b/latest_migrations.manifest
index 9ade46300e425..f232dbc8c186c 100644
--- a/latest_migrations.manifest
+++ b/latest_migrations.manifest
@@ -2,10 +2,10 @@ admin: 0003_logentry_add_action_flag_choices
auth: 0012_alter_user_first_name_max_length
axes: 0006_remove_accesslog_trusted
contenttypes: 0002_remove_content_type_name
-ee: 0015_add_verified_properties
+ee: 0016_rolemembership_organization_member
otp_static: 0002_throttling
otp_totp: 0002_auto_20190420_0723
-posthog: 0395_alter_batchexportbackfill_end_at
+posthog: 0397_projects_backfill
sessions: 0001_initial
social_django: 0010_uid_db_index
two_factor: 0007_auto_20201201_1019
diff --git a/mypy-baseline.txt b/mypy-baseline.txt
index 1c1de99e0bdaf..c095bc7fff997 100644
--- a/mypy-baseline.txt
+++ b/mypy-baseline.txt
@@ -335,8 +335,6 @@ posthog/hogql/query.py:0: error: "SelectQuery" has no attribute "select_queries"
posthog/hogql/query.py:0: error: Subclass of "SelectQuery" and "SelectUnionQuery" cannot exist: would have incompatible method signatures [unreachable]
posthog/hogql/autocomplete.py:0: error: Unused "type: ignore" comment [unused-ignore]
posthog/hogql/autocomplete.py:0: error: Unused "type: ignore" comment [unused-ignore]
-posthog/hogql_queries/insights/trends/breakdown_values.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select" [union-attr]
-posthog/hogql_queries/insights/trends/breakdown_values.py:0: error: Value of type "list[Any] | None" is not indexable [index]
posthog/hogql_queries/sessions_timeline_query_runner.py:0: error: Statement is unreachable [unreachable]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_histogram_bin_count" [union-attr]
@@ -355,9 +353,6 @@ posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types in assignment (expression has type "float", variable has type "int") [assignment]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "int") [assignment]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible types in assignment (expression has type "str", variable has type "int") [assignment]
-posthog/hogql_queries/insights/trends/breakdown.py:0: error: Unsupported operand types for + ("str" and "float") [operator]
-posthog/hogql_queries/insights/trends/breakdown.py:0: note: Left operand is of type "str | int"
-posthog/hogql_queries/insights/trends/breakdown.py:0: error: Incompatible return value type (got "list[tuple[str | int, Any | float | str | int]]", expected "list[tuple[float, float]]") [return-value]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown_type" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Item "None" of "BreakdownFilter | None" has no attribute "breakdown" [union-attr]
posthog/hogql_queries/insights/trends/breakdown.py:0: error: Argument "breakdown_field" to "get_properties_chain" has incompatible type "str | float | list[str | float] | Any | None"; expected "str" [arg-type]
@@ -678,6 +673,7 @@ posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get
posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index]
posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined]
posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index]
+posthog/management/commands/migrate_team.py:0: error: Incompatible types in assignment (expression has type "None", variable has type "BatchExport") [assignment]
posthog/hogql/test/test_query.py:0: error: Argument 1 to "len" has incompatible type "list[Any] | None"; expected "Sized" [arg-type]
posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index]
posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index]
diff --git a/package.json b/package.json
index d68b4d49a4cd7..7c39d65568a7b 100644
--- a/package.json
+++ b/package.json
@@ -142,7 +142,7 @@
"pmtiles": "^2.11.0",
"postcss": "^8.4.31",
"postcss-preset-env": "^9.3.0",
- "posthog-js": "1.114.0",
+ "posthog-js": "1.115.2",
"posthog-js-lite": "2.5.0",
"prettier": "^2.8.8",
"prop-types": "^15.7.2",
diff --git a/plugin-server/functional_tests/api.ts b/plugin-server/functional_tests/api.ts
index 539fa266b8dd4..abbd770d7bb77 100644
--- a/plugin-server/functional_tests/api.ts
+++ b/plugin-server/functional_tests/api.ts
@@ -365,8 +365,18 @@ export const createTeam = async (
token?: string,
sessionRecordingOptIn = true
) => {
- const team = await insertRow(postgres, 'posthog_team', {
+ const id = Math.round(Math.random() * 1000000000)
+ await insertRow(postgres, 'posthog_project', {
+ // Every team (aka environment) must be a child of a project
+ id,
organization_id: organizationId,
+ name: 'TEST PROJECT',
+ created_at: new Date().toISOString(),
+ })
+ await insertRow(postgres, 'posthog_team', {
+ id,
+ organization_id: organizationId,
+ project_id: id,
app_urls: [],
name: 'TEST PROJECT',
event_names: [],
@@ -392,7 +402,7 @@ export const createTeam = async (
access_control: false,
slack_incoming_webhook,
})
- return team.id
+ return id
}
export const createAction = async (action: Omit, steps: Omit[]) => {
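createTeam now inserts a posthog_project row before the posthog_team row, because every team references a project via project_id; sharing one randomly generated id for both keeps the foreign key valid and sidesteps auto-increment races between parallel tests. A condensed sketch, with insertRow standing in for the helper used in the diff:

    async function createProjectAndTeam(
        insertRow: (table: string, row: Record<string, unknown>) => Promise<void>,
        organizationId: string
    ): Promise<number> {
        const id = Math.round(Math.random() * 1000000000) // random id to avoid clashes across parallel tests
        // Parent row first: posthog_team.project_id references posthog_project.id.
        await insertRow('posthog_project', { id, organization_id: organizationId, name: 'TEST PROJECT' })
        await insertRow('posthog_team', { id, project_id: id, organization_id: organizationId, name: 'TEST PROJECT' })
        return id
    }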
diff --git a/plugin-server/src/main/ingestion-queues/session-recording/process-event.ts b/plugin-server/src/main/ingestion-queues/session-recording/process-event.ts
index 9795bbcbeef2e..a729fb23fcff6 100644
--- a/plugin-server/src/main/ingestion-queues/session-recording/process-event.ts
+++ b/plugin-server/src/main/ingestion-queues/session-recording/process-event.ts
@@ -236,6 +236,18 @@ function isAnyMouseActivity(event: RRWebEvent) {
)
}
+/**
+ * meta events have type = 4 and event.data.href,
+ * and custom events have type = 5 and _might_ have event.data.payload.href
+ *
+ * we don't really care what type of event they are, just whether they have a href
+ */
+function hrefFrom(event: RRWebEvent): string | undefined {
+ const metaHref = event.data?.href?.trim()
+ const customHref = event.data?.payload?.href?.trim()
+ return metaHref || customHref || undefined
+}
+
export const createSessionReplayEvent = (
uuid: string,
team_id: number,
@@ -275,9 +287,12 @@ export const createSessionReplayEvent = (
keypressCount += 1
}
}
- if (url === null && !!event.data?.href?.trim().length) {
- url = event.data.href
+
+ const eventUrl: string | undefined = hrefFrom(event)
+ if (url === null && eventUrl) {
+ url = eventUrl
}
+
if (event.type === RRWebEventType.Plugin && event.data?.plugin === 'rrweb/console@1') {
const level = safeLevel(event.data.payload?.level)
if (level === 'info') {
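hrefFrom above accepts a URL from either event shape: meta events (type 4) carry it at data.href, custom events (type 5) may carry it at data.payload.href, and the first non-empty trimmed value wins. A self-contained sketch against simplified rrweb event shapes:

    type SimpleRRWebEvent = {
        type: number
        data?: { href?: string; payload?: { href?: string } }
    }

    function hrefFrom(event: SimpleRRWebEvent): string | undefined {
        const metaHref = event.data?.href?.trim()
        const customHref = event.data?.payload?.href?.trim()
        // '' is falsy, so a whitespace-only href falls through to the next candidate.
        return metaHref || customHref || undefined
    }

    console.log(hrefFrom({ type: 4, data: { href: ' http://127.0.0.1:8000/the/url ' } })) // trimmed meta href
    console.log(hrefFrom({ type: 5, data: { payload: { href: 'http://127.0.0.1:8000/my-spa' } } })) // payload href
    console.log(hrefFrom({ type: 3, data: {} })) // undefined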
diff --git a/plugin-server/tests/helpers/sql.ts b/plugin-server/tests/helpers/sql.ts
index eb167cccdb20f..8564250118550 100644
--- a/plugin-server/tests/helpers/sql.ts
+++ b/plugin-server/tests/helpers/sql.ts
@@ -224,8 +224,15 @@ export async function createUserTeamAndOrganization(
joined_at: new Date().toISOString(),
updated_at: new Date().toISOString(),
})
+ await insertRow(db, 'posthog_project', {
+ id: teamId,
+ organization_id: organizationId,
+ name: 'TEST PROJECT',
+ created_at: new Date().toISOString(),
+ })
await insertRow(db, 'posthog_team', {
id: teamId,
+ project_id: teamId,
organization_id: organizationId,
app_urls: [],
name: 'TEST PROJECT',
@@ -315,10 +322,19 @@ export const createOrganization = async (pg: PostgresRouter) => {
}
export const createTeam = async (pg: PostgresRouter, organizationId: string, token?: string) => {
- const team = await insertRow(pg, 'posthog_team', {
- // KLUDGE: auto increment IDs can be racy in tests so we ensure IDs don't clash
- id: Math.round(Math.random() * 1000000000),
+ // KLUDGE: auto increment IDs can be racy in tests so we ensure IDs don't clash
+ const id = Math.round(Math.random() * 1000000000)
+ await insertRow(pg, 'posthog_project', {
+ // Every team (aka environment) must be a child of a project
+ id,
+ organization_id: organizationId,
+ name: 'TEST PROJECT',
+ created_at: new Date().toISOString(),
+ })
+ await insertRow(pg, 'posthog_team', {
+ id,
organization_id: organizationId,
+ project_id: id,
app_urls: [],
name: 'TEST PROJECT',
event_names: [],
@@ -343,7 +359,7 @@ export const createTeam = async (pg: PostgresRouter, organizationId: string, tok
person_display_name_properties: [],
access_control: false,
})
- return team.id
+ return id
}
export const createUser = async (pg: PostgresRouter, distinctId: string) => {
diff --git a/plugin-server/tests/main/ingestion-queues/session-recording/process-event.test.ts b/plugin-server/tests/main/ingestion-queues/session-recording/process-event.test.ts
index 8bca6952603a1..d74d3a2de9e23 100644
--- a/plugin-server/tests/main/ingestion-queues/session-recording/process-event.test.ts
+++ b/plugin-server/tests/main/ingestion-queues/session-recording/process-event.test.ts
@@ -181,7 +181,44 @@ describe('session recording process event', () => {
},
},
{
- testDescription: 'first url detection',
+ testDescription: 'url can be detected in meta event',
+ snapshotData: {
+ events_summary: [
+ {
+ timestamp: 1682449093693,
+ type: 3,
+ data: {},
+ windowId: '1',
+ },
+ {
+ timestamp: 1682449093469,
+ type: 4,
+ data: {
+ href: 'http://127.0.0.1:8000/the/url',
+ },
+ windowId: '1',
+ },
+ ],
+ },
+ expected: {
+ click_count: 0,
+ keypress_count: 0,
+ mouse_activity_count: 0,
+ first_url: 'http://127.0.0.1:8000/the/url',
+ first_timestamp: '2023-04-25 18:58:13.469',
+ last_timestamp: '2023-04-25 18:58:13.693',
+ active_milliseconds: 0, // no data.source, so no activity
+ console_log_count: 0,
+ console_warn_count: 0,
+ console_error_count: 0,
+ size: 163,
+ event_count: 2,
+ message_count: 1,
+ snapshot_source: 'web',
+ },
+ },
+ {
+ testDescription: 'first url detection takes the first url whether meta url or payload url',
snapshotData: {
events_summary: [
{
@@ -189,7 +226,6 @@ describe('session recording process event', () => {
type: 5,
data: {
payload: {
- // doesn't match because href is nested in payload
href: 'http://127.0.0.1:8000/home',
},
},
@@ -209,7 +245,7 @@ describe('session recording process event', () => {
click_count: 0,
keypress_count: 0,
mouse_activity_count: 0,
- first_url: 'http://127.0.0.1:8000/second/url',
+ first_url: 'http://127.0.0.1:8000/home',
first_timestamp: '2023-04-25 18:58:13.469',
last_timestamp: '2023-04-25 18:58:13.693',
active_milliseconds: 0, // no data.source, so no activity
@@ -222,6 +258,51 @@ describe('session recording process event', () => {
snapshot_source: 'web',
},
},
+ {
+ testDescription: 'first url detection can use payload url',
+ snapshotData: {
+ events_summary: [
+ {
+ timestamp: 1682449093469,
+ type: 5,
+ data: {
+ payload: {
+ // we don't read just any URL
+ 'the-page-url': 'http://127.0.0.1:8000/second/url',
+ },
+ },
+ windowId: '1',
+ },
+ {
+ timestamp: 1682449093693,
+ type: 5,
+ data: {
+ payload: {
+ // matches href nested in payload
+ href: 'http://127.0.0.1:8000/my-spa',
+ },
+ },
+ windowId: '1',
+ },
+ ],
+ },
+ expected: {
+ click_count: 0,
+ keypress_count: 0,
+ mouse_activity_count: 0,
+ first_url: 'http://127.0.0.1:8000/my-spa',
+ first_timestamp: '2023-04-25 18:58:13.469',
+ last_timestamp: '2023-04-25 18:58:13.693',
+ active_milliseconds: 0, // no data.source, so no activity
+ console_log_count: 0,
+ console_warn_count: 0,
+ console_error_count: 0,
+ size: 235,
+ event_count: 2,
+ message_count: 1,
+ snapshot_source: 'web',
+ },
+ },
{
testDescription: 'negative timestamps are not included when picking timestamps',
snapshotData: {
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index 1d62cff364522..78423f993f9b0 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -245,8 +245,8 @@ dependencies:
specifier: ^9.3.0
version: 9.3.0(postcss@8.4.31)
posthog-js:
- specifier: 1.114.0
- version: 1.114.0
+ specifier: 1.115.2
+ version: 1.115.2
posthog-js-lite:
specifier: 2.5.0
version: 2.5.0
@@ -17252,8 +17252,8 @@ packages:
resolution: {integrity: sha512-Urvlp0Vu9h3td0BVFWt0QXFJDoOZcaAD83XM9d91NKMKTVPZtfU0ysoxstIf5mw/ce9ZfuMgpWPaagrZI4rmSg==}
dev: false
- /posthog-js@1.114.0:
- resolution: {integrity: sha512-5Xu4ZlVy/azChKdaXueM4nwWufld5qEcOecbSttWKE/lnysQEwBwgP7O+1qzNkKpQZSdOhoujd5C2/r9SVhcOA==}
+ /posthog-js@1.115.2:
+ resolution: {integrity: sha512-nGTxDjH8df0FTd1plIqKFsmSynkkI/LmvYlJP7sqeKvtXhcQpVi4+avMhNWIasoWvyQbp65hmvwXyXyQ7jk2cw==}
dependencies:
fflate: 0.4.8
preact: 10.19.6
diff --git a/posthog/admin.py b/posthog/admin.py
deleted file mode 100644
index 4e541310166dc..0000000000000
--- a/posthog/admin.py
+++ /dev/null
@@ -1,677 +0,0 @@
-import json
-
-from django.conf import settings
-from django.contrib import admin
-from django.contrib.auth.admin import UserAdmin as DjangoUserAdmin
-from django.contrib.auth.forms import UserChangeForm as DjangoUserChangeForm
-from django.contrib.auth.tokens import default_token_generator
-from django.utils.html import format_html
-from django.utils.translation import gettext_lazy as _
-from django_otp.plugins.otp_totp.models import TOTPDevice
-
-from posthog.models import (
- Action,
- AsyncDeletion,
- Cohort,
- Dashboard,
- DashboardTile,
- Experiment,
- FeatureFlag,
- GroupTypeMapping,
- Insight,
- InstanceSetting,
- Organization,
- OrganizationMembership,
- Person,
- PersonDistinctId,
- Plugin,
- PluginAttachment,
- PluginConfig,
- Survey,
- Team,
- Text,
- User,
-)
-from posthog.warehouse.models import DataWarehouseTable
-
-
-class DashboardTileInline(admin.TabularInline):
- extra = 0
- model = DashboardTile
- autocomplete_fields = ("insight", "text")
- readonly_fields = ("filters_hash",)
-
-
-class TOTPDeviceInline(admin.TabularInline):
- model = TOTPDevice
- extra = 0
-
-
-@admin.register(Dashboard)
-class DashboardAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "name",
- "team_link",
- "organization_link",
- "created_at",
- "created_by",
- )
- list_display_links = ("id", "name")
- list_select_related = ("team", "team__organization")
- search_fields = ("id", "name", "team__name", "team__organization__name")
- readonly_fields = (
- "last_accessed_at",
- "deprecated_tags",
- "deprecated_tags_v2",
- "share_token",
- )
- autocomplete_fields = ("team", "created_by")
- ordering = ("-created_at", "creation_mode")
- inlines = (DashboardTileInline,)
-
- def team_link(self, dashboard: Dashboard):
- return format_html(
- '{}',
- dashboard.team.pk,
- dashboard.team.name,
- )
-
- def organization_link(self, dashboard: Dashboard):
- return format_html(
- '{}',
- dashboard.team.organization.pk,
- dashboard.team.organization.name,
- )
-
-
-@admin.register(DataWarehouseTable)
-class DataWarehouseTableAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "name",
- "format",
- "url_pattern",
- "team_link",
- "organization_link",
- "created_at",
- "created_by",
- )
- list_display_links = ("id", "name")
- list_select_related = ("team", "team__organization")
- search_fields = ("id", "name", "team__name", "team__organization__name")
- autocomplete_fields = ("team", "created_by")
- ordering = ("-created_at",)
-
- def team_link(self, dashboard: Dashboard):
- return format_html(
- '{}',
- dashboard.team.pk,
- dashboard.team.name,
- )
-
- def organization_link(self, dashboard: Dashboard):
- return format_html(
- '{}',
- dashboard.team.organization.pk,
- dashboard.team.organization.name,
- )
-
-
-@admin.register(Text)
-class TextAdmin(admin.ModelAdmin):
- autocomplete_fields = ("created_by", "last_modified_by", "team")
- search_fields = ("id", "body", "team__name", "team__organization__name")
-
-
-@admin.register(Insight)
-class InsightAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "short_id",
- "effective_name",
- "team_link",
- "organization_link",
- "created_at",
- "created_by",
- )
- list_display_links = ("id", "short_id", "effective_name")
- list_select_related = ("team", "team__organization")
- search_fields = ("id", "name", "short_id", "team__name", "team__organization__name")
- readonly_fields = ("deprecated_tags", "deprecated_tags_v2", "dive_dashboard")
- autocomplete_fields = ("team", "dashboard", "created_by", "last_modified_by")
- ordering = ("-created_at",)
-
- def effective_name(self, insight: Insight):
- return insight.name or format_html("<i>{}</i>", insight.derived_name)
-
- def team_link(self, insight: Insight):
- return format_html(
- '{}',
- insight.team.pk,
- insight.team.name,
- )
-
- def organization_link(self, insight: Insight):
- return format_html(
- '{}',
- insight.team.organization.pk,
- insight.team.organization.name,
- )
-
-
-@admin.register(Plugin)
-class PluginAdmin(admin.ModelAdmin):
- list_display = ("id", "name", "organization_id", "is_global")
- list_display_links = ("id", "name")
- list_filter = ("plugin_type", "is_global")
- autocomplete_fields = ("organization",)
- search_fields = ("name",)
- ordering = ("-created_at",)
-
-
-class ActionInline(admin.TabularInline):
- extra = 0
- model = Action
- classes = ("collapse",)
- autocomplete_fields = ("created_by",)
-
-
-class GroupTypeMappingInline(admin.TabularInline):
- extra = 0
- model = GroupTypeMapping
- fields = ("group_type_index", "group_type", "name_singular", "name_plural")
- readonly_fields = fields
- classes = ("collapse",)
- max_num = 5
- min_num = 5
-
-
-@admin.register(Cohort)
-class CohortAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "name",
- "team_link",
- "created_at",
- "created_by",
- )
- list_display_links = ("id", "name")
- list_select_related = ("team", "team__organization")
- search_fields = ("id", "name", "team__name", "team__organization__name")
- autocomplete_fields = ("team", "created_by")
- ordering = ("-created_at",)
-
- def team_link(self, cohort: Cohort):
- return format_html(
- '{}',
- cohort.team.pk,
- cohort.team.name,
- )
-
-
-@admin.register(FeatureFlag)
-class FeatureFlagAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "key",
- "team_link",
- "created_at",
- "created_by",
- )
- list_display_links = ("id", "key")
- list_select_related = ("team", "team__organization")
- search_fields = ("id", "key", "team__name", "team__organization__name")
- autocomplete_fields = ("team", "created_by")
- ordering = ("-created_at",)
-
- def team_link(self, flag: FeatureFlag):
- return format_html(
- '{}',
- flag.team.pk,
- flag.team.name,
- )
-
-
-@admin.register(Experiment)
-class ExperimentAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "name",
- "team_link",
- "created_at",
- "created_by",
- )
- list_display_links = ("id", "name")
- list_select_related = ("team", "team__organization")
- search_fields = ("id", "name", "team__name", "team__organization__name")
- autocomplete_fields = ("team", "created_by")
- ordering = ("-created_at",)
-
- def team_link(self, experiment: Experiment):
- return format_html(
- '{}',
- experiment.team.pk,
- experiment.team.name,
- )
-
-
-@admin.register(Survey)
-class SurveyAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "name",
- "team_link",
- "created_at",
- "created_by",
- )
- list_display_links = ("id", "name")
- list_select_related = ("team", "team__organization")
- search_fields = ("id", "name", "team__name", "team__organization__name")
- autocomplete_fields = ("team", "created_by")
- ordering = ("-created_at",)
-
- def team_link(self, experiment: Experiment):
- return format_html(
- '{}',
- experiment.team.pk,
- experiment.team.name,
- )
-
-
-@admin.register(Team)
-class TeamAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "name",
- "organization_link",
- "organization_id",
- "created_at",
- "updated_at",
- )
- list_display_links = ("id", "name")
- list_select_related = ("organization",)
- search_fields = (
- "id",
- "name",
- "organization__id",
- "organization__name",
- "api_token",
- )
- readonly_fields = ["organization", "primary_dashboard", "test_account_filters"]
- inlines = [GroupTypeMappingInline, ActionInline]
- fieldsets = [
- (
- None,
- {
- "fields": [
- "name",
- "organization",
- ],
- },
- ),
- (
- "General",
- {
- "classes": ["collapse"],
- "fields": [
- "api_token",
- "timezone",
- "slack_incoming_webhook",
- "primary_dashboard",
- ],
- },
- ),
- (
- "Onboarding",
- {
- "classes": ["collapse"],
- "fields": [
- "is_demo",
- "completed_snippet_onboarding",
- "ingested_event",
- "signup_token",
- ],
- },
- ),
- (
- "Settings",
- {
- "classes": ["collapse"],
- "fields": [
- "anonymize_ips",
- "autocapture_opt_out",
- "autocapture_exceptions_opt_in",
- "session_recording_opt_in",
- "capture_console_log_opt_in",
- "capture_performance_opt_in",
- "session_recording_sample_rate",
- "session_recording_minimum_duration_milliseconds",
- "session_recording_linked_flag",
- "data_attributes",
- "session_recording_version",
- "access_control",
- "inject_web_apps",
- "extra_settings",
- ],
- },
- ),
- (
- "Filters",
- {
- "classes": ["collapse"],
- "fields": [
- "test_account_filters",
- "test_account_filters_default_checked",
- "path_cleaning_filters",
- ],
- },
- ),
- ]
-
- def organization_link(self, team: Team):
- return format_html(
- '{}',
- team.organization.pk,
- team.organization.name,
- )
-
-
-ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES = 1024 * 1024
-
-
-class PluginAttachmentInline(admin.StackedInline):
- extra = 0
- model = PluginAttachment
- fields = ("key", "content_type", "file_size", "raw_contents", "json_contents")
- readonly_fields = fields
-
- def raw_contents(self, attachment: PluginAttachment):
- try:
- if attachment.file_size > ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES:
- raise ValueError(
- f"file size {attachment.file_size} is larger than {ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES} bytes"
- )
- return attachment.contents.tobytes()
- except Exception as err:
- return format_html(f"cannot preview: {err}")
-
- def json_contents(self, attachment: PluginAttachment):
- try:
- if attachment.file_size > ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES:
- raise ValueError(
- f"file size {attachment.file_size} is larger than {ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES} bytes"
- )
- return json.loads(attachment.contents.tobytes())
- except Exception as err:
- return format_html(f"cannot preview: {err}")
-
- def has_add_permission(self, request, obj):
- return False
-
- def has_change_permission(self, request, obj):
- return False
-
- def has_delete_permission(self, request, obj):
- return False
-
-
-@admin.register(PluginConfig)
-class PluginConfigAdmin(admin.ModelAdmin):
- list_select_related = ("plugin", "team")
- list_display = ("id", "plugin_name", "team_name", "enabled")
- list_display_links = ("id", "plugin_name")
- list_filter = (
- ("enabled", admin.BooleanFieldListFilter),
- ("deleted", admin.BooleanFieldListFilter),
- ("updated_at", admin.DateFieldListFilter),
- ("plugin", admin.RelatedOnlyFieldListFilter),
- )
- list_select_related = ("team", "plugin")
- search_fields = ("team__name", "team__organization__name", "plugin__name")
- ordering = ("-created_at",)
- inlines = [PluginAttachmentInline]
-
- def plugin_name(self, config: PluginConfig):
- return format_html(f"{config.plugin.name} ({config.plugin_id})")
-
- def team_name(self, config: PluginConfig):
- return format_html(f"{config.team.name} ({config.team_id})")
-
-
-class UserChangeForm(DjangoUserChangeForm):
- def __init__(self, *args, **kwargs):
- super().__init__(*args, **kwargs)
- # This is a riff on https://github.com/django/django/blob/stable/4.1.x/django/contrib/auth/forms.py#L151-L153.
- # The difference from the Django default is that instead of a form where the _admin_ sets the new password,
- # we have a link to the password reset page which the _user_ can use themselves.
- # This way if some user needs to reset their password and there's a problem with receiving the reset link email,
- # an admin can provide that reset link manually – much better than sending a new password in plain text.
- password_reset_token = default_token_generator.make_token(self.instance)
- self.fields["password"].help_text = (
- "Raw passwords are not stored, so there is no way to see this user’s password, but you can send them "
- f'this password reset link '
- "(it only works when logged out)."
- )
-
-
-class OrganizationMemberInline(admin.TabularInline):
- extra = 0
- model = OrganizationMembership
- readonly_fields = ("user", "joined_at", "updated_at")
- autocomplete_fields = ("user", "organization")
-
-
-@admin.register(User)
-class UserAdmin(DjangoUserAdmin):
- """Define admin model for custom User model with no email field."""
-
- form = UserChangeForm
- change_password_form = None # This view is not exposed in our subclass of UserChangeForm
- change_form_template = "loginas/change_form.html"
-
- inlines = [OrganizationMemberInline, TOTPDeviceInline]
- fieldsets = (
- (
- None,
- {
- "fields": (
- "email",
- "password",
- "current_organization",
- "is_email_verified",
- "pending_email",
- "strapi_id",
- )
- },
- ),
- (_("Personal info"), {"fields": ("first_name", "last_name")}),
- (_("Permissions"), {"fields": ("is_active", "is_staff")}),
- (_("Important dates"), {"fields": ("last_login", "date_joined")}),
- (_("Toolbar authentication"), {"fields": ("temporary_token",)}),
- )
- add_fieldsets = ((None, {"classes": ("wide",), "fields": ("email", "password1", "password2")}),)
- list_display = (
- "id",
- "email",
- "first_name",
- "last_name",
- "current_team_link",
- "current_organization_link",
- "is_staff",
- )
- list_display_links = ("id", "email")
- list_filter = ("is_staff", "is_active", "groups")
- list_select_related = ("current_team", "current_organization")
- search_fields = ("email", "first_name", "last_name")
- readonly_fields = ["current_team", "current_organization"]
- ordering = ("email",)
-
- def current_team_link(self, user: User):
- if not user.team:
- return "–"
-
- return format_html(
- '{}',
- user.team.pk,
- user.team.name,
- )
-
- def current_organization_link(self, user: User):
- if not user.organization:
- return "–"
-
- return format_html(
- '{}',
- user.organization.pk,
- user.organization.name,
- )
-
-
-class OrganizationTeamInline(admin.TabularInline):
- extra = 0
- model = Team
-
- fields = (
- "id",
- "displayed_name",
- "api_token",
- "app_urls",
- "name",
- "created_at",
- "updated_at",
- "anonymize_ips",
- "completed_snippet_onboarding",
- "ingested_event",
- "session_recording_opt_in",
- "autocapture_opt_out",
- "signup_token",
- "is_demo",
- "access_control",
- "test_account_filters",
- "path_cleaning_filters",
- "timezone",
- "data_attributes",
- "correlation_config",
- "plugins_opt_in",
- "opt_out_capture",
- )
- readonly_fields = ("id", "displayed_name", "created_at", "updated_at")
-
- def displayed_name(self, team: Team):
- return format_html(
- '{}. {}',
- team.pk,
- team.pk,
- team.name,
- )
-
-
-@admin.register(Organization)
-class OrganizationAdmin(admin.ModelAdmin):
- date_hierarchy = "created_at"
- fields = [
- "name",
- "created_at",
- "updated_at",
- "plugins_access_level",
- "billing_link_v2",
- "usage_posthog",
- "usage",
- "customer_trust_scores",
- "is_hipaa",
- ]
- inlines = [OrganizationTeamInline, OrganizationMemberInline]
- readonly_fields = ["created_at", "updated_at", "billing_link_v2", "usage_posthog", "usage", "customer_trust_scores"]
- search_fields = ("name", "members__email", "team__api_token")
- list_display = (
- "id",
- "name",
- "created_at",
- "plugins_access_level",
- "members_count",
- "first_member",
- "billing_link_v2",
- )
- list_display_links = (
- "id",
- "name",
- )
-
- def members_count(self, organization: Organization):
- return organization.members.count()
-
- def first_member(self, organization: Organization):
- user = organization.members.order_by("id").first()
- return (
- format_html(f'{user.email}')
- if user is not None
- else "None"
- )
-
- def billing_link_v2(self, organization: Organization) -> str:
- url = f"{settings.BILLING_SERVICE_URL}/admin/billing/customer/?q={organization.pk}"
- return format_html(f'Billing V2 →')
-
- def usage_posthog(self, organization: Organization):
- return format_html(
- 'See usage on PostHog →',
- organization.id,
- )
-
-
-class OrganizationBillingAdmin(admin.ModelAdmin):
- search_fields = ("name", "members__email")
-
-
-@admin.register(InstanceSetting)
-class InstanceSettingAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "key",
- "value",
- )
-
-
-@admin.register(Person)
-class PersonAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "distinct_ids",
- "created_at",
- "team",
- "is_user",
- "is_identified",
- "version",
- )
- list_filter = ("created_at", "is_identified", "version")
- search_fields = ("id",)
-
-
-@admin.register(PersonDistinctId)
-class PersonDistinctIdAdmin(admin.ModelAdmin):
- list_display = ("id", "team", "distinct_id", "version")
- list_filter = ("version",)
- search_fields = ("id", "distinct_id")
-
-
-@admin.register(AsyncDeletion)
-class AsyncDeletionAdmin(admin.ModelAdmin):
- list_display = (
- "id",
- "deletion_type",
- "group_type_index",
- "team_id",
- "key",
- "created_by",
- "created_at",
- "delete_verified_at",
- )
- list_filter = ("deletion_type", "delete_verified_at")
- search_fields = ("key",)
-
- def has_add_permission(self, request, obj=None):
- return False
-
- def has_change_permission(self, request, obj=None):
- return False
diff --git a/posthog/admin/__init__.py b/posthog/admin/__init__.py
new file mode 100644
index 0000000000000..41dc6d7efae08
--- /dev/null
+++ b/posthog/admin/__init__.py
@@ -0,0 +1,64 @@
+from django.contrib import admin
+
+from posthog.admin.admins import (
+ OrganizationAdmin,
+ UserAdmin,
+ TeamAdmin,
+ DashboardAdmin,
+ InsightAdmin,
+ ExperimentAdmin,
+ FeatureFlagAdmin,
+ AsyncDeletionAdmin,
+ InstanceSettingAdmin,
+ PluginConfigAdmin,
+ PluginAdmin,
+ TextAdmin,
+ CohortAdmin,
+ PersonAdmin,
+ PersonDistinctIdAdmin,
+ SurveyAdmin,
+ DataWarehouseTableAdmin,
+)
+from posthog.models import (
+ Organization,
+ User,
+ Team,
+ Dashboard,
+ Insight,
+ Experiment,
+ FeatureFlag,
+ AsyncDeletion,
+ InstanceSetting,
+ PluginConfig,
+ Plugin,
+ Text,
+ Cohort,
+ Person,
+ PersonDistinctId,
+ Survey,
+ DataWarehouseTable,
+)
+
+admin.site.register(Organization, OrganizationAdmin)
+admin.site.register(User, UserAdmin)
+admin.site.register(Team, TeamAdmin)
+
+admin.site.register(Dashboard, DashboardAdmin)
+admin.site.register(Insight, InsightAdmin)
+
+admin.site.register(Experiment, ExperimentAdmin)
+admin.site.register(FeatureFlag, FeatureFlagAdmin)
+
+admin.site.register(AsyncDeletion, AsyncDeletionAdmin)
+admin.site.register(InstanceSetting, InstanceSettingAdmin)
+admin.site.register(PluginConfig, PluginConfigAdmin)
+admin.site.register(Plugin, PluginAdmin)
+admin.site.register(Text, TextAdmin)
+
+admin.site.register(Cohort, CohortAdmin)
+admin.site.register(Person, PersonAdmin)
+admin.site.register(PersonDistinctId, PersonDistinctIdAdmin)
+
+admin.site.register(Survey, SurveyAdmin)
+
+admin.site.register(DataWarehouseTable, DataWarehouseTableAdmin)
diff --git a/posthog/admin/admins/__init__.py b/posthog/admin/admins/__init__.py
new file mode 100644
index 0000000000000..e3b6d0ec36a02
--- /dev/null
+++ b/posthog/admin/admins/__init__.py
@@ -0,0 +1,17 @@
+from .async_deletion_admin import AsyncDeletionAdmin
+from .cohort_admin import CohortAdmin
+from .dashboard_admin import DashboardAdmin
+from .data_warehouse_table_admin import DataWarehouseTableAdmin
+from .experiment_admin import ExperimentAdmin
+from .feature_flag_admin import FeatureFlagAdmin
+from .insight_admin import InsightAdmin
+from .instance_setting_admin import InstanceSettingAdmin
+from .organization_admin import OrganizationAdmin
+from .person_admin import PersonAdmin
+from .person_distinct_id_admin import PersonDistinctIdAdmin
+from .plugin_admin import PluginAdmin
+from .plugin_config_admin import PluginConfigAdmin
+from .survey_admin import SurveyAdmin
+from .team_admin import TeamAdmin
+from .text_admin import TextAdmin
+from .user_admin import UserAdmin
diff --git a/posthog/admin/admins/async_deletion_admin.py b/posthog/admin/admins/async_deletion_admin.py
new file mode 100644
index 0000000000000..a53556d9aae23
--- /dev/null
+++ b/posthog/admin/admins/async_deletion_admin.py
@@ -0,0 +1,22 @@
+from django.contrib import admin
+
+
+class AsyncDeletionAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "deletion_type",
+ "group_type_index",
+ "team_id",
+ "key",
+ "created_by",
+ "created_at",
+ "delete_verified_at",
+ )
+ list_filter = ("deletion_type", "delete_verified_at")
+ search_fields = ("key",)
+
+ def has_add_permission(self, request, obj=None):
+ return False
+
+ def has_change_permission(self, request, obj=None):
+ return False
diff --git a/posthog/admin/admins/cohort_admin.py b/posthog/admin/admins/cohort_admin.py
new file mode 100644
index 0000000000000..11408e607fdb0
--- /dev/null
+++ b/posthog/admin/admins/cohort_admin.py
@@ -0,0 +1,26 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import Cohort
+
+
+class CohortAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "name",
+ "team_link",
+ "created_at",
+ "created_by",
+ )
+ list_display_links = ("id", "name")
+ list_select_related = ("team", "team__organization")
+ search_fields = ("id", "name", "team__name", "team__organization__name")
+ autocomplete_fields = ("team", "created_by")
+ ordering = ("-created_at",)
+
+ def team_link(self, cohort: Cohort):
+ return format_html(
+ '{}',
+ cohort.team.pk,
+ cohort.team.name,
+ )
diff --git a/posthog/admin/admins/dashboard_admin.py b/posthog/admin/admins/dashboard_admin.py
new file mode 100644
index 0000000000000..5ace63e094e11
--- /dev/null
+++ b/posthog/admin/admins/dashboard_admin.py
@@ -0,0 +1,48 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import Dashboard, DashboardTile
+
+
+class DashboardTileInline(admin.TabularInline):
+ extra = 0
+ model = DashboardTile
+ autocomplete_fields = ("insight", "text")
+ readonly_fields = ("filters_hash",)
+
+
+class DashboardAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "name",
+ "team_link",
+ "organization_link",
+ "created_at",
+ "created_by",
+ )
+ list_display_links = ("id", "name")
+ list_select_related = ("team", "team__organization")
+ search_fields = ("id", "name", "team__name", "team__organization__name")
+ readonly_fields = (
+ "last_accessed_at",
+ "deprecated_tags",
+ "deprecated_tags_v2",
+ "share_token",
+ )
+ autocomplete_fields = ("team", "created_by")
+ ordering = ("-created_at", "creation_mode")
+ inlines = (DashboardTileInline,)
+
+ def team_link(self, dashboard: Dashboard):
+ return format_html(
+ '{}',
+ dashboard.team.pk,
+ dashboard.team.name,
+ )
+
+ def organization_link(self, dashboard: Dashboard):
+ return format_html(
+ '{}',
+ dashboard.team.organization.pk,
+ dashboard.team.organization.name,
+ )
diff --git a/posthog/admin/admins/data_warehouse_table_admin.py b/posthog/admin/admins/data_warehouse_table_admin.py
new file mode 100644
index 0000000000000..f3e522cf73b7f
--- /dev/null
+++ b/posthog/admin/admins/data_warehouse_table_admin.py
@@ -0,0 +1,36 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import Dashboard
+
+
+class DataWarehouseTableAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "name",
+ "format",
+ "url_pattern",
+ "team_link",
+ "organization_link",
+ "created_at",
+ "created_by",
+ )
+ list_display_links = ("id", "name")
+ list_select_related = ("team", "team__organization")
+ search_fields = ("id", "name", "team__name", "team__organization__name")
+ autocomplete_fields = ("team", "created_by")
+ ordering = ("-created_at",)
+
+ def team_link(self, dashboard: Dashboard):
+ return format_html(
+ '{}',
+ dashboard.team.pk,
+ dashboard.team.name,
+ )
+
+ def organization_link(self, dashboard: Dashboard):
+ return format_html(
+ '{}',
+ dashboard.team.organization.pk,
+ dashboard.team.organization.name,
+ )
diff --git a/posthog/admin/admins/experiment_admin.py b/posthog/admin/admins/experiment_admin.py
new file mode 100644
index 0000000000000..85d187a17d4e8
--- /dev/null
+++ b/posthog/admin/admins/experiment_admin.py
@@ -0,0 +1,26 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import Experiment
+
+
+class ExperimentAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "name",
+ "team_link",
+ "created_at",
+ "created_by",
+ )
+ list_display_links = ("id", "name")
+ list_select_related = ("team", "team__organization")
+ search_fields = ("id", "name", "team__name", "team__organization__name")
+ autocomplete_fields = ("team", "created_by")
+ ordering = ("-created_at",)
+
+ def team_link(self, experiment: Experiment):
+ return format_html(
+ '{}',
+ experiment.team.pk,
+ experiment.team.name,
+ )
diff --git a/posthog/admin/admins/feature_flag_admin.py b/posthog/admin/admins/feature_flag_admin.py
new file mode 100644
index 0000000000000..81be2d47bc66a
--- /dev/null
+++ b/posthog/admin/admins/feature_flag_admin.py
@@ -0,0 +1,26 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import FeatureFlag
+
+
+class FeatureFlagAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "key",
+ "team_link",
+ "created_at",
+ "created_by",
+ )
+ list_display_links = ("id", "key")
+ list_select_related = ("team", "team__organization")
+ search_fields = ("id", "key", "team__name", "team__organization__name")
+ autocomplete_fields = ("team", "created_by")
+ ordering = ("-created_at",)
+
+ def team_link(self, flag: FeatureFlag):
+ return format_html(
+ '{}',
+ flag.team.pk,
+ flag.team.name,
+ )
diff --git a/posthog/admin/admins/insight_admin.py b/posthog/admin/admins/insight_admin.py
new file mode 100644
index 0000000000000..0de2bd2688d6b
--- /dev/null
+++ b/posthog/admin/admins/insight_admin.py
@@ -0,0 +1,39 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import Insight
+
+
+class InsightAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "short_id",
+ "effective_name",
+ "team_link",
+ "organization_link",
+ "created_at",
+ "created_by",
+ )
+ list_display_links = ("id", "short_id", "effective_name")
+ list_select_related = ("team", "team__organization")
+ search_fields = ("id", "name", "short_id", "team__name", "team__organization__name")
+ readonly_fields = ("deprecated_tags", "deprecated_tags_v2", "dive_dashboard")
+ autocomplete_fields = ("team", "dashboard", "created_by", "last_modified_by")
+ ordering = ("-created_at",)
+
+ def effective_name(self, insight: Insight):
+ return insight.name or format_html("<i>{}</i>", insight.derived_name)
+
+ def team_link(self, insight: Insight):
+ return format_html(
+ '{}',
+ insight.team.pk,
+ insight.team.name,
+ )
+
+ def organization_link(self, insight: Insight):
+ return format_html(
+ '{}',
+ insight.team.organization.pk,
+ insight.team.organization.name,
+ )
diff --git a/posthog/admin/admins/instance_setting_admin.py b/posthog/admin/admins/instance_setting_admin.py
new file mode 100644
index 0000000000000..f24eb77096066
--- /dev/null
+++ b/posthog/admin/admins/instance_setting_admin.py
@@ -0,0 +1,9 @@
+from django.contrib import admin
+
+
+class InstanceSettingAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "key",
+ "value",
+ )
diff --git a/posthog/admin/admins/organization_admin.py b/posthog/admin/admins/organization_admin.py
new file mode 100644
index 0000000000000..a0a3c6ea9c51e
--- /dev/null
+++ b/posthog/admin/admins/organization_admin.py
@@ -0,0 +1,61 @@
+from django.conf import settings
+from django.contrib import admin
+from django.utils.html import format_html
+from posthog.admin.inlines.organization_member_inline import OrganizationMemberInline
+from posthog.admin.inlines.organization_team_inline import OrganizationTeamInline
+from posthog.admin.paginators.no_count_paginator import NoCountPaginator
+
+from posthog.models.organization import Organization
+
+
+class OrganizationAdmin(admin.ModelAdmin):
+ show_full_result_count = False # prevent the count() query that shows the number of filtered results
+ paginator = NoCountPaginator # prevent count() queries and return a fixed page count instead
+ fields = [
+ "name",
+ "created_at",
+ "updated_at",
+ "plugins_access_level",
+ "billing_link_v2",
+ "usage_posthog",
+ "usage",
+ "customer_trust_scores",
+ "is_hipaa",
+ ]
+ inlines = [OrganizationTeamInline, OrganizationMemberInline]
+ readonly_fields = ["created_at", "updated_at", "billing_link_v2", "usage_posthog", "usage", "customer_trust_scores"]
+ search_fields = ("name", "members__email", "team__api_token")
+ list_display = (
+ "id",
+ "name",
+ "created_at",
+ "plugins_access_level",
+ "members_count",
+ "first_member",
+ "billing_link_v2",
+ )
+ list_display_links = (
+ "id",
+ "name",
+ )
+
+ def members_count(self, organization: Organization):
+ return organization.members.count()
+
+ def first_member(self, organization: Organization):
+ user = organization.members.order_by("id").first()
+ return (
+ format_html(f'<a href="/admin/posthog/user/{user.pk}/change/">{user.email}</a>')
+ if user is not None
+ else "None"
+ )
+
+ def billing_link_v2(self, organization: Organization) -> str:
+ url = f"{settings.BILLING_SERVICE_URL}/admin/billing/customer/?q={organization.pk}"
+ return format_html(f'<a href="{url}">Billing V2 →</a>')
+
+ def usage_posthog(self, organization: Organization):
+ return format_html(
+ '<a target="_blank" href="https://app.posthog.com/events?organization_id={}">See usage on PostHog →</a>',
+ organization.id,
+ )
diff --git a/posthog/admin/admins/person_admin.py b/posthog/admin/admins/person_admin.py
new file mode 100644
index 0000000000000..79ac775787738
--- /dev/null
+++ b/posthog/admin/admins/person_admin.py
@@ -0,0 +1,19 @@
+from django.contrib import admin
+
+from posthog.admin.paginators.no_count_paginator import NoCountPaginator
+
+
+class PersonAdmin(admin.ModelAdmin):
+ show_full_result_count = False # prevent the count() query that shows the number of filtered results
+ paginator = NoCountPaginator # prevent count() queries and return a fixed page count instead
+ list_display = (
+ "id",
+ "distinct_ids",
+ "created_at",
+ "team",
+ "is_user",
+ "is_identified",
+ "version",
+ )
+ list_filter = ("created_at", "is_identified", "version")
+ search_fields = ("id",)
diff --git a/posthog/admin/admins/person_distinct_id_admin.py b/posthog/admin/admins/person_distinct_id_admin.py
new file mode 100644
index 0000000000000..93eb08d7b2fd5
--- /dev/null
+++ b/posthog/admin/admins/person_distinct_id_admin.py
@@ -0,0 +1,11 @@
+from django.contrib import admin
+
+from posthog.admin.paginators.no_count_paginator import NoCountPaginator
+
+
+class PersonDistinctIdAdmin(admin.ModelAdmin):
+ show_full_result_count = False # prevent the count() query that shows the number of filtered results
+ paginator = NoCountPaginator # prevent count() queries and return a fixed page count instead
+ list_display = ("id", "team", "distinct_id", "version")
+ list_filter = ("version",)
+ search_fields = ("id", "distinct_id")
diff --git a/posthog/admin/admins/plugin_admin.py b/posthog/admin/admins/plugin_admin.py
new file mode 100644
index 0000000000000..48875af32cc57
--- /dev/null
+++ b/posthog/admin/admins/plugin_admin.py
@@ -0,0 +1,10 @@
+from django.contrib import admin
+
+
+class PluginAdmin(admin.ModelAdmin):
+ list_display = ("id", "name", "organization_id", "is_global")
+ list_display_links = ("id", "name")
+ list_filter = ("plugin_type", "is_global")
+ autocomplete_fields = ("organization",)
+ search_fields = ("name",)
+ ordering = ("-created_at",)
diff --git a/posthog/admin/admins/plugin_config_admin.py b/posthog/admin/admins/plugin_config_admin.py
new file mode 100644
index 0000000000000..51e54c9237c28
--- /dev/null
+++ b/posthog/admin/admins/plugin_config_admin.py
@@ -0,0 +1,27 @@
+from django.contrib import admin
+from django.utils.html import format_html
+from posthog.admin.inlines.plugin_attachment_inline import PluginAttachmentInline
+
+from posthog.models import PluginConfig
+
+
+class PluginConfigAdmin(admin.ModelAdmin):
+ list_select_related = ("plugin", "team")
+ list_display = ("id", "plugin_name", "team_name", "enabled")
+ list_display_links = ("id", "plugin_name")
+ list_filter = (
+ ("enabled", admin.BooleanFieldListFilter),
+ ("deleted", admin.BooleanFieldListFilter),
+ ("updated_at", admin.DateFieldListFilter),
+ ("plugin", admin.RelatedOnlyFieldListFilter),
+ )
+ list_select_related = ("team", "plugin")
+ search_fields = ("team__name", "team__organization__name", "plugin__name")
+ ordering = ("-created_at",)
+ inlines = [PluginAttachmentInline]
+
+ def plugin_name(self, config: PluginConfig):
+ return format_html(f"{config.plugin.name} ({config.plugin_id})")
+
+ def team_name(self, config: PluginConfig):
+ return format_html(f"{config.team.name} ({config.team_id})")
diff --git a/posthog/admin/admins/survey_admin.py b/posthog/admin/admins/survey_admin.py
new file mode 100644
index 0000000000000..cefbf87ff3aa7
--- /dev/null
+++ b/posthog/admin/admins/survey_admin.py
@@ -0,0 +1,26 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import Survey
+
+
+class SurveyAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "name",
+ "team_link",
+ "created_at",
+ "created_by",
+ )
+ list_display_links = ("id", "name")
+ list_select_related = ("team", "team__organization")
+ search_fields = ("id", "name", "team__name", "team__organization__name")
+ autocomplete_fields = ("team", "created_by")
+ ordering = ("-created_at",)
+
+ def team_link(self, survey: Survey):
+ return format_html(
+ '<a href="/admin/posthog/team/{}/change/">{}</a>',
+ survey.team.pk,
+ survey.team.name,
+ )
diff --git a/posthog/admin/admins/team_admin.py b/posthog/admin/admins/team_admin.py
new file mode 100644
index 0000000000000..6cb0e1dfe4491
--- /dev/null
+++ b/posthog/admin/admins/team_admin.py
@@ -0,0 +1,103 @@
+from django.contrib import admin
+from django.utils.html import format_html
+from posthog.admin.inlines.action_inline import ActionInline
+from posthog.admin.inlines.group_type_mapping_inline import GroupTypeMappingInline
+
+from posthog.models import Team
+
+
+class TeamAdmin(admin.ModelAdmin):
+ list_display = (
+ "id",
+ "name",
+ "organization_link",
+ "organization_id",
+ "created_at",
+ "updated_at",
+ )
+ list_display_links = ("id", "name")
+ list_select_related = ("organization",)
+ search_fields = (
+ "id",
+ "name",
+ "organization__id",
+ "organization__name",
+ "api_token",
+ )
+ readonly_fields = ["organization", "primary_dashboard", "test_account_filters"]
+ inlines = [GroupTypeMappingInline, ActionInline]
+ fieldsets = [
+ (
+ None,
+ {
+ "fields": [
+ "name",
+ "organization",
+ ],
+ },
+ ),
+ (
+ "General",
+ {
+ "classes": ["collapse"],
+ "fields": [
+ "api_token",
+ "timezone",
+ "slack_incoming_webhook",
+ "primary_dashboard",
+ ],
+ },
+ ),
+ (
+ "Onboarding",
+ {
+ "classes": ["collapse"],
+ "fields": [
+ "is_demo",
+ "completed_snippet_onboarding",
+ "ingested_event",
+ "signup_token",
+ ],
+ },
+ ),
+ (
+ "Settings",
+ {
+ "classes": ["collapse"],
+ "fields": [
+ "anonymize_ips",
+ "autocapture_opt_out",
+ "autocapture_exceptions_opt_in",
+ "session_recording_opt_in",
+ "capture_console_log_opt_in",
+ "capture_performance_opt_in",
+ "session_recording_sample_rate",
+ "session_recording_minimum_duration_milliseconds",
+ "session_recording_linked_flag",
+ "data_attributes",
+ "session_recording_version",
+ "access_control",
+ "inject_web_apps",
+ "extra_settings",
+ ],
+ },
+ ),
+ (
+ "Filters",
+ {
+ "classes": ["collapse"],
+ "fields": [
+ "test_account_filters",
+ "test_account_filters_default_checked",
+ "path_cleaning_filters",
+ ],
+ },
+ ),
+ ]
+
+ def organization_link(self, team: Team):
+ return format_html(
+ '<a href="/admin/posthog/organization/{}/change/">{}</a>',
+ team.organization.pk,
+ team.organization.name,
+ )
diff --git a/posthog/admin/admins/text_admin.py b/posthog/admin/admins/text_admin.py
new file mode 100644
index 0000000000000..c4198d079ca97
--- /dev/null
+++ b/posthog/admin/admins/text_admin.py
@@ -0,0 +1,6 @@
+from django.contrib import admin
+
+
+class TextAdmin(admin.ModelAdmin):
+ autocomplete_fields = ("created_by", "last_modified_by", "team")
+ search_fields = ("id", "body", "team__name", "team__organization__name")
diff --git a/posthog/admin/admins/user_admin.py b/posthog/admin/admins/user_admin.py
new file mode 100644
index 0000000000000..c1129ef334fa4
--- /dev/null
+++ b/posthog/admin/admins/user_admin.py
@@ -0,0 +1,91 @@
+from django.utils.html import format_html
+
+from django.contrib.auth.admin import UserAdmin as DjangoUserAdmin
+from django.contrib.auth.forms import UserChangeForm as DjangoUserChangeForm
+from django.contrib.auth.tokens import default_token_generator
+from django.utils.translation import gettext_lazy as _
+
+from posthog.admin.inlines.organization_member_inline import OrganizationMemberInline
+from posthog.admin.inlines.totp_device_inline import TOTPDeviceInline
+from posthog.models import User
+
+
+class UserChangeForm(DjangoUserChangeForm):
+ def __init__(self, *args, **kwargs):
+ super().__init__(*args, **kwargs)
+ # This is a riff on https://github.com/django/django/blob/stable/4.1.x/django/contrib/auth/forms.py#L151-L153.
+ # The difference from the Django default is that instead of a form where the _admin_ sets the new password,
+ # we have a link to the password reset page which the _user_ can use themselves.
+ # This way if some user needs to reset their password and there's a problem with receiving the reset link email,
+ # an admin can provide that reset link manually – much better than sending a new password in plain text.
+ password_reset_token = default_token_generator.make_token(self.instance)
+ self.fields["password"].help_text = (
+ "Raw passwords are not stored, so there is no way to see this user’s password, but you can send them "
+ f'<a href="/reset/{self.instance.uuid}/{password_reset_token}">this password reset link</a> '
+ "(it only works when logged out)."
+ )
+
+
+class UserAdmin(DjangoUserAdmin):
+ """Define admin model for custom User model with no email field."""
+
+ form = UserChangeForm
+ change_password_form = None # This view is not exposed in our subclass of UserChangeForm
+ change_form_template = "loginas/change_form.html"
+
+ inlines = [OrganizationMemberInline, TOTPDeviceInline]
+ fieldsets = (
+ (
+ None,
+ {
+ "fields": (
+ "email",
+ "password",
+ "current_organization",
+ "is_email_verified",
+ "pending_email",
+ "strapi_id",
+ )
+ },
+ ),
+ (_("Personal info"), {"fields": ("first_name", "last_name")}),
+ (_("Permissions"), {"fields": ("is_active", "is_staff")}),
+ (_("Important dates"), {"fields": ("last_login", "date_joined")}),
+ (_("Toolbar authentication"), {"fields": ("temporary_token",)}),
+ )
+ add_fieldsets = ((None, {"classes": ("wide",), "fields": ("email", "password1", "password2")}),)
+ list_display = (
+ "id",
+ "email",
+ "first_name",
+ "last_name",
+ "current_team_link",
+ "current_organization_link",
+ "is_staff",
+ )
+ list_display_links = ("id", "email")
+ list_filter = ("is_staff", "is_active", "groups")
+ list_select_related = ("current_team", "current_organization")
+ search_fields = ("email", "first_name", "last_name")
+ readonly_fields = ["current_team", "current_organization"]
+ ordering = ("email",)
+
+ def current_team_link(self, user: User):
+ if not user.team:
+ return "–"
+
+ return format_html(
+ '<a href="/admin/posthog/team/{}/change/">{}</a>',
+ user.team.pk,
+ user.team.name,
+ )
+
+ def current_organization_link(self, user: User):
+ if not user.organization:
+ return "–"
+
+ return format_html(
+ '<a href="/admin/posthog/organization/{}/change/">{}</a>',
+ user.organization.pk,
+ user.organization.name,
+ )
diff --git a/posthog/admin/inlines/action_inline.py b/posthog/admin/inlines/action_inline.py
new file mode 100644
index 0000000000000..47b8b9b7600dd
--- /dev/null
+++ b/posthog/admin/inlines/action_inline.py
@@ -0,0 +1,10 @@
+from django.contrib import admin
+
+from posthog.models import Action
+
+
+class ActionInline(admin.TabularInline):
+ extra = 0
+ model = Action
+ classes = ("collapse",)
+ autocomplete_fields = ("created_by",)
diff --git a/posthog/admin/inlines/group_type_mapping_inline.py b/posthog/admin/inlines/group_type_mapping_inline.py
new file mode 100644
index 0000000000000..00bd0a616775d
--- /dev/null
+++ b/posthog/admin/inlines/group_type_mapping_inline.py
@@ -0,0 +1,13 @@
+from django.contrib import admin
+
+from posthog.models import GroupTypeMapping
+
+
+class GroupTypeMappingInline(admin.TabularInline):
+ extra = 0
+ model = GroupTypeMapping
+ fields = ("group_type_index", "group_type", "name_singular", "name_plural")
+ readonly_fields = fields
+ classes = ("collapse",)
+ max_num = 5
+ min_num = 5
diff --git a/posthog/admin/inlines/organization_member_inline.py b/posthog/admin/inlines/organization_member_inline.py
new file mode 100644
index 0000000000000..e68636feb4c76
--- /dev/null
+++ b/posthog/admin/inlines/organization_member_inline.py
@@ -0,0 +1,10 @@
+from django.contrib import admin
+
+from posthog.models.organization import OrganizationMembership
+
+
+class OrganizationMemberInline(admin.TabularInline):
+ extra = 0
+ model = OrganizationMembership
+ readonly_fields = ("user", "joined_at", "updated_at")
+ autocomplete_fields = ("user", "organization")
diff --git a/posthog/admin/inlines/organization_team_inline.py b/posthog/admin/inlines/organization_team_inline.py
new file mode 100644
index 0000000000000..18ce52c4e7dee
--- /dev/null
+++ b/posthog/admin/inlines/organization_team_inline.py
@@ -0,0 +1,43 @@
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models.team.team import Team
+
+
+class OrganizationTeamInline(admin.TabularInline):
+ extra = 0
+ model = Team
+
+ fields = (
+ "id",
+ "displayed_name",
+ "api_token",
+ "app_urls",
+ "name",
+ "created_at",
+ "updated_at",
+ "anonymize_ips",
+ "completed_snippet_onboarding",
+ "ingested_event",
+ "session_recording_opt_in",
+ "autocapture_opt_out",
+ "signup_token",
+ "is_demo",
+ "access_control",
+ "test_account_filters",
+ "path_cleaning_filters",
+ "timezone",
+ "data_attributes",
+ "correlation_config",
+ "plugins_opt_in",
+ "opt_out_capture",
+ )
+ readonly_fields = ("id", "displayed_name", "created_at", "updated_at")
+
+ def displayed_name(self, team: Team):
+ return format_html(
+ '<a href="/admin/posthog/team/{}/change/">{}. {}</a>',
+ team.pk,
+ team.pk,
+ team.name,
+ )
diff --git a/posthog/admin/inlines/plugin_attachment_inline.py b/posthog/admin/inlines/plugin_attachment_inline.py
new file mode 100644
index 0000000000000..5f568c67affaf
--- /dev/null
+++ b/posthog/admin/inlines/plugin_attachment_inline.py
@@ -0,0 +1,44 @@
+import json
+
+from django.contrib import admin
+from django.utils.html import format_html
+
+from posthog.models import PluginAttachment
+
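+# Attachments larger than this (1 MiB) are not rendered inline below, to keep the change page fast.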
+ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES = 1024 * 1024
+
+
+class PluginAttachmentInline(admin.StackedInline):
+ extra = 0
+ model = PluginAttachment
+ fields = ("key", "content_type", "file_size", "raw_contents", "json_contents")
+ readonly_fields = fields
+
+ def raw_contents(self, attachment: PluginAttachment):
+ try:
+ if attachment.file_size > ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES:
+ raise ValueError(
+ f"file size {attachment.file_size} is larger than {ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES} bytes"
+ )
+ return attachment.contents.tobytes()
+ except Exception as err:
+ return format_html(f"cannot preview: {err}")
+
+ def json_contents(self, attachment: PluginAttachment):
+ try:
+ if attachment.file_size > ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES:
+ raise ValueError(
+ f"file size {attachment.file_size} is larger than {ATTACHMENT_PREVIEW_SIZE_LIMIT_BYTES} bytes"
+ )
+ return json.loads(attachment.contents.tobytes())
+ except Exception as err:
+ return format_html(f"cannot preview: {err}")
+
+ def has_add_permission(self, request, obj):
+ return False
+
+ def has_change_permission(self, request, obj):
+ return False
+
+ def has_delete_permission(self, request, obj):
+ return False
diff --git a/posthog/admin/inlines/totp_device_inline.py b/posthog/admin/inlines/totp_device_inline.py
new file mode 100644
index 0000000000000..eef506f601611
--- /dev/null
+++ b/posthog/admin/inlines/totp_device_inline.py
@@ -0,0 +1,7 @@
+from django.contrib import admin
+from django_otp.plugins.otp_totp.models import TOTPDevice
+
+
+class TOTPDeviceInline(admin.TabularInline):
+ model = TOTPDevice
+ extra = 0
diff --git a/posthog/admin/paginators/no_count_paginator.py b/posthog/admin/paginators/no_count_paginator.py
new file mode 100644
index 0000000000000..3bfea7a78e5e1
--- /dev/null
+++ b/posthog/admin/paginators/no_count_paginator.py
@@ -0,0 +1,7 @@
+from django.core.paginator import Paginator
+
+
+class NoCountPaginator(Paginator):
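+ # The admin changelist runs COUNT(*) for pagination on every page load, which is
+ # prohibitively slow on huge tables such as persons and distinct IDs. Returning a
+ # large constant keeps the next-page links working at the cost of an accurate total.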
+ @property
+ def count(self):
+ return 999999
diff --git a/posthog/api/authentication.py b/posthog/api/authentication.py
index 47e0e720cc68d..d06e7168d0df2 100644
--- a/posthog/api/authentication.py
+++ b/posthog/api/authentication.py
@@ -23,6 +23,7 @@
from rest_framework.request import Request
from rest_framework.response import Response
from rest_framework.throttling import UserRateThrottle
+from sentry_sdk import capture_exception
from social_django.views import auth
from two_factor.utils import default_device
from two_factor.views.core import REMEMBER_COOKIE_PREFIX
@@ -276,7 +277,7 @@ def create(self, validated_data):
user = None
if user:
- user.requested_password_reset_at = datetime.datetime.now()
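+ # Use a timezone-aware datetime: with USE_TZ enabled, saving a naive one triggers
+ # warnings and breaks aware/naive comparisons downstream.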
+ user.requested_password_reset_at = datetime.datetime.now(datetime.timezone.utc)
user.save()
token = password_reset_token_generator.make_token(user)
send_password_reset(user.id, token)
@@ -296,12 +297,20 @@ def create(self, validated_data):
try:
user = User.objects.filter(is_active=True).get(uuid=self.context["view"].kwargs["user_uuid"])
except User.DoesNotExist:
+ capture_exception(
+ Exception("User not found in password reset serializer"),
+ {"user_uuid": self.context["view"].kwargs["user_uuid"]},
+ )
raise serializers.ValidationError(
{"token": ["This reset token is invalid or has expired."]},
code="invalid_token",
)
if not password_reset_token_generator.check_token(user, validated_data["token"]):
+ capture_exception(
+ Exception("Invalid password reset token in serializer"),
+ {"user_uuid": user.uuid, "token": validated_data["token"]},
+ )
raise serializers.ValidationError(
{"token": ["This reset token is invalid or has expired."]},
code="invalid_token",
@@ -353,9 +362,18 @@ def get_object(self):
try:
user = User.objects.filter(is_active=True).get(uuid=user_uuid)
except User.DoesNotExist:
- user = None
+ capture_exception(
+ Exception("User not found in password reset viewset"), {"user_uuid": user_uuid, "token": token}
+ )
+ raise serializers.ValidationError(
+ {"token": ["This reset token is invalid or has expired."]},
+ code="invalid_token",
+ )
- if not user or not password_reset_token_generator.check_token(user, token):
+ if not password_reset_token_generator.check_token(user, token):
+ capture_exception(
+ Exception("Invalid password reset token in viewset"), {"user_uuid": user_uuid, "token": token}
+ )
raise serializers.ValidationError(
{"token": ["This reset token is invalid or has expired."]},
code="invalid_token",
diff --git a/posthog/api/organization.py b/posthog/api/organization.py
index 2e5a1e2466267..ea1a9f31615b1 100644
--- a/posthog/api/organization.py
+++ b/posthog/api/organization.py
@@ -3,6 +3,7 @@
from django.db.models import Model, QuerySet
from django.shortcuts import get_object_or_404
+from django.views import View
from rest_framework import exceptions, permissions, serializers, viewsets
from rest_framework.request import Request
@@ -48,11 +49,11 @@ def has_permission(self, request: Request, view) -> bool:
class OrganizationPermissionsWithDelete(OrganizationAdminWritePermissions):
- def has_object_permission(self, request: Request, view, object: Model) -> bool:
+ def has_object_permission(self, request: Request, view: View, object: Model) -> bool:
if request.method in permissions.SAFE_METHODS:
return True
# TODO: Optimize so that this computation is only done once, on `OrganizationMemberPermissions`
- organization = extract_organization(object)
+ organization = extract_organization(object, view)
min_level = (
OrganizationMembership.Level.OWNER if request.method == "DELETE" else OrganizationMembership.Level.ADMIN
)
diff --git a/posthog/api/organization_invite.py b/posthog/api/organization_invite.py
index 72345f6960759..cebf7e1f61e2d 100644
--- a/posthog/api/organization_invite.py
+++ b/posthog/api/organization_invite.py
@@ -83,7 +83,7 @@ class OrganizationInviteViewSet(
mixins.ListModelMixin,
viewsets.GenericViewSet,
):
- scope_object = "INTERNAL"
+ scope_object = "organization_member"
serializer_class = OrganizationInviteSerializer
queryset = OrganizationInvite.objects.all()
lookup_field = "id"
@@ -96,7 +96,7 @@ def get_queryset(self):
.order_by(self.ordering)
)
- @action(methods=["POST"], detail=False)
+ @action(methods=["POST"], detail=False, required_scopes=["organization_member:write"])
def bulk(self, request: request.Request, **kwargs) -> response.Response:
data = cast(Any, request.data)
if not isinstance(data, list):
diff --git a/posthog/api/organization_member.py b/posthog/api/organization_member.py
index ee6813a1dc48f..d6eeee3e95ac5 100644
--- a/posthog/api/organization_member.py
+++ b/posthog/api/organization_member.py
@@ -2,6 +2,7 @@
from django.db.models import Model, Prefetch, QuerySet
from django.shortcuts import get_object_or_404
+from django.views import View
from django_otp.plugins.otp_totp.models import TOTPDevice
from rest_framework import exceptions, mixins, serializers, viewsets
from rest_framework.permissions import SAFE_METHODS, BasePermission
@@ -22,10 +23,10 @@ class OrganizationMemberObjectPermissions(BasePermission):
message = "Your cannot edit other organization members."
- def has_object_permission(self, request: Request, view, membership: OrganizationMembership) -> bool:
+ def has_object_permission(self, request: Request, view: View, membership: OrganizationMembership) -> bool:
if request.method in SAFE_METHODS:
return True
- organization = extract_organization(membership)
+ organization = extract_organization(membership, view)
requesting_membership: OrganizationMembership = OrganizationMembership.objects.get(
user_id=cast(User, request.user).id,
organization=organization,
diff --git a/posthog/api/property_definition.py b/posthog/api/property_definition.py
index 6b37519ac1960..9ffa473189243 100644
--- a/posthog/api/property_definition.py
+++ b/posthog/api/property_definition.py
@@ -4,14 +4,8 @@
from django.db import connection
from django.db.models import Prefetch
-from rest_framework import (
- mixins,
- serializers,
- viewsets,
- status,
- request,
- response,
-)
+from loginas.utils import is_impersonated_session
+from rest_framework import mixins, request, response, serializers, status, viewsets
from rest_framework.decorators import action
from rest_framework.exceptions import ValidationError
from rest_framework.pagination import LimitOffsetPagination
@@ -23,10 +17,9 @@
from posthog.event_usage import report_user_action
from posthog.exceptions import EnterpriseFeatureException
from posthog.filters import TermSearchFilterBackend, term_search_filter_sql
-from posthog.models import PropertyDefinition, TaggedItem, User, EventProperty
-from posthog.models.activity_logging.activity_log import log_activity, Detail
+from posthog.models import EventProperty, PropertyDefinition, TaggedItem, User
+from posthog.models.activity_logging.activity_log import Detail, log_activity
from posthog.models.utils import UUIDT
-from loginas.utils import is_impersonated_session
class SeenTogetherQuerySerializer(serializers.Serializer):
@@ -245,9 +238,11 @@ def with_excluded_properties(self, excluded_properties: Optional[str], type: str
)
return dataclasses.replace(
self,
- excluded_properties_filter=f"AND {self.property_definition_table}.name NOT IN %(excluded_properties)s"
- if len(excluded_list) > 0
- else "",
+ excluded_properties_filter=(
+ f"AND {self.property_definition_table}.name NOT IN %(excluded_properties)s"
+ if len(excluded_list) > 0
+ else ""
+ ),
params={
**self.params,
"excluded_properties": excluded_list,
@@ -580,7 +575,7 @@ def get_object(self):
def list(self, request, *args, **kwargs):
return super().list(request, *args, **kwargs)
- @action(methods=["GET"], detail=False)
+ @action(methods=["GET"], detail=False, required_scopes=["property_definition:read"])
def seen_together(self, request: request.Request, *args: Any, **kwargs: Any) -> response.Response:
"""
Allows a caller to provide a list of event names and a single property name
diff --git a/posthog/api/services/query.py b/posthog/api/services/query.py
index f3f5dcf2be497..75d326afead3a 100644
--- a/posthog/api/services/query.py
+++ b/posthog/api/services/query.py
@@ -16,6 +16,7 @@
from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer
from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions
from posthog.schema import (
+ FunnelCorrelationQuery,
FunnelsQuery,
HogQLAutocomplete,
HogQLMetadata,
@@ -47,6 +48,7 @@
| PathsQuery
| StickinessQuery
| LifecycleQuery
+ | FunnelCorrelationQuery
| WebOverviewQuery
| WebTopClicksQuery
| WebStatsTableQuery
diff --git a/posthog/api/team.py b/posthog/api/team.py
index 15ab9bae71a0d..1b615bd692643 100644
--- a/posthog/api/team.py
+++ b/posthog/api/team.py
@@ -63,24 +63,19 @@ def has_permission(self, request: request.Request, view) -> bool:
except ValueError:
return False
- # if we're not requesting to make a demo project
- # and if the org already has more than 1 non-demo project (need to be able to make the initial project)
- # and the org isn't allowed to make multiple projects
- if (
- ("is_demo" not in request.data or not request.data["is_demo"])
- and organization.teams.exclude(is_demo=True).count() >= 1
- and not organization.is_feature_available(AvailableFeature.ORGANIZATIONS_PROJECTS)
- ):
- return False
-
- # if we ARE requesting to make a demo project
- # but the org already has a demo project
- if (
- "is_demo" in request.data
- and request.data["is_demo"]
- and organization.teams.exclude(is_demo=False).count() > 0
- ):
- return False
+ if not request.data.get("is_demo"):
+ # if we're not requesting to make a demo project
+ # and if the org already has more than 1 non-demo project (need to be able to make the initial project)
+ # and the org isn't allowed to make multiple projects
+ if organization.teams.exclude(is_demo=True).count() >= 1 and not organization.is_feature_available(
+ AvailableFeature.ORGANIZATIONS_PROJECTS
+ ):
+ return False
+ else:
+ # if we ARE requesting to make a demo project
+ # but the org already has a demo project
+ if organization.teams.filter(is_demo=True).count() > 0:
+ return False
# in any other case, we're good to go
return True
diff --git a/posthog/api/test/__snapshots__/test_action.ambr b/posthog/api/test/__snapshots__/test_action.ambr
index 74d16c63216a4..3b71cb1191f91 100644
--- a/posthog/api/test/__snapshots__/test_action.ambr
+++ b/posthog/api/test/__snapshots__/test_action.ambr
@@ -35,6 +35,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -196,6 +197,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -538,6 +540,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_annotation.ambr b/posthog/api/test/__snapshots__/test_annotation.ambr
index 62159d2f01b27..b75f8a9661f7e 100644
--- a/posthog/api/test/__snapshots__/test_annotation.ambr
+++ b/posthog/api/test/__snapshots__/test_annotation.ambr
@@ -35,6 +35,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -118,6 +119,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -454,6 +456,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_api_docs.ambr b/posthog/api/test/__snapshots__/test_api_docs.ambr
index c378bbb274781..728dabc13af0e 100644
--- a/posthog/api/test/__snapshots__/test_api_docs.ambr
+++ b/posthog/api/test/__snapshots__/test_api_docs.ambr
@@ -26,6 +26,7 @@
'/home/runner/work/posthog/posthog/posthog/api/dashboards/dashboard_templates.py: Warning [DashboardTemplateViewSet]: could not derive type of path parameter "project_id" because it is untyped and obtaining queryset from the viewset failed. Consider adding a type to the path (e.g. <int:project_id>) or annotating the parameter type with @extend_schema. Defaulting to "string".',
'/home/runner/work/posthog/posthog/posthog/api/dashboards/dashboard_templates.py: Warning [DashboardTemplateViewSet]: could not derive type of path parameter "id" because it is untyped and obtaining queryset from the viewset failed. Consider adding a type to the path (e.g. <int:id>) or annotating the parameter type with @extend_schema. Defaulting to "string".',
'/home/runner/work/posthog/posthog/posthog/api/dashboards/dashboard.py: Warning [DashboardsViewSet]: could not derive type of path parameter "project_id" because model "posthog.models.dashboard.Dashboard" contained no such field. Consider annotating parameter with @extend_schema. Defaulting to "string".',
+ '/home/runner/work/posthog/posthog/ee/api/dashboard_collaborator.py: Warning [DashboardCollaboratorViewSet]: could not derive type of path parameter "project_id" because model "ee.models.dashboard_privilege.DashboardPrivilege" contained no such field. Consider annotating parameter with @extend_schema. Defaulting to "string".',
'/home/runner/work/posthog/posthog/posthog/api/sharing.py: Warning [SharingConfigurationViewSet]: could not derive type of path parameter "project_id" because model "posthog.models.sharing_configuration.SharingConfiguration" contained no such field. Consider annotating parameter with @extend_schema. Defaulting to "string".',
'/home/runner/work/posthog/posthog/posthog/api/early_access_feature.py: Warning [EarlyAccessFeatureViewSet]: could not derive type of path parameter "project_id" because model "posthog.models.early_access_feature.EarlyAccessFeature" contained no such field. Consider annotating parameter with @extend_schema. Defaulting to "string".',
"/home/runner/work/posthog/posthog/posthog/api/event_definition.py: Error [EventDefinitionViewSet]: exception raised while getting serializer. Hint: Is get_serializer_class() returning None or is get_queryset() not working without a request? Ignoring the view for now. (Exception: 'AnonymousUser' object has no attribute 'organization')",
@@ -88,6 +89,8 @@
'/home/runner/work/posthog/posthog/posthog/api/survey.py: Warning [SurveyViewSet]: could not derive type of path parameter "project_id" because model "posthog.models.feedback.survey.Survey" contained no such field. Consider annotating parameter with @extend_schema. Defaulting to "string".',
'Warning: encountered multiple names for the same choice set (HrefMatchingEnum). This may be unwanted even though the generated schema is technically correct. Add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: enum naming encountered a non-optimally resolvable collision for fields named "type". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "Type7baEnum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
+ 'Warning: enum naming encountered a non-optimally resolvable collision for fields named "level". The same name has been used for multiple choice sets in multiple components. The collision was resolved with "LevelD7eEnum". add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
+ 'Warning: encountered multiple names for the same choice set (RestrictionLevelEnum). This may be unwanted even though the generated schema is technically correct. Add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: encountered multiple names for the same choice set (EffectivePrivilegeLevelEnum). This may be unwanted even though the generated schema is technically correct. Add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: encountered multiple names for the same choice set (MembershipLevelEnum). This may be unwanted even though the generated schema is technically correct. Add an entry to ENUM_NAME_OVERRIDES to fix the naming.',
'Warning: operationId "list" has collisions [(\'/api/organizations/\', \'get\'), (\'/api/organizations/{organization_id}/projects/\', \'get\')]. resolving with numeral suffixes.',
diff --git a/posthog/api/test/__snapshots__/test_decide.ambr b/posthog/api/test/__snapshots__/test_decide.ambr
index 32ba4e38c4500..1bb6ffa074d20 100644
--- a/posthog/api/test/__snapshots__/test_decide.ambr
+++ b/posthog/api/test/__snapshots__/test_decide.ambr
@@ -35,6 +35,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -284,6 +285,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -445,6 +447,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -599,6 +602,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_early_access_feature.ambr b/posthog/api/test/__snapshots__/test_early_access_feature.ambr
index 4db55d14a92a2..7a472bdce6edc 100644
--- a/posthog/api/test/__snapshots__/test_early_access_feature.ambr
+++ b/posthog/api/test/__snapshots__/test_early_access_feature.ambr
@@ -4,6 +4,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -140,6 +141,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_element.ambr b/posthog/api/test/__snapshots__/test_element.ambr
index 0729c028fec5f..67722a01933c0 100644
--- a/posthog/api/test/__snapshots__/test_element.ambr
+++ b/posthog/api/test/__snapshots__/test_element.ambr
@@ -35,6 +35,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_feature_flag.ambr b/posthog/api/test/__snapshots__/test_feature_flag.ambr
index 615477721a72f..2d11fc4500367 100644
--- a/posthog/api/test/__snapshots__/test_feature_flag.ambr
+++ b/posthog/api/test/__snapshots__/test_feature_flag.ambr
@@ -444,6 +444,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -645,6 +646,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1006,6 +1008,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1143,6 +1146,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1437,6 +1441,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1527,6 +1532,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1616,6 +1622,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1675,6 +1682,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_insight.ambr b/posthog/api/test/__snapshots__/test_insight.ambr
index 687de0f873767..5bdf7b792790b 100644
--- a/posthog/api/test/__snapshots__/test_insight.ambr
+++ b/posthog/api/test/__snapshots__/test_insight.ambr
@@ -674,6 +674,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -726,6 +727,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -854,6 +856,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1092,6 +1095,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1240,6 +1244,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1373,6 +1378,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1485,6 +1491,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1614,6 +1621,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1701,6 +1709,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1787,6 +1796,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1846,6 +1856,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_organization_feature_flag.ambr b/posthog/api/test/__snapshots__/test_organization_feature_flag.ambr
index c16bf238a6045..38996ee675b74 100644
--- a/posthog/api/test/__snapshots__/test_organization_feature_flag.ambr
+++ b/posthog/api/test/__snapshots__/test_organization_feature_flag.ambr
@@ -84,6 +84,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -191,6 +192,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -278,6 +280,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -489,6 +492,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -617,6 +621,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -765,6 +770,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -852,6 +858,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1069,6 +1076,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1197,6 +1205,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1249,6 +1258,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1397,6 +1407,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1671,6 +1682,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_preflight.ambr b/posthog/api/test/__snapshots__/test_preflight.ambr
index b5686cfb79935..b4c8a06aad816 100644
--- a/posthog/api/test/__snapshots__/test_preflight.ambr
+++ b/posthog/api/test/__snapshots__/test_preflight.ambr
@@ -46,6 +46,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/__snapshots__/test_survey.ambr b/posthog/api/test/__snapshots__/test_survey.ambr
index 41c085776554e..97493427f7527 100644
--- a/posthog/api/test/__snapshots__/test_survey.ambr
+++ b/posthog/api/test/__snapshots__/test_survey.ambr
@@ -114,6 +114,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr
index e0dabc9899cbc..642602f396f8d 100644
--- a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr
+++ b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr
@@ -35,6 +35,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -164,6 +165,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -290,6 +292,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -509,6 +512,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -681,6 +685,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -863,6 +868,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1036,6 +1042,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1306,6 +1313,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1365,6 +1373,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1526,6 +1535,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1638,6 +1648,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1697,6 +1708,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1854,6 +1866,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1982,6 +1995,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2246,6 +2260,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2490,6 +2505,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2624,6 +2640,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2750,6 +2767,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2873,6 +2891,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2974,6 +2993,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3126,6 +3146,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3224,6 +3245,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3353,6 +3375,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3478,6 +3501,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3614,6 +3638,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3940,6 +3965,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4102,6 +4128,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4242,6 +4269,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4329,6 +4357,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4492,6 +4521,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4551,6 +4581,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4676,6 +4707,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4830,6 +4862,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5261,6 +5294,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5408,6 +5442,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5495,6 +5530,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5620,6 +5656,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5706,6 +5743,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5765,6 +5803,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5890,6 +5929,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -6032,6 +6072,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -6196,6 +6237,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -6613,6 +6655,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -6770,6 +6813,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -6957,6 +7001,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -7129,6 +7174,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -7272,6 +7318,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -7363,6 +7410,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -7536,6 +7584,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -8182,6 +8231,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -8445,6 +8495,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -8608,6 +8659,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -8667,6 +8719,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -8792,6 +8845,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -8946,6 +9000,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -9071,6 +9126,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -9208,6 +9264,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -9350,6 +9407,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -9663,6 +9721,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -9821,6 +9880,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -9929,6 +9989,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -10019,6 +10080,181 @@
'''
# ---
# name: TestDashboard.test_retrieve_dashboard.10
+ '''
+ SELECT "posthog_dashboardtile"."id",
+ "posthog_dashboardtile"."dashboard_id",
+ "posthog_dashboardtile"."insight_id",
+ "posthog_dashboardtile"."text_id",
+ "posthog_dashboardtile"."layouts",
+ "posthog_dashboardtile"."color",
+ "posthog_dashboardtile"."filters_hash",
+ "posthog_dashboardtile"."last_refresh",
+ "posthog_dashboardtile"."refreshing",
+ "posthog_dashboardtile"."refresh_attempt",
+ "posthog_dashboardtile"."deleted",
+ "posthog_dashboarditem"."id",
+ "posthog_dashboarditem"."name",
+ "posthog_dashboarditem"."derived_name",
+ "posthog_dashboarditem"."description",
+ "posthog_dashboarditem"."team_id",
+ "posthog_dashboarditem"."filters",
+ "posthog_dashboarditem"."filters_hash",
+ "posthog_dashboarditem"."query",
+ "posthog_dashboarditem"."order",
+ "posthog_dashboarditem"."deleted",
+ "posthog_dashboarditem"."saved",
+ "posthog_dashboarditem"."created_at",
+ "posthog_dashboarditem"."last_refresh",
+ "posthog_dashboarditem"."refreshing",
+ "posthog_dashboarditem"."created_by_id",
+ "posthog_dashboarditem"."is_sample",
+ "posthog_dashboarditem"."short_id",
+ "posthog_dashboarditem"."favorited",
+ "posthog_dashboarditem"."refresh_attempt",
+ "posthog_dashboarditem"."last_modified_at",
+ "posthog_dashboarditem"."last_modified_by_id",
+ "posthog_dashboarditem"."dashboard_id",
+ "posthog_dashboarditem"."layouts",
+ "posthog_dashboarditem"."color",
+ "posthog_dashboarditem"."dive_dashboard_id",
+ "posthog_dashboarditem"."updated_at",
+ "posthog_dashboarditem"."deprecated_tags",
+ "posthog_dashboarditem"."tags",
+ "posthog_team"."id",
+ "posthog_team"."uuid",
+ "posthog_team"."organization_id",
+ "posthog_team"."project_id",
+ "posthog_team"."api_token",
+ "posthog_team"."app_urls",
+ "posthog_team"."name",
+ "posthog_team"."slack_incoming_webhook",
+ "posthog_team"."created_at",
+ "posthog_team"."updated_at",
+ "posthog_team"."anonymize_ips",
+ "posthog_team"."completed_snippet_onboarding",
+ "posthog_team"."has_completed_onboarding_for",
+ "posthog_team"."ingested_event",
+ "posthog_team"."autocapture_opt_out",
+ "posthog_team"."autocapture_exceptions_opt_in",
+ "posthog_team"."autocapture_exceptions_errors_to_ignore",
+ "posthog_team"."session_recording_opt_in",
+ "posthog_team"."session_recording_sample_rate",
+ "posthog_team"."session_recording_minimum_duration_milliseconds",
+ "posthog_team"."session_recording_linked_flag",
+ "posthog_team"."session_recording_network_payload_capture_config",
+ "posthog_team"."session_replay_config",
+ "posthog_team"."capture_console_log_opt_in",
+ "posthog_team"."capture_performance_opt_in",
+ "posthog_team"."surveys_opt_in",
+ "posthog_team"."session_recording_version",
+ "posthog_team"."signup_token",
+ "posthog_team"."is_demo",
+ "posthog_team"."access_control",
+ "posthog_team"."week_start_day",
+ "posthog_team"."inject_web_apps",
+ "posthog_team"."test_account_filters",
+ "posthog_team"."test_account_filters_default_checked",
+ "posthog_team"."path_cleaning_filters",
+ "posthog_team"."timezone",
+ "posthog_team"."data_attributes",
+ "posthog_team"."person_display_name_properties",
+ "posthog_team"."live_events_columns",
+ "posthog_team"."recording_domains",
+ "posthog_team"."primary_dashboard_id",
+ "posthog_team"."extra_settings",
+ "posthog_team"."correlation_config",
+ "posthog_team"."session_recording_retention_period_days",
+ "posthog_team"."plugins_opt_in",
+ "posthog_team"."opt_out_capture",
+ "posthog_team"."event_names",
+ "posthog_team"."event_names_with_usage",
+ "posthog_team"."event_properties",
+ "posthog_team"."event_properties_with_usage",
+ "posthog_team"."event_properties_numerical",
+ "posthog_team"."external_data_workspace_id",
+ "posthog_team"."external_data_workspace_last_synced_at",
+ "posthog_user"."id",
+ "posthog_user"."password",
+ "posthog_user"."last_login",
+ "posthog_user"."first_name",
+ "posthog_user"."last_name",
+ "posthog_user"."is_staff",
+ "posthog_user"."is_active",
+ "posthog_user"."date_joined",
+ "posthog_user"."uuid",
+ "posthog_user"."current_organization_id",
+ "posthog_user"."current_team_id",
+ "posthog_user"."email",
+ "posthog_user"."pending_email",
+ "posthog_user"."temporary_token",
+ "posthog_user"."distinct_id",
+ "posthog_user"."is_email_verified",
+ "posthog_user"."requested_password_reset_at",
+ "posthog_user"."has_seen_product_intro_for",
+ "posthog_user"."strapi_id",
+ "posthog_user"."email_opt_in",
+ "posthog_user"."theme_mode",
+ "posthog_user"."partial_notification_settings",
+ "posthog_user"."anonymize_data",
+ "posthog_user"."toolbar_mode",
+ "posthog_user"."events_column_config",
+ T6."id",
+ T6."password",
+ T6."last_login",
+ T6."first_name",
+ T6."last_name",
+ T6."is_staff",
+ T6."is_active",
+ T6."date_joined",
+ T6."uuid",
+ T6."current_organization_id",
+ T6."current_team_id",
+ T6."email",
+ T6."pending_email",
+ T6."temporary_token",
+ T6."distinct_id",
+ T6."is_email_verified",
+ T6."requested_password_reset_at",
+ T6."has_seen_product_intro_for",
+ T6."strapi_id",
+ T6."email_opt_in",
+ T6."theme_mode",
+ T6."partial_notification_settings",
+ T6."anonymize_data",
+ T6."toolbar_mode",
+ T6."events_column_config",
+ "posthog_text"."id",
+ "posthog_text"."body",
+ "posthog_text"."created_by_id",
+ "posthog_text"."last_modified_at",
+ "posthog_text"."last_modified_by_id",
+ "posthog_text"."team_id"
+ FROM "posthog_dashboardtile"
+ INNER JOIN "posthog_dashboard" ON ("posthog_dashboardtile"."dashboard_id" = "posthog_dashboard"."id")
+ LEFT OUTER JOIN "posthog_dashboarditem" ON ("posthog_dashboardtile"."insight_id" = "posthog_dashboarditem"."id")
+ LEFT OUTER JOIN "posthog_team" ON ("posthog_dashboarditem"."team_id" = "posthog_team"."id")
+ LEFT OUTER JOIN "posthog_user" ON ("posthog_dashboarditem"."created_by_id" = "posthog_user"."id")
+ LEFT OUTER JOIN "posthog_user" T6 ON ("posthog_dashboarditem"."last_modified_by_id" = T6."id")
+ LEFT OUTER JOIN "posthog_text" ON ("posthog_dashboardtile"."text_id" = "posthog_text"."id")
+ WHERE (NOT ("posthog_dashboardtile"."deleted"
+ AND "posthog_dashboardtile"."deleted" IS NOT NULL)
+ AND NOT ("posthog_dashboard"."deleted")
+ AND NOT ("posthog_dashboard"."deleted"
+ AND "posthog_dashboardtile"."deleted"
+ AND "posthog_dashboardtile"."deleted" IS NOT NULL)
+ AND (NOT "posthog_dashboarditem"."deleted"
+ OR "posthog_dashboardtile"."insight_id" IS NULL)
+ AND "posthog_dashboardtile"."dashboard_id" = 2
+ AND "posthog_dashboardtile"."dashboard_id" = 2
+ AND NOT ("posthog_dashboard"."deleted"
+ AND "posthog_dashboardtile"."deleted"
+ AND "posthog_dashboardtile"."deleted" IS NOT NULL)
+ AND (NOT "posthog_dashboarditem"."deleted"
+ OR "posthog_dashboardtile"."insight_id" IS NULL))
+ ORDER BY "posthog_dashboarditem"."order" ASC /*controller='project_dashboards-detail',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/%3F%24'*/
+ '''
+# ---
+# name: TestDashboard.test_retrieve_dashboard.11
'''
SELECT "posthog_dashboardtile"."id",
"posthog_dashboardtile"."dashboard_id",
@@ -10197,6 +10433,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -10334,6 +10571,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -10521,6 +10759,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -10686,6 +10925,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -10791,6 +11031,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -10959,6 +11200,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -11145,6 +11387,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -11257,6 +11500,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -11425,6 +11669,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -11570,6 +11815,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -11788,6 +12034,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/notebooks/__snapshots__/test_notebook.ambr b/posthog/api/test/notebooks/__snapshots__/test_notebook.ambr
index 7677f829e61c5..eaa59d4e760a8 100644
--- a/posthog/api/test/notebooks/__snapshots__/test_notebook.ambr
+++ b/posthog/api/test/notebooks/__snapshots__/test_notebook.ambr
@@ -35,6 +35,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -171,6 +172,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -394,6 +396,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -501,6 +504,7 @@
"posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/api/test/test_signup.py b/posthog/api/test/test_signup.py
index d4e71415b4569..e62be1ffd4893 100644
--- a/posthog/api/test/test_signup.py
+++ b/posthog/api/test/test_signup.py
@@ -78,7 +78,7 @@ def test_api_sign_up(self, mock_capture):
self.assertFalse(user.is_email_verified)
# Assert that the team was properly created
- self.assertEqual(team.name, "Default Project")
+ self.assertEqual(team.name, "Default project")
# Assert that the org was properly created
self.assertEqual(organization.name, "Hedgehogs United, LLC")
diff --git a/posthog/api/test/test_team.py b/posthog/api/test/test_team.py
index 6b336ee1f15c7..11bc5c664e1dd 100644
--- a/posthog/api/test/test_team.py
+++ b/posthog/api/test/test_team.py
@@ -87,7 +87,7 @@ def test_retrieve_project(self):
def test_cant_retrieve_project_from_another_org(self):
org = Organization.objects.create(name="New Org")
- team = Team.objects.create(organization=org, name="Default Project")
+ team = Team.objects.create(organization=org, name="Default project")
response = self.client.get(f"/api/projects/{team.pk}/")
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
@@ -131,9 +131,9 @@ def test_cant_create_team_without_license_on_selfhosted(self):
def test_cant_create_a_second_project_without_license(self):
self.organization_membership.level = OrganizationMembership.Level.ADMIN
self.organization_membership.save()
- response = self.client.post("/api/projects/", {"name": "Hedgebox", "is_demo": False})
-
self.assertEqual(Team.objects.count(), 1)
+
+ response = self.client.post("/api/projects/", {"name": "Hedgebox", "is_demo": False})
self.assertEqual(response.status_code, 403)
response_data = response.json()
self.assertDictContainsSubset(
@@ -144,10 +144,10 @@ def test_cant_create_a_second_project_without_license(self):
},
response_data,
)
+ self.assertEqual(Team.objects.count(), 1)
# another request without the is_demo parameter
response = self.client.post("/api/projects/", {"name": "Hedgebox"})
- self.assertEqual(Team.objects.count(), 1)
self.assertEqual(response.status_code, 403)
response_data = response.json()
self.assertDictContainsSubset(
@@ -158,6 +158,7 @@ def test_cant_create_a_second_project_without_license(self):
},
response_data,
)
+ self.assertEqual(Team.objects.count(), 1)
@freeze_time("2022-02-08")
def test_update_project_timezone(self):
@@ -188,7 +189,48 @@ def test_update_project_timezone(self):
"type": "Team",
},
],
- "name": "Default Project",
+ "name": "Default project",
+ "short_id": None,
+ "trigger": None,
+ "type": None,
+ },
+ "item_id": str(self.team.pk),
+ "scope": "Team",
+ "user": {
+ "email": "user1@posthog.com",
+ "first_name": "",
+ },
+ },
+ ]
+ )
+
+ @freeze_time("2022-02-08")
+ def test_activity_log_tracks_extra_settings(self):
+ self._assert_activity_log_is_empty()
+
+ response = self.client.patch("/api/projects/@current/", {"extra_settings": {"poe_v2_enabled": True}})
+ self.assertEqual(response.status_code, status.HTTP_200_OK)
+
+ response_data = response.json()
+ self.assertEqual(response_data["name"], self.team.name)
+ self.assertEqual(response_data["extra_settings"], {"poe_v2_enabled": True})
+
+ self._assert_activity_log(
+ [
+ {
+ "activity": "updated",
+ "created_at": "2022-02-08T00:00:00Z",
+ "detail": {
+ "changes": [
+ {
+ "action": "created",
+ "after": {"poe_v2_enabled": True},
+ "before": None,
+ "field": "extra_settings",
+ "type": "Team",
+ },
+ ],
+ "name": "Default project",
"short_id": None,
"trigger": None,
"type": None,
@@ -231,7 +273,7 @@ def test_cannot_set_invalid_timezone_for_project(self):
def test_cant_update_project_from_another_org(self):
org = Organization.objects.create(name="New Org")
- team = Team.objects.create(organization=org, name="Default Project")
+ team = Team.objects.create(organization=org, name="Default project")
response = self.client.patch(f"/api/projects/{team.pk}/", {"timezone": "Africa/Accra"})
self.assertEqual(response.status_code, status.HTTP_404_NOT_FOUND)
@@ -282,7 +324,7 @@ def test_delete_team_activity_log(self, mock_capture: MagicMock, mock_delete_bul
"created_at": ANY,
"detail": {
"changes": None,
- "name": "Default Project",
+ "name": "Default project",
"short_id": None,
"trigger": None,
"type": None,
@@ -458,7 +500,7 @@ def test_reset_token(self):
"type": "Team",
},
],
- "name": "Default Project",
+ "name": "Default project",
"short_id": None,
"trigger": None,
"type": None,
@@ -492,7 +534,7 @@ def test_update_primary_dashboard(self):
self.assertEqual(response_data["primary_dashboard"], d.id)
def test_cant_set_primary_dashboard_to_another_teams_dashboard(self):
- team_2 = Team.objects.create(organization=self.organization, name="Default Project")
+ team_2 = Team.objects.create(organization=self.organization, name="Default project")
d = Dashboard.objects.create(name="Test", team=team_2)
response = self.client.patch("/api/projects/@current/", {"primary_dashboard": d.id})
@@ -565,7 +607,7 @@ def test_team_float_config_can_be_serialized_to_activity_log(self):
"type": "Team",
},
],
- "name": "Default Project",
+ "name": "Default project",
"short_id": None,
"trigger": None,
"type": None,
diff --git a/posthog/batch_exports/http.py b/posthog/batch_exports/http.py
index 5e84d7f446b3d..dd833f521c4f3 100644
--- a/posthog/batch_exports/http.py
+++ b/posthog/batch_exports/http.py
@@ -28,11 +28,9 @@
BatchExportSchema,
BatchExportServiceError,
BatchExportServiceRPCError,
- BatchExportServiceScheduleNotFound,
BatchExportWithNoEndNotAllowedError,
backfill_export,
- batch_export_delete_schedule,
- cancel_running_batch_export_backfill,
+ disable_and_delete_export,
pause_batch_export,
sync_batch_export,
unpause_batch_export,
@@ -43,7 +41,6 @@
from posthog.hogql.printer import prepare_ast_for_printing, print_prepared_ast
from posthog.models import (
BatchExport,
- BatchExportBackfill,
BatchExportDestination,
BatchExportRun,
Team,
@@ -436,23 +433,7 @@ def perform_destroy(self, instance: BatchExport):
since we are deleting, we assume that we can recover from this state by finishing the delete operation by calling
instance.save().
"""
- temporal = sync_connect()
-
- instance.deleted = True
-
- try:
- batch_export_delete_schedule(temporal, str(instance.pk))
- except BatchExportServiceScheduleNotFound as e:
- logger.warning(
- "The Schedule %s could not be deleted as it was not found",
- e.schedule_id,
- )
-
- instance.save()
-
- for backfill in BatchExportBackfill.objects.filter(batch_export=instance):
- if backfill.status == BatchExportBackfill.Status.RUNNING:
- cancel_running_batch_export_backfill(temporal, backfill.workflow_id)
+ disable_and_delete_export(instance)
class BatchExportOrganizationViewSet(BatchExportViewSet):
diff --git a/posthog/batch_exports/service.py b/posthog/batch_exports/service.py
index 4930665d13f6d..f3d5715220bf3 100644
--- a/posthog/batch_exports/service.py
+++ b/posthog/batch_exports/service.py
@@ -3,6 +3,7 @@
from dataclasses import asdict, dataclass, fields
from uuid import UUID
+import structlog
import temporalio
from asgiref.sync import async_to_sync
from temporalio.client import (
@@ -32,6 +33,8 @@
update_schedule,
)
+logger = structlog.get_logger(__name__)
+
class BatchExportField(typing.TypedDict):
"""A field to be queried from ClickHouse.
@@ -291,6 +294,27 @@ def unpause_batch_export(
backfill_export(temporal, batch_export_id, batch_export.team_id, start_at, end_at)
+def disable_and_delete_export(instance: BatchExport):
+    """Mark a BatchExport as deleted, delete its Temporal Schedule, and cancel any running backfills."""
+ temporal = sync_connect()
+
+ instance.deleted = True
+
+ try:
+ batch_export_delete_schedule(temporal, str(instance.pk))
+ except BatchExportServiceScheduleNotFound as e:
+ logger.warning(
+ "The Schedule %s could not be deleted as it was not found",
+ e.schedule_id,
+ )
+
+ instance.save()
+
+ for backfill in BatchExportBackfill.objects.filter(batch_export=instance):
+ if backfill.status == BatchExportBackfill.Status.RUNNING:
+ cancel_running_batch_export_backfill(temporal, backfill.workflow_id)
+
+
def batch_export_delete_schedule(temporal: Client, schedule_id: str) -> None:
"""Delete a Temporal Schedule."""
try:
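The hunk above moves export teardown out of the HTTP viewset and into the service layer, so any caller, not just a DELETE request, goes through one code path. A minimal sketch of the new call site, mirroring perform_destroy in http.py; delete_export_by_id is a hypothetical helper for illustration:

    from posthog.batch_exports.service import disable_and_delete_export
    from posthog.models import BatchExport

    def delete_export_by_id(batch_export_id: str) -> None:
        # Shared teardown: mark the export deleted, drop its Temporal Schedule,
        # and cancel any backfills that are still running.
        instance = BatchExport.objects.get(pk=batch_export_id)
        disable_and_delete_export(instance)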
diff --git a/posthog/clickhouse/migrations/0055_create_person_overrides_dictionary.py b/posthog/clickhouse/migrations/0055_create_person_overrides_dictionary.py
new file mode 100644
index 0000000000000..85ce30caf83f0
--- /dev/null
+++ b/posthog/clickhouse/migrations/0055_create_person_overrides_dictionary.py
@@ -0,0 +1,6 @@
+from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions
+from posthog.models.person.sql import CREATE_PERSON_DISTINCT_ID_OVERRIDES_DICTIONARY
+
+operations = [
+ run_sql_with_exceptions(CREATE_PERSON_DISTINCT_ID_OVERRIDES_DICTIONARY),
+]
diff --git a/posthog/clickhouse/migrations/0056_session_replay_embeddings_input.py b/posthog/clickhouse/migrations/0056_session_replay_embeddings_input.py
new file mode 100644
index 0000000000000..fd59501ed9575
--- /dev/null
+++ b/posthog/clickhouse/migrations/0056_session_replay_embeddings_input.py
@@ -0,0 +1,12 @@
+from posthog.clickhouse.client.migration_tools import run_sql_with_exceptions
+from posthog.session_recordings.sql.session_replay_embeddings_migrations import (
+ DISTRIBUTED_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN,
+ WRITEABLE_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN,
+ SHARDED_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN,
+)
+
+operations = [
+ run_sql_with_exceptions(DISTRIBUTED_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN()),
+ run_sql_with_exceptions(WRITEABLE_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN()),
+ run_sql_with_exceptions(SHARDED_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN()),
+]
diff --git a/posthog/clickhouse/schema.py b/posthog/clickhouse/schema.py
index 7c310bea2a9c1..0c0b5656ba204 100644
--- a/posthog/clickhouse/schema.py
+++ b/posthog/clickhouse/schema.py
@@ -1,27 +1,52 @@
# This file contains all CREATE TABLE queries, used to sync and test schema
import re
-from posthog.clickhouse.dead_letter_queue import *
+from posthog.clickhouse.dead_letter_queue import (
+ DEAD_LETTER_QUEUE_TABLE_SQL,
+ DEAD_LETTER_QUEUE_TABLE_MV_SQL,
+ KAFKA_DEAD_LETTER_QUEUE_TABLE_SQL,
+)
from posthog.clickhouse.log_entries import (
- KAFKA_LOG_ENTRIES_TABLE_SQL,
- LOG_ENTRIES_TABLE_MV_SQL,
LOG_ENTRIES_TABLE_SQL,
+ LOG_ENTRIES_TABLE_MV_SQL,
+ KAFKA_LOG_ENTRIES_TABLE_SQL,
+)
+from posthog.clickhouse.plugin_log_entries import (
+ PLUGIN_LOG_ENTRIES_TABLE_SQL,
+ PLUGIN_LOG_ENTRIES_TABLE_MV_SQL,
+ KAFKA_PLUGIN_LOG_ENTRIES_TABLE_SQL,
+)
+from posthog.models.app_metrics.sql import (
+ APP_METRICS_DATA_TABLE_SQL,
+ APP_METRICS_MV_TABLE_SQL,
+ KAFKA_APP_METRICS_TABLE_SQL,
+ DISTRIBUTED_APP_METRICS_TABLE_SQL,
)
-from posthog.clickhouse.plugin_log_entries import *
-from posthog.models.app_metrics.sql import *
from posthog.models.channel_type.sql import (
CHANNEL_DEFINITION_TABLE_SQL,
CHANNEL_DEFINITION_DATA_SQL,
CHANNEL_DEFINITION_DICTIONARY_SQL,
)
-from posthog.models.cohort.sql import *
-from posthog.models.event.sql import *
-from posthog.models.group.sql import *
+from posthog.models.cohort.sql import (
+ CREATE_COHORTPEOPLE_TABLE_SQL,
+)
+from posthog.models.event.sql import (
+ EVENTS_TABLE_SQL,
+ EVENTS_TABLE_JSON_MV_SQL,
+ WRITABLE_EVENTS_TABLE_SQL,
+ KAFKA_EVENTS_TABLE_JSON_SQL,
+ DISTRIBUTED_EVENTS_TABLE_SQL,
+)
+from posthog.models.group.sql import (
+ GROUPS_TABLE_SQL,
+ GROUPS_TABLE_MV_SQL,
+ KAFKA_GROUPS_TABLE_SQL,
+)
from posthog.models.ingestion_warnings.sql import (
- DISTRIBUTED_INGESTION_WARNINGS_TABLE_SQL,
INGESTION_WARNINGS_DATA_TABLE_SQL,
INGESTION_WARNINGS_MV_TABLE_SQL,
KAFKA_INGESTION_WARNINGS_TABLE_SQL,
+ DISTRIBUTED_INGESTION_WARNINGS_TABLE_SQL,
)
from posthog.models.performance.sql import (
DISTRIBUTED_PERFORMANCE_EVENTS_TABLE_SQL,
@@ -30,26 +55,46 @@
PERFORMANCE_EVENTS_TABLE_SQL,
WRITABLE_PERFORMANCE_EVENTS_TABLE_SQL,
)
-from posthog.models.person.sql import *
+from posthog.models.person.sql import (
+ PERSON_STATIC_COHORT_TABLE_SQL,
+ PERSONS_TABLE_SQL,
+ PERSONS_TABLE_MV_SQL,
+ KAFKA_PERSONS_TABLE_SQL,
+ PERSONS_DISTINCT_ID_TABLE_SQL,
+ PERSONS_DISTINCT_ID_TABLE_MV_SQL,
+ KAFKA_PERSONS_DISTINCT_ID_TABLE_SQL,
+ PERSON_DISTINCT_ID2_TABLE_SQL,
+ PERSON_DISTINCT_ID2_MV_SQL,
+ KAFKA_PERSON_DISTINCT_ID2_TABLE_SQL,
+ PERSON_DISTINCT_ID_OVERRIDES_TABLE_SQL,
+ PERSON_DISTINCT_ID_OVERRIDES_MV_SQL,
+ KAFKA_PERSON_DISTINCT_ID_OVERRIDES_TABLE_SQL,
+)
from posthog.models.person_overrides.sql import (
- KAFKA_PERSON_OVERRIDES_TABLE_SQL,
+ PERSON_OVERRIDES_CREATE_TABLE_SQL,
PERSON_OVERRIDES_CREATE_DICTIONARY_SQL,
PERSON_OVERRIDES_CREATE_MATERIALIZED_VIEW_SQL,
- PERSON_OVERRIDES_CREATE_TABLE_SQL,
+ KAFKA_PERSON_OVERRIDES_TABLE_SQL,
)
from posthog.models.sessions.sql import (
SESSIONS_TABLE_SQL,
+ SESSIONS_TABLE_MV_SQL,
WRITABLE_SESSIONS_TABLE_SQL,
DISTRIBUTED_SESSIONS_TABLE_SQL,
- SESSIONS_TABLE_MV_SQL,
SESSIONS_VIEW_SQL,
)
-from posthog.session_recordings.sql.session_recording_event_sql import *
+from posthog.session_recordings.sql.session_recording_event_sql import (
+ SESSION_RECORDING_EVENTS_TABLE_SQL,
+ SESSION_RECORDING_EVENTS_TABLE_MV_SQL,
+ KAFKA_SESSION_RECORDING_EVENTS_TABLE_SQL,
+ WRITABLE_SESSION_RECORDING_EVENTS_TABLE_SQL,
+ DISTRIBUTED_SESSION_RECORDING_EVENTS_TABLE_SQL,
+)
from posthog.session_recordings.sql.session_replay_event_sql import (
- DISTRIBUTED_SESSION_REPLAY_EVENTS_TABLE_SQL,
- KAFKA_SESSION_REPLAY_EVENTS_TABLE_SQL,
- SESSION_REPLAY_EVENTS_TABLE_MV_SQL,
SESSION_REPLAY_EVENTS_TABLE_SQL,
+ SESSION_REPLAY_EVENTS_TABLE_MV_SQL,
+ KAFKA_SESSION_REPLAY_EVENTS_TABLE_SQL,
+ DISTRIBUTED_SESSION_REPLAY_EVENTS_TABLE_SQL,
)
CREATE_MERGETREE_TABLE_QUERIES = (
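The import rewrite in schema.py trades wildcard imports for explicit lists, so every CREATE TABLE statement consumed by the tuples below is named at the top of the file: linters can flag unused or missing names, and grep can trace each statement back to its module. A before/after sketch of the pattern (the module path is real, the two-name selection is illustrative):

    # before: names appear from nowhere, and shadowing between modules is silent
    from posthog.models.person.sql import *

    # after: the provenance of each statement is explicit
    from posthog.models.person.sql import (
        PERSONS_TABLE_SQL,
        KAFKA_PERSONS_TABLE_SQL,
    )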
diff --git a/posthog/clickhouse/test/__snapshots__/test_schema.ambr b/posthog/clickhouse/test/__snapshots__/test_schema.ambr
index b40f78c801e09..74540ceca8b78 100644
--- a/posthog/clickhouse/test/__snapshots__/test_schema.ambr
+++ b/posthog/clickhouse/test/__snapshots__/test_schema.ambr
@@ -1713,7 +1713,7 @@
sumIf(1, event='$autocapture') as autocapture_count
FROM posthog_test.sharded_events
- WHERE `$session_id` IS NOT NULL AND `$session_id` != '' AND toStartOfDay(timestamp) >= '2024-03-08'
+ WHERE `$session_id` IS NOT NULL AND `$session_id` != ''
GROUP BY `$session_id`, team_id
'''
diff --git a/posthog/demo/products/hedgebox/matrix.py b/posthog/demo/products/hedgebox/matrix.py
index 719d7541356e9..dd3b7bf06859c 100644
--- a/posthog/demo/products/hedgebox/matrix.py
+++ b/posthog/demo/products/hedgebox/matrix.py
@@ -26,7 +26,21 @@
)
from .models import HedgeboxAccount, HedgeboxPerson
-from .taxonomy import *
+from .taxonomy import (
+ COMPANY_CLUSTERS_PROPORTION,
+ EVENT_SIGNED_UP,
+ EVENT_UPLOADED_FILE,
+ EVENT_DOWNLOADED_FILE,
+ EVENT_DELETED_FILE,
+ EVENT_SHARED_FILE_LINK,
+ EVENT_UPGRADED_PLAN,
+ EVENT_PAID_BILL,
+ URL_HOME,
+ URL_SIGNUP,
+ FILE_PREVIEWS_FLAG_KEY,
+ NEW_SIGNUP_PAGE_FLAG_KEY,
+ NEW_SIGNUP_PAGE_FLAG_ROLLOUT_PERCENT,
+)
@dataclass
diff --git a/posthog/demo/products/hedgebox/models.py b/posthog/demo/products/hedgebox/models.py
index add6fb8c73ce8..324dc6b473762 100644
--- a/posthog/demo/products/hedgebox/models.py
+++ b/posthog/demo/products/hedgebox/models.py
@@ -18,7 +18,36 @@
import pytz
from posthog.demo.matrix.models import Effect, SimPerson, SimSessionIntent
-from .taxonomy import *
+from .taxonomy import (
+ EVENT_SIGNED_UP,
+ EVENT_LOGGED_IN,
+ EVENT_UPLOADED_FILE,
+ EVENT_DOWNLOADED_FILE,
+ EVENT_DELETED_FILE,
+ EVENT_SHARED_FILE_LINK,
+ EVENT_UPGRADED_PLAN,
+ EVENT_PAID_BILL,
+ EVENT_DOWNGRADED_PLAN,
+ EVENT_INVITED_TEAM_MEMBER,
+ EVENT_REMOVED_TEAM_MEMBER,
+ EVENT_LOGGED_OUT,
+ URL_HOME,
+ URL_SIGNUP,
+ URL_LOGIN,
+ URL_MARIUS_TECH_TIPS,
+ URL_PRICING,
+ URL_FILES,
+ URL_ACCOUNT_SETTINGS,
+ URL_ACCOUNT_BILLING,
+ URL_ACCOUNT_TEAM,
+ NEW_SIGNUP_PAGE_FLAG_KEY,
+ NEW_SIGNUP_PAGE_FLAG_ROLLOUT_PERCENT,
+ SIGNUP_SUCCESS_RATE_TEST,
+ SIGNUP_SUCCESS_RATE_CONTROL,
+ GROUP_TYPE_ACCOUNT,
+ dyn_url_file,
+ dyn_url_invite,
+)
if TYPE_CHECKING:
from posthog.demo.products.hedgebox.matrix import HedgeboxCluster
diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py
index 72fef04af9dd8..52a65644f76cf 100644
--- a/posthog/hogql/database/database.py
+++ b/posthog/hogql/database/database.py
@@ -1,6 +1,7 @@
from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Literal, Optional, TypedDict
from zoneinfo import ZoneInfo, ZoneInfoNotFoundError
from pydantic import ConfigDict, BaseModel
+from sentry_sdk import capture_exception
from posthog.hogql import ast
from posthog.hogql.context import HogQLContext
from posthog.hogql.database.models import (
@@ -44,6 +45,7 @@
RawSessionReplayEventsTable,
SessionReplayEventsTable,
)
+from posthog.hogql.database.schema.sessions import RawSessionsTable, SessionsTable
from posthog.hogql.database.schema.static_cohort_people import StaticCohortPeople
from posthog.hogql.errors import HogQLException
from posthog.hogql.parser import parse_expr
@@ -72,6 +74,7 @@ class Database(BaseModel):
log_entries: LogEntriesTable = LogEntriesTable()
console_logs_log_entries: ReplayConsoleLogsLogEntriesTable = ReplayConsoleLogsLogEntriesTable()
batch_export_log_entries: BatchExportLogEntriesTable = BatchExportLogEntriesTable()
+ sessions: SessionsTable = SessionsTable()
raw_session_replay_events: RawSessionReplayEventsTable = RawSessionReplayEventsTable()
raw_person_distinct_ids: RawPersonDistinctIdsTable = RawPersonDistinctIdsTable()
@@ -79,6 +82,7 @@ class Database(BaseModel):
raw_groups: RawGroupsTable = RawGroupsTable()
raw_cohort_people: RawCohortPeople = RawCohortPeople()
raw_person_overrides: RawPersonOverridesTable = RawPersonOverridesTable()
+ raw_sessions: RawSessionsTable = RawSessionsTable()
# system tables
numbers: NumbersTable = NumbersTable()
@@ -94,6 +98,7 @@ class Database(BaseModel):
"cohortpeople",
"person_static_cohort",
"log_entries",
+ "sessions",
]
_warehouse_table_names: List[str] = []
@@ -194,56 +199,98 @@ def create_hogql_database(
for table in DataWarehouseTable.objects.filter(team_id=team.pk).exclude(deleted=True):
tables[table.name] = table.hogql_definition()
+ if modifiers.dataWarehouseEventsModifiers:
+ for warehouse_modifier in modifiers.dataWarehouseEventsModifiers:
+ # TODO: add all field mappings
+ if "id" not in tables[warehouse_modifier.table_name].fields.keys():
+ tables[warehouse_modifier.table_name].fields["id"] = ExpressionField(
+ name="id",
+ expr=parse_expr(warehouse_modifier.id_field),
+ )
+
+ if "timestamp" not in tables[warehouse_modifier.table_name].fields.keys():
+ tables[warehouse_modifier.table_name].fields["timestamp"] = ExpressionField(
+ name="timestamp",
+ expr=ast.Call(name="toDateTime", args=[ast.Field(chain=[warehouse_modifier.timestamp_field])]),
+ )
+
+ # TODO: Need to decide how the distinct_id and person_id fields are going to be handled
+ if "distinct_id" not in tables[warehouse_modifier.table_name].fields.keys():
+ tables[warehouse_modifier.table_name].fields["distinct_id"] = ExpressionField(
+ name="distinct_id",
+ expr=parse_expr(warehouse_modifier.distinct_id_field),
+ )
+
+ if "person_id" not in tables[warehouse_modifier.table_name].fields.keys():
+ tables[warehouse_modifier.table_name].fields["person_id"] = ExpressionField(
+ name="person_id",
+ expr=parse_expr(warehouse_modifier.distinct_id_field),
+ )
+
for saved_query in DataWarehouseSavedQuery.objects.filter(team_id=team.pk).exclude(deleted=True):
tables[saved_query.name] = saved_query.hogql_definition()
database.add_warehouse_tables(**tables)
for join in DataWarehouseJoin.objects.filter(team_id=team.pk).exclude(deleted=True):
- source_table = database.get_table(join.source_table_name)
- joining_table = database.get_table(join.joining_table_name)
-
- field = parse_expr(join.source_table_key)
- if not isinstance(field, ast.Field):
- raise HogQLException("Data Warehouse Join HogQL expression should be a Field node")
- from_field = field.chain
-
- field = parse_expr(join.joining_table_key)
- if not isinstance(field, ast.Field):
- raise HogQLException("Data Warehouse Join HogQL expression should be a Field node")
- to_field = field.chain
-
- source_table.fields[join.field_name] = LazyJoin(
- from_field=from_field,
- to_field=to_field,
- join_table=joining_table,
- join_function=join.join_function,
- )
+ try:
+ source_table = database.get_table(join.source_table_name)
+ joining_table = database.get_table(join.joining_table_name)
+
+ field = parse_expr(join.source_table_key)
+ if not isinstance(field, ast.Field):
+ raise HogQLException("Data Warehouse Join HogQL expression should be a Field node")
+ from_field = field.chain
+
+ field = parse_expr(join.joining_table_key)
+ if not isinstance(field, ast.Field):
+ raise HogQLException("Data Warehouse Join HogQL expression should be a Field node")
+ to_field = field.chain
+
+ source_table.fields[join.field_name] = LazyJoin(
+ from_field=from_field,
+ to_field=to_field,
+ join_table=joining_table,
+ join_function=join.join_function,
+ )
- if join.source_table_name == "persons":
- person_field = database.events.fields["person"]
- if isinstance(person_field, ast.FieldTraverser):
- table_or_field: ast.FieldOrTable = database.events
- for chain in person_field.chain:
- if isinstance(table_or_field, ast.LazyJoin):
- table_or_field = table_or_field.resolve_table(HogQLContext(team_id=team_id, database=database))
- if table_or_field.has_field(chain):
+ if join.source_table_name == "persons":
+ person_field = database.events.fields["person"]
+ if isinstance(person_field, ast.FieldTraverser):
+ table_or_field: ast.FieldOrTable = database.events
+ for chain in person_field.chain:
+ if isinstance(table_or_field, ast.LazyJoin):
+ table_or_field = table_or_field.resolve_table(
+ HogQLContext(team_id=team_id, database=database)
+ )
+ if table_or_field.has_field(chain):
+ table_or_field = table_or_field.get_field(chain)
+ if isinstance(table_or_field, ast.LazyJoin):
+ table_or_field = table_or_field.resolve_table(
+ HogQLContext(team_id=team_id, database=database)
+ )
+ elif isinstance(table_or_field, ast.Table):
table_or_field = table_or_field.get_field(chain)
- if isinstance(table_or_field, ast.LazyJoin):
- table_or_field = table_or_field.resolve_table(
- HogQLContext(team_id=team_id, database=database)
- )
- elif isinstance(table_or_field, ast.Table):
- table_or_field = table_or_field.get_field(chain)
-
- assert isinstance(table_or_field, ast.Table)
-
- table_or_field.fields[join.field_name] = LazyJoin(
- from_field=from_field,
- to_field=to_field,
- join_table=joining_table,
- join_function=join.join_function,
- )
+
+ assert isinstance(table_or_field, ast.Table)
+
+ if isinstance(table_or_field, ast.VirtualTable):
+ table_or_field.fields[join.field_name] = ast.FieldTraverser(chain=["..", join.field_name])
+ database.events.fields[join.field_name] = LazyJoin(
+ from_field=from_field,
+ to_field=to_field,
+ join_table=joining_table,
+ join_function=join.join_function,
+ )
+ else:
+ table_or_field.fields[join.field_name] = LazyJoin(
+ from_field=from_field,
+ to_field=to_field,
+ join_table=joining_table,
+ join_function=join.join_function,
+ )
+ except Exception as e:
+ capture_exception(e)
return database
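Wrapping each DataWarehouseJoin in try/except with capture_exception means one misconfigured join (a key that does not parse to a Field node, a table that has since been dropped) gets reported to Sentry instead of failing create_hogql_database for the entire team. A stripped-down sketch of the pattern; build_join stands in, hypothetically, for the parse-and-attach logic above:

    from sentry_sdk import capture_exception

    def attach_joins(database, joins):
        for join in joins:
            try:
                build_join(database, join)  # hypothetical: parse keys, attach the LazyJoin
            except Exception as e:
                # Skip only this join; the rest of the database stays queryable.
                capture_exception(e)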
diff --git a/posthog/hogql/database/schema/channel_type.py b/posthog/hogql/database/schema/channel_type.py
index 702681aeeb29f..5dee575fc59a3 100644
--- a/posthog/hogql/database/schema/channel_type.py
+++ b/posthog/hogql/database/schema/channel_type.py
@@ -41,8 +41,29 @@ def create_initial_domain_type(name: str):
def create_initial_channel_type(name: str):
return ExpressionField(
name=name,
- expr=parse_expr(
- """
+ expr=create_channel_type_expr(
+ campaign=ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_utm_campaign"])]),
+ medium=ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_utm_medium"])]),
+ source=ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_utm_source"])]),
+ referring_domain=ast.Call(
+ name="toString", args=[ast.Field(chain=["properties", "$initial_referring_domain"])]
+ ),
+ gclid=ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_gclid"])]),
+ gad_source=ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_gad_source"])]),
+ ),
+ )
+
+
+def create_channel_type_expr(
+ campaign: ast.Expr,
+ medium: ast.Expr,
+ source: ast.Expr,
+ referring_domain: ast.Expr,
+ gclid: ast.Expr,
+ gad_source: ast.Expr,
+) -> ast.Expr:
+ return parse_expr(
+ """
multiIf(
match({campaign}, 'cross-network'),
'Cross Network',
@@ -99,16 +120,13 @@ def create_initial_channel_type(name: str):
)
)
)""",
- start=None,
- placeholders={
- "campaign": ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_utm_campaign"])]),
- "medium": ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_utm_medium"])]),
- "source": ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_utm_source"])]),
- "referring_domain": ast.Call(
- name="toString", args=[ast.Field(chain=["properties", "$initial_referring_domain"])]
- ),
- "gclid": ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_gclid"])]),
- "gad_source": ast.Call(name="toString", args=[ast.Field(chain=["properties", "$initial_gad_source"])]),
- },
- ),
+ start=None,
+ placeholders={
+ "campaign": campaign,
+ "medium": medium,
+ "source": source,
+ "referring_domain": referring_domain,
+ "gclid": gclid,
+ "gad_source": gad_source,
+ },
)
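Extracting create_channel_type_expr separates the attribution decision tree from the origin of its inputs: create_initial_channel_type keeps feeding it $initial_* person properties, while the sessions table below feeds it argMinMerge aggregate states. A small sketch that classifies a hard-coded UTM combination, with constants standing in for the expressions a real caller would pass:

    from posthog.hogql import ast
    from posthog.hogql.database.schema.channel_type import create_channel_type_expr

    expr = create_channel_type_expr(
        campaign=ast.Constant(value="spring_sale"),
        medium=ast.Constant(value="cpc"),
        source=ast.Constant(value="google"),
        referring_domain=ast.Constant(value="www.google.com"),
        gclid=ast.Constant(value=None),
        gad_source=ast.Constant(value=None),
    )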
diff --git a/posthog/hogql/database/schema/sessions.py b/posthog/hogql/database/schema/sessions.py
new file mode 100644
index 0000000000000..2a4865798eeb8
--- /dev/null
+++ b/posthog/hogql/database/schema/sessions.py
@@ -0,0 +1,158 @@
+from typing import Dict, List, cast
+
+from posthog.hogql.database.models import (
+ StringDatabaseField,
+ DateTimeDatabaseField,
+ IntegerDatabaseField,
+ Table,
+ FieldOrTable,
+ StringArrayDatabaseField,
+ DatabaseField,
+ LazyTable,
+)
+from posthog.hogql.database.schema.channel_type import create_channel_type_expr
+from posthog.schema import HogQLQueryModifiers
+
+
+SESSIONS_COMMON_FIELDS: Dict[str, FieldOrTable] = {
+ "session_id": StringDatabaseField(name="session_id"),
+ "team_id": IntegerDatabaseField(name="team_id"),
+ "distinct_id": StringDatabaseField(name="distinct_id"),
+ "min_timestamp": DateTimeDatabaseField(name="min_timestamp"),
+ "max_timestamp": DateTimeDatabaseField(name="max_timestamp"),
+ "urls": StringArrayDatabaseField(name="urls"),
+ "entry_url": DatabaseField(name="entry_url"),
+ "exit_url": DatabaseField(name="exit_url"),
+ "initial_utm_source": DatabaseField(name="initial_utm_source"),
+ "initial_utm_campaign": DatabaseField(name="initial_utm_campaign"),
+ "initial_utm_medium": DatabaseField(name="initial_utm_medium"),
+ "initial_utm_term": DatabaseField(name="initial_utm_term"),
+ "initial_utm_content": DatabaseField(name="initial_utm_content"),
+ "initial_referring_domain": DatabaseField(name="initial_referring_domain"),
+ "initial_gclid": DatabaseField(name="initial_gclid"),
+ "initial_gad_source": DatabaseField(name="initial_gad_source"),
+ "event_count_map": DatabaseField(name="event_count_map"),
+ "pageview_count": IntegerDatabaseField(name="pageview_count"),
+ "autocapture_count": IntegerDatabaseField(name="autocapture_count"),
+}
+
+
+class RawSessionsTable(Table):
+ fields: Dict[str, FieldOrTable] = SESSIONS_COMMON_FIELDS
+
+ def to_printed_clickhouse(self, context):
+ return "sessions"
+
+ def to_printed_hogql(self):
+ return "raw_sessions"
+
+ def avoid_asterisk_fields(self) -> List[str]:
+ # our clickhouse driver can't return aggregate states
+ return [
+ "entry_url",
+ "exit_url",
+ "initial_utm_source",
+ "initial_utm_campaign",
+ "initial_utm_medium",
+ "initial_utm_term",
+ "initial_utm_content",
+ "initial_referring_domain",
+ "initial_gclid",
+ "initial_gad_source",
+ ]
+
+
+def select_from_sessions_table(requested_fields: Dict[str, List[str | int]]):
+ from posthog.hogql import ast
+
+ table_name = "raw_sessions"
+
+ aggregate_fields = {
+ "distinct_id": ast.Call(name="any", args=[ast.Field(chain=[table_name, "distinct_id"])]),
+ "min_timestamp": ast.Call(name="min", args=[ast.Field(chain=[table_name, "min_timestamp"])]),
+ "max_timestamp": ast.Call(name="max", args=[ast.Field(chain=[table_name, "max_timestamp"])]),
+ "urls": ast.Call(
+ name="arrayDistinct",
+ args=[
+ ast.Call(
+ name="arrayFlatten",
+ args=[ast.Call(name="groupArray", args=[ast.Field(chain=[table_name, "urls"])])],
+ )
+ ],
+ ),
+ "entry_url": ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "entry_url"])]),
+ "exit_url": ast.Call(name="argMaxMerge", args=[ast.Field(chain=[table_name, "exit_url"])]),
+ "initial_utm_source": ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_source"])]),
+ "initial_utm_campaign": ast.Call(
+ name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_campaign"])]
+ ),
+ "initial_utm_medium": ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_medium"])]),
+ "initial_utm_term": ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_term"])]),
+ "initial_utm_content": ast.Call(
+ name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_content"])]
+ ),
+ "initial_referring_domain": ast.Call(
+ name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_referring_domain"])]
+ ),
+ "initial_gclid": ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_gclid"])]),
+ "initial_gad_source": ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_gad_source"])]),
+ "event_count_map": ast.Call(
+ name="sumMap",
+ args=[ast.Field(chain=[table_name, "event_count_map"])],
+ ),
+ "pageview_count": ast.Call(name="sum", args=[ast.Field(chain=[table_name, "pageview_count"])]),
+ "autocapture_count": ast.Call(name="sum", args=[ast.Field(chain=[table_name, "autocapture_count"])]),
+ "duration": ast.Call(
+ name="dateDiff",
+ args=[
+ ast.Constant(value="second"),
+ ast.Call(name="min", args=[ast.Field(chain=[table_name, "min_timestamp"])]),
+ ast.Call(name="max", args=[ast.Field(chain=[table_name, "max_timestamp"])]),
+ ],
+ ),
+ "channel_type": create_channel_type_expr(
+ campaign=ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_campaign"])]),
+ medium=ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_medium"])]),
+ source=ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_utm_source"])]),
+ referring_domain=ast.Call(
+ name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_referring_domain"])]
+ ),
+ gclid=ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_gclid"])]),
+ gad_source=ast.Call(name="argMinMerge", args=[ast.Field(chain=[table_name, "initial_gad_source"])]),
+ ),
+ }
+
+ select_fields: List[ast.Expr] = []
+ group_by_fields: List[ast.Expr] = [ast.Field(chain=[table_name, "session_id"])]
+
+ for name, chain in requested_fields.items():
+ if name in aggregate_fields:
+ select_fields.append(ast.Alias(alias=name, expr=aggregate_fields[name]))
+ else:
+ select_fields.append(
+ ast.Alias(alias=name, expr=ast.Field(chain=cast(list[str | int], [table_name]) + chain))
+ )
+ group_by_fields.append(ast.Field(chain=cast(list[str | int], [table_name]) + chain))
+
+ return ast.SelectQuery(
+ select=select_fields,
+ select_from=ast.JoinExpr(table=ast.Field(chain=[table_name])),
+ group_by=group_by_fields,
+ )
+
+
+class SessionsTable(LazyTable):
+ fields: Dict[str, FieldOrTable] = {
+ **SESSIONS_COMMON_FIELDS,
+ "duration": IntegerDatabaseField(name="duration"),
+ "channel_type": StringDatabaseField(name="channel_type"),
+ }
+
+ def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers):
+ return select_from_sessions_table(requested_fields)
+
+ def to_printed_clickhouse(self, context):
+ return "sessions"
+
+ def to_printed_hogql(self):
+ return "sessions"
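SessionsTable is a LazyTable: selecting from sessions expands, via select_from_sessions_table, into a GROUP BY session_id over raw_sessions, finalizing the aggregate states behind entry_url, channel_type, duration, and the rest. A hedged usage sketch, assuming a Team instance named team is in scope:

    from posthog.hogql.parser import parse_select
    from posthog.hogql.query import execute_hogql_query

    # One row per session; the lazy select injects the aggregation transparently.
    response = execute_hogql_query(
        parse_select("select session_id, duration, channel_type from sessions limit 10"),
        team,
    )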
diff --git a/posthog/hogql/database/schema/test/test_sessions.py b/posthog/hogql/database/schema/test/test_sessions.py
new file mode 100644
index 0000000000000..dc3ba50b5be60
--- /dev/null
+++ b/posthog/hogql/database/schema/test/test_sessions.py
@@ -0,0 +1,57 @@
+from posthog.hogql import ast
+from posthog.hogql.parser import parse_select
+from posthog.hogql.query import execute_hogql_query
+from posthog.test.base import (
+ APIBaseTest,
+ ClickhouseTestMixin,
+ _create_event,
+)
+
+
+class TestSessions(ClickhouseTestMixin, APIBaseTest):
+ def test_select_star(self):
+ session_id = "session_test_select_star"
+
+ _create_event(
+ event="$pageview",
+ team=self.team,
+ distinct_id="d1",
+ properties={"$current_url": "https://example.com", "$session_id": session_id},
+ )
+
+ response = execute_hogql_query(
+ parse_select(
+ "select * from sessions where session_id = {session_id}",
+ placeholders={"session_id": ast.Constant(value=session_id)},
+ ),
+ self.team,
+ )
+
+ self.assertEqual(
+ len(response.results or []),
+ 1,
+ )
+
+ def test_channel_type(self):
+ session_id = "session_test_channel_type"
+
+ _create_event(
+ event="$pageview",
+ team=self.team,
+ distinct_id="d1",
+ properties={"gad_source": "1", "$session_id": session_id},
+ )
+
+ response = execute_hogql_query(
+ parse_select(
+ "select channel_type from sessions where session_id = {session_id}",
+ placeholders={"session_id": ast.Constant(value=session_id)},
+ ),
+ self.team,
+ )
+
+ result = (response.results or [])[0]
+ self.assertEqual(
+ result[0],
+ "Paid Search",
+ )
diff --git a/posthog/hogql/database/test/__snapshots__/test_database.ambr b/posthog/hogql/database/test/__snapshots__/test_database.ambr
index 7823b80094700..21c60457a1fd3 100644
--- a/posthog/hogql/database/test/__snapshots__/test_database.ambr
+++ b/posthog/hogql/database/test/__snapshots__/test_database.ambr
@@ -535,6 +535,44 @@
"type": "string"
}
],
+ "sessions": [
+ {
+ "key": "session_id",
+ "type": "string"
+ },
+ {
+ "key": "distinct_id",
+ "type": "string"
+ },
+ {
+ "key": "min_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "max_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "urls",
+ "type": "array"
+ },
+ {
+ "key": "pageview_count",
+ "type": "integer"
+ },
+ {
+ "key": "autocapture_count",
+ "type": "integer"
+ },
+ {
+ "key": "duration",
+ "type": "integer"
+ },
+ {
+ "key": "channel_type",
+ "type": "string"
+ }
+ ],
"raw_session_replay_events": [
{
"key": "session_id",
@@ -770,6 +808,36 @@
"type": "integer"
}
],
+ "raw_sessions": [
+ {
+ "key": "session_id",
+ "type": "string"
+ },
+ {
+ "key": "distinct_id",
+ "type": "string"
+ },
+ {
+ "key": "min_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "max_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "urls",
+ "type": "array"
+ },
+ {
+ "key": "pageview_count",
+ "type": "integer"
+ },
+ {
+ "key": "autocapture_count",
+ "type": "integer"
+ }
+ ],
"numbers": [
{
"key": "number",
@@ -1310,6 +1378,44 @@
"type": "string"
}
],
+ "sessions": [
+ {
+ "key": "session_id",
+ "type": "string"
+ },
+ {
+ "key": "distinct_id",
+ "type": "string"
+ },
+ {
+ "key": "min_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "max_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "urls",
+ "type": "array"
+ },
+ {
+ "key": "pageview_count",
+ "type": "integer"
+ },
+ {
+ "key": "autocapture_count",
+ "type": "integer"
+ },
+ {
+ "key": "duration",
+ "type": "integer"
+ },
+ {
+ "key": "channel_type",
+ "type": "string"
+ }
+ ],
"raw_session_replay_events": [
{
"key": "session_id",
@@ -1545,6 +1651,36 @@
"type": "integer"
}
],
+ "raw_sessions": [
+ {
+ "key": "session_id",
+ "type": "string"
+ },
+ {
+ "key": "distinct_id",
+ "type": "string"
+ },
+ {
+ "key": "min_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "max_timestamp",
+ "type": "datetime"
+ },
+ {
+ "key": "urls",
+ "type": "array"
+ },
+ {
+ "key": "pageview_count",
+ "type": "integer"
+ },
+ {
+ "key": "autocapture_count",
+ "type": "integer"
+ }
+ ],
"numbers": [
{
"key": "number",
diff --git a/posthog/hogql/database/test/test_database.py b/posthog/hogql/database/test/test_database.py
index ec1ade4231a04..da17e15c03107 100644
--- a/posthog/hogql/database/test/test_database.py
+++ b/posthog/hogql/database/test/test_database.py
@@ -1,5 +1,5 @@
import json
-from typing import Any
+from typing import Any, cast
from unittest.mock import patch
import pytest
@@ -7,15 +7,19 @@
from parameterized import parameterized
from posthog.hogql.database.database import create_hogql_database, serialize_database
-from posthog.hogql.database.models import FieldTraverser, StringDatabaseField, ExpressionField
+from posthog.hogql.database.models import FieldTraverser, LazyJoin, StringDatabaseField, ExpressionField, Table
+from django.test import override_settings
+from posthog.hogql.errors import HogQLException
from posthog.hogql.modifiers import create_default_modifiers_for_team
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.printer import print_ast
from posthog.hogql.context import HogQLContext
from posthog.models.group_type_mapping import GroupTypeMapping
+from posthog.models.organization import Organization
+from posthog.models.team.team import Team
from posthog.test.base import BaseTest
from posthog.warehouse.models import DataWarehouseTable, DataWarehouseCredential
from posthog.hogql.query import execute_hogql_query
+from posthog.warehouse.models.join import DataWarehouseJoin
class TestDatabase(BaseTest):
@@ -132,3 +136,155 @@ def test_database_expression_fields(self):
query
== "SELECT number AS number FROM (SELECT numbers.number AS number FROM numbers(2) AS numbers) LIMIT 10000"
), query
+
+ def test_database_warehouse_joins(self):
+ DataWarehouseJoin.objects.create(
+ team=self.team,
+ source_table_name="events",
+ source_table_key="event",
+ joining_table_name="groups",
+ joining_table_key="key",
+ field_name="some_field",
+ )
+
+ db = create_hogql_database(team_id=self.team.pk)
+ context = HogQLContext(
+ team_id=self.team.pk,
+ enable_select_queries=True,
+ database=db,
+ )
+
+ sql = "select some_field.key from events"
+ print_ast(parse_select(sql), context, dialect="clickhouse")
+
+ def test_database_warehouse_joins_deleted_join(self):
+ DataWarehouseJoin.objects.create(
+ team=self.team,
+ source_table_name="events",
+ source_table_key="lower(event)",
+ joining_table_name="groups",
+ joining_table_key="upper(key)",
+ field_name="some_field",
+ deleted=True,
+ )
+
+ db = create_hogql_database(team_id=self.team.pk)
+ context = HogQLContext(
+ team_id=self.team.pk,
+ enable_select_queries=True,
+ database=db,
+ )
+
+ sql = "select some_field.key from events"
+ with pytest.raises(HogQLException):
+ print_ast(parse_select(sql), context, dialect="clickhouse")
+
+ def test_database_warehouse_joins_other_team(self):
+ other_organization = Organization.objects.create(name="some_other_org")
+ other_team = Team.objects.create(organization=other_organization)
+
+ DataWarehouseJoin.objects.create(
+ team=other_team,
+ source_table_name="events",
+ source_table_key="lower(event)",
+ joining_table_name="groups",
+ joining_table_key="upper(key)",
+ field_name="some_field",
+ )
+
+ db = create_hogql_database(team_id=self.team.pk)
+ context = HogQLContext(
+ team_id=self.team.pk,
+ enable_select_queries=True,
+ database=db,
+ )
+
+ sql = "select some_field.key from events"
+ with pytest.raises(HogQLException):
+ print_ast(parse_select(sql), context, dialect="clickhouse")
+
+ def test_database_warehouse_joins_bad_key_expression(self):
+ DataWarehouseJoin.objects.create(
+ team=self.team,
+ source_table_name="events",
+ source_table_key="blah_de_blah(event)",
+ joining_table_name="groups",
+ joining_table_key="upper(key)",
+ field_name="some_field",
+ )
+
+ create_hogql_database(team_id=self.team.pk)
+
+ @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False)
+ def test_database_warehouse_joins_persons_no_poe(self):
+ DataWarehouseJoin.objects.create(
+ team=self.team,
+ source_table_name="persons",
+ source_table_key="properties.email",
+ joining_table_name="groups",
+ joining_table_key="key",
+ field_name="some_field",
+ )
+
+ db = create_hogql_database(team_id=self.team.pk)
+ context = HogQLContext(
+ team_id=self.team.pk,
+ enable_select_queries=True,
+ database=db,
+ )
+
+ pdi = cast(LazyJoin, db.events.fields["pdi"])
+ pdi_persons_join = cast(LazyJoin, pdi.resolve_table(context).fields["person"])
+ pdi_table = pdi_persons_join.resolve_table(context)
+
+ assert pdi_table.fields["some_field"] is not None
+
+ print_ast(parse_select("select person.some_field.key from events"), context, dialect="clickhouse")
+
+ @override_settings(PERSON_ON_EVENTS_OVERRIDE=True, PERSON_ON_EVENTS_V2_OVERRIDE=False)
+ def test_database_warehouse_joins_persons_poe_v1(self):
+ DataWarehouseJoin.objects.create(
+ team=self.team,
+ source_table_name="persons",
+ source_table_key="properties.email",
+ joining_table_name="groups",
+ joining_table_key="key",
+ field_name="some_field",
+ )
+
+ db = create_hogql_database(team_id=self.team.pk)
+ context = HogQLContext(
+ team_id=self.team.pk,
+ enable_select_queries=True,
+ database=db,
+ )
+
+ poe = cast(Table, db.events.fields["poe"])
+
+ assert poe.fields["some_field"] is not None
+
+ print_ast(parse_select("select person.some_field.key from events"), context, dialect="clickhouse")
+
+ @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=True)
+ def test_database_warehouse_joins_persons_poe_v2(self):
+ DataWarehouseJoin.objects.create(
+ team=self.team,
+ source_table_name="persons",
+ source_table_key="properties.email",
+ joining_table_name="groups",
+ joining_table_key="key",
+ field_name="some_field",
+ )
+
+ db = create_hogql_database(team_id=self.team.pk)
+ context = HogQLContext(
+ team_id=self.team.pk,
+ enable_select_queries=True,
+ database=db,
+ )
+
+ poe = cast(Table, db.events.fields["poe"])
+
+ assert poe.fields["some_field"] is not None
+
+ print_ast(parse_select("select person.some_field.key from events"), context, dialect="clickhouse")
diff --git a/posthog/hogql/functions/mapping.py b/posthog/hogql/functions/mapping.py
index 5490a0e453887..5edf1a68a826a 100644
--- a/posthog/hogql/functions/mapping.py
+++ b/posthog/hogql/functions/mapping.py
@@ -493,6 +493,7 @@ class HogQLFunctionMeta:
"JSONExtractKeys": HogQLFunctionMeta("JSONExtractKeys", 1, None),
"JSONExtractRaw": HogQLFunctionMeta("JSONExtractRaw", 1, None),
"JSONExtractArrayRaw": HogQLFunctionMeta("JSONExtractArrayRaw", 1, None),
+ "JSONExtractKeysAndValues": HogQLFunctionMeta("JSONExtractKeysAndValues", 1, 3),
"JSONExtractKeysAndValuesRaw": HogQLFunctionMeta("JSONExtractKeysAndValuesRaw", 1, None),
# in
"in": HogQLFunctionMeta("in", 2, 2),
@@ -578,6 +579,7 @@ class HogQLFunctionMeta:
"count": HogQLFunctionMeta("count", 0, 1, aggregate=True),
"COUNT": HogQLFunctionMeta("count", 0, 1, aggregate=True),
"countIf": HogQLFunctionMeta("countIf", 1, 2, aggregate=True),
+ "countDistinctIf": HogQLFunctionMeta("countIf", 1, 2, aggregate=True),
"min": HogQLFunctionMeta("min", 1, 1, aggregate=True),
"minIf": HogQLFunctionMeta("minIf", 2, 2, aggregate=True),
"max": HogQLFunctionMeta("max", 1, 1, aggregate=True),
@@ -610,6 +612,7 @@ class HogQLFunctionMeta:
"argMax": HogQLFunctionMeta("argMax", 2, 2, aggregate=True),
"argMaxIf": HogQLFunctionMeta("argMaxIf", 3, 3, aggregate=True),
"argMinMerge": HogQLFunctionMeta("argMinMerge", 1, 1, aggregate=True),
+ "argMaxMerge": HogQLFunctionMeta("argMaxMerge", 1, 1, aggregate=True),
"avgWeighted": HogQLFunctionMeta("avgWeighted", 2, 2, aggregate=True),
"avgWeightedIf": HogQLFunctionMeta("avgWeightedIf", 3, 3, aggregate=True),
# "topK": HogQLFunctionMeta("topK", 1, 1, aggregate=True),
@@ -650,6 +653,7 @@ class HogQLFunctionMeta:
"deltaSumTimestampIf": HogQLFunctionMeta("deltaSumTimestampIf", 3, 3, aggregate=True),
"sumMap": HogQLFunctionMeta("sumMap", 1, 2, aggregate=True),
"sumMapIf": HogQLFunctionMeta("sumMapIf", 2, 3, aggregate=True),
+ "sumMapMerge": HogQLFunctionMeta("sumMapMerge", 1, 1, aggregate=True),
"minMap": HogQLFunctionMeta("minMap", 1, 2, aggregate=True),
"minMapIf": HogQLFunctionMeta("minMapIf", 2, 3, aggregate=True),
"maxMap": HogQLFunctionMeta("maxMap", 1, 2, aggregate=True),
diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py
index 99b2749f2d024..ba9f92443b4e8 100644
--- a/posthog/hogql/property.py
+++ b/posthog/hogql/property.py
@@ -72,7 +72,14 @@ def property_to_expr(
scope: Literal["event", "person"] = "event",
) -> ast.Expr:
if isinstance(property, dict):
- property = Property(**property)
+        try:
+            property = Property(**property)
+        except (ValueError, TypeError):
+            # The property was saved as an incomplete object. Instead of crashing
+            # the entire query, pretend it's not there.
+            # TODO: revert this when removing legacy insights?
+            return ast.Constant(value=True)
elif isinstance(property, list):
properties = [property_to_expr(p, team, scope) for p in property]
if len(properties) == 0:
@@ -112,7 +119,12 @@ def property_to_expr(
else:
return ast.Or(exprs=[property_to_expr(p, team, scope) for p in property.values])
elif isinstance(property, BaseModel):
- property = Property(**property.dict())
+ try:
+ property = Property(**property.dict())
+ except ValueError:
+ # The property was saved as an incomplete object. Instead of crashing the entire query, pretend it's not there.
+ # TODO: revert this when removing legacy insights?
+ return ast.Constant(value=True)
else:
raise NotImplementedException(
f"property_to_expr with property of type {type(property).__name__} not implemented"
diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py
index 06c1769e47569..f271ee5e2f4ff 100644
--- a/posthog/hogql/test/test_property.py
+++ b/posthog/hogql/test/test_property.py
@@ -87,12 +87,7 @@ def test_property_to_expr_group(self):
self._parse_expr("group_0.properties.a = 'b' OR group_0.properties.a = 'c'"),
)
- with self.assertRaises(Exception) as e:
- self._property_to_expr({"type": "group", "key": "a", "value": "b"})
- self.assertEqual(
- str(e.exception),
- "Missing required attr group_type_index for property type group with key a",
- )
+ self.assertEqual(self._property_to_expr({"type": "group", "key": "a", "value": "b"}), self._parse_expr("1"))
def test_property_to_expr_event(self):
self.assertEqual(
@@ -155,6 +150,14 @@ def test_property_to_expr_event(self):
self._property_to_expr({"type": "event", "key": "a", "value": [], "operator": "exact"}),
self._parse_expr("true"),
)
+ self.assertEqual(
+ self._parse_expr("1"),
+ self._property_to_expr({"type": "event", "key": "a", "operator": "icontains"}), # value missing
+ )
+ self.assertEqual(
+ self._parse_expr("1"),
+ self._property_to_expr({}), # incomplete event
+ )
def test_property_to_expr_boolean(self):
PropertyDefinition.objects.create(
diff --git a/posthog/hogql_queries/insights/data_warehouse_mixin.py b/posthog/hogql_queries/insights/data_warehouse_mixin.py
new file mode 100644
index 0000000000000..9653915d5e6aa
--- /dev/null
+++ b/posthog/hogql_queries/insights/data_warehouse_mixin.py
@@ -0,0 +1,14 @@
+from posthog.hogql import ast
+from posthog.models.filters.mixins.utils import cached_property
+from posthog.schema import ActionsNode, EventsNode, DataWarehouseNode
+
+
+class DataWarehouseInsightQueryMixin:
+ series: EventsNode | ActionsNode | DataWarehouseNode
+
+ @cached_property
+ def _table_expr(self) -> ast.Field:
+ if isinstance(self.series, DataWarehouseNode):
+ return ast.Field(chain=[self.series.table_name])
+
+ return ast.Field(chain=["events"])
diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py
index ddecc4c0eb2cd..5d84328d2063d 100644
--- a/posthog/hogql_queries/insights/funnels/base.py
+++ b/posthog/hogql_queries/insights/funnels/base.py
@@ -432,8 +432,8 @@ def _get_inner_event_query(
extra_fields: List[str] = []
- # for prop in self._include_properties:
- # extra_fields.append(prop)
+ for prop in self.context.includeProperties:
+ extra_fields.append(prop)
funnel_events_query = FunnelEventQuery(
context=self.context,
@@ -623,13 +623,12 @@ def _build_step_query(
return event_expr
def _get_timestamp_outer_select(self) -> List[ast.Expr]:
- return []
- # if self._include_preceding_timestamp:
- # return ", max_timestamp, min_timestamp"
- # elif self._include_timestamp:
- # return ", timestamp"
- # else:
- # return ""
+ if self.context.includePrecedingTimestamp:
+ return [ast.Field(chain=["max_timestamp"]), ast.Field(chain=["min_timestamp"])]
+ elif self.context.includeTimestamp:
+ return [ast.Field(chain=["timestamp"])]
+ else:
+ return []
def _get_funnel_person_step_condition(self) -> ast.Expr:
actorsQuery, breakdownType, max_steps = (
@@ -673,10 +672,9 @@ def _get_funnel_person_step_events(self) -> List[ast.Expr]:
and self.context.actorsQuery.includeRecordings
):
step_num = self.context.actorsQuery.funnelStep
- # if self._filter.include_final_matching_events:
- if False: # TODO: Implement with correlations
+ if self.context.includeFinalMatchingEvents:
# Always returns the user's final step of the funnel
- return [parse_expr("final_matching_events as matching_events")] # type: ignore
+ return [parse_expr("final_matching_events as matching_events")]
elif step_num is None:
raise ValueError("Missing funnelStep actors query property")
if step_num >= 0:
@@ -772,45 +770,56 @@ def _get_timestamp_selects(self) -> Tuple[List[ast.Expr], List[ast.Expr]]:
Returns timestamp selectors for the target step and optionally the preceding step.
In the former case, always returns the timestamp for the first and last step as well.
"""
- # actorsQuery, max_steps = (
- # self.context.actorsQuery,
- # self.context.max_steps,
- # )
- # assert actorsQuery is not None
-
- # target_step = actorsQuery.funnelStep
- # final_step = max_steps - 1
- # first_step = 0
-
- # if not target_step:
- # return [], []
-
- # if target_step < 0:
- # # the first valid dropoff argument for funnel_step is -2
- # # -2 refers to persons who performed the first step but never made it to the second
- # if target_step == -1:
- # raise ValueError("To request dropoff of initial step use -2")
-
- # target_step = abs(target_step) - 2
- # else:
- # target_step -= 1
-
- # if self._include_preceding_timestamp:
- # if target_step == 0:
- # raise ValueError("Cannot request preceding step timestamp if target funnel step is the first step")
-
- # return (
- # f", latest_{target_step}, latest_{target_step - 1}",
- # f", argMax(latest_{target_step}, steps) as max_timestamp, argMax(latest_{target_step - 1}, steps) as min_timestamp",
- # )
- # elif self._include_timestamp:
- # return (
- # f", latest_{target_step}, latest_{final_step}, latest_{first_step}",
- # f", argMax(latest_{target_step}, steps) as timestamp, argMax(latest_{final_step}, steps) as final_timestamp, argMax(latest_{first_step}, steps) as first_timestamp",
- # )
- # else:
- # return [], []
- return [], []
+ actorsQuery, max_steps = (
+ self.context.actorsQuery,
+ self.context.max_steps,
+ )
+ if not actorsQuery:
+ return [], []
+
+ target_step = actorsQuery.funnelStep
+ final_step = max_steps - 1
+ first_step = 0
+
+ if not target_step:
+ return [], []
+
+ if target_step < 0:
+ # the first valid dropoff argument for funnel_step is -2
+ # -2 refers to persons who performed the first step but never made it to the second
+ if target_step == -1:
+ raise ValueError("To request dropoff of initial step use -2")
+
+ target_step = abs(target_step) - 2
+ else:
+ target_step -= 1
+
+ if self.context.includePrecedingTimestamp:
+ if target_step == 0:
+ raise ValueError("Cannot request preceding step timestamp if target funnel step is the first step")
+
+ return (
+ [ast.Field(chain=[f"latest_{target_step}"]), ast.Field(chain=[f"latest_{target_step - 1}"])],
+ [
+ parse_expr(f"argMax(latest_{target_step}, steps) as max_timestamp"),
+ parse_expr(f"argMax(latest_{target_step - 1}, steps) as min_timestamp"),
+ ],
+ )
+ elif self.context.includeTimestamp:
+ return (
+ [
+ ast.Field(chain=[f"latest_{target_step}"]),
+ ast.Field(chain=[f"latest_{final_step}"]),
+ ast.Field(chain=[f"latest_{first_step}"]),
+ ],
+ [
+ parse_expr(f"argMax(latest_{target_step}, steps) as timestamp"),
+ parse_expr(f"argMax(latest_{final_step}, steps) as final_timestamp"),
+ parse_expr(f"argMax(latest_{first_step}, steps) as first_timestamp"),
+ ],
+ )
+ else:
+ return [], []
def _get_step_times(self, max_steps: int) -> List[ast.Expr]:
windowInterval = self.context.funnelWindowInterval
@@ -990,10 +999,10 @@ def _get_sorting_condition(self, curr_index: int, max_steps: int) -> ast.Expr:
],
)
- def _get_person_and_group_properties(self) -> List[ast.Expr]:
+ def _get_person_and_group_properties(self, aggregate: bool = False) -> List[ast.Expr]:
exprs: List[ast.Expr] = []
- # for prop in self._include_properties:
- # exprs.append(f"any({prop}) as {prop}" if aggregate else prop)
+ for prop in self.context.includeProperties:
+ exprs.append(parse_expr(f"any({prop}) as {prop}") if aggregate else parse_expr(prop))
return exprs
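For context on the aggregate flag added above: with aggregate=True (used by the grouped step-counts queries, see the funnel.py change below) each included property is wrapped in any(...) so it survives the GROUP BY, while aggregate=False selects the bare column. Sketched for a single placeholder property name, assuming a PostHog environment:

from posthog.hogql.parser import parse_expr

prop = "some_prop"  # placeholder for an entry of context.includeProperties
outer = parse_expr(f"any({prop}) as {prop}")  # aggregate=True: grouped query
inner = parse_expr(prop)                      # aggregate=False: inner query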
diff --git a/posthog/hogql_queries/insights/funnels/funnel.py b/posthog/hogql_queries/insights/funnels/funnel.py
index e7425c6aed560..b5ce2bb7faf53 100644
--- a/posthog/hogql_queries/insights/funnels/funnel.py
+++ b/posthog/hogql_queries/insights/funnels/funnel.py
@@ -52,7 +52,7 @@ def get_step_counts_query(self):
max_steps = self.context.max_steps
breakdown_exprs = self._get_breakdown_prop_expr()
inner_timestamps, outer_timestamps = self._get_timestamp_selects()
- person_and_group_properties = self._get_person_and_group_properties()
+ person_and_group_properties = self._get_person_and_group_properties(aggregate=True)
group_by_columns: List[ast.Expr] = [
ast.Field(chain=["aggregation_target"]),
diff --git a/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py
new file mode 100644
index 0000000000000..945a1b7da5cce
--- /dev/null
+++ b/posthog/hogql_queries/insights/funnels/funnel_correlation_query_runner.py
@@ -0,0 +1,898 @@
+import dataclasses
+from datetime import timedelta
+from typing import List, Literal, Optional, Any, Dict, Set, TypedDict, cast
+
+from posthog.constants import AUTOCAPTURE_EVENT
+from posthog.hogql.parser import parse_select
+from posthog.hogql.property import property_to_expr
+from posthog.hogql_queries.insights.funnels.funnel_event_query import FunnelEventQuery
+from posthog.hogql_queries.insights.funnels.funnel_persons import FunnelActors
+from posthog.hogql_queries.insights.funnels.funnel_strict_persons import FunnelStrictActors
+from posthog.hogql_queries.insights.funnels.funnel_unordered_persons import FunnelUnorderedActors
+from posthog.models.action.action import Action
+from posthog.models.element.element import chain_to_elements
+from posthog.models.event.util import ElementSerializer
+from rest_framework.exceptions import ValidationError
+
+from posthog.hogql import ast
+from posthog.hogql.constants import LimitContext
+from posthog.hogql.printer import to_printed_hogql
+from posthog.hogql.query import execute_hogql_query
+from posthog.hogql.timings import HogQLTimings
+from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext
+from posthog.hogql_queries.insights.funnels.utils import funnel_window_interval_unit_to_sql, get_funnel_actor_class
+from posthog.hogql_queries.query_runner import QueryRunner
+from posthog.models import Team
+from posthog.models.property.util import get_property_string_expr
+from posthog.queries.util import correct_result_for_sampling
+from posthog.schema import (
+ ActionsNode,
+ CorrelationType,
+ EventDefinition,
+ EventsNode,
+ FunnelCorrelationActorsQuery,
+ FunnelCorrelationQuery,
+ FunnelCorrelationResponse,
+ FunnelCorrelationResult,
+ FunnelCorrelationResultsType,
+ FunnelsActorsQuery,
+ FunnelsQuery,
+ HogQLQueryModifiers,
+ HogQLQueryResponse,
+ EventOddsRatioSerialized,
+)
+
+
+class EventOddsRatio(TypedDict):
+ event: str
+
+ success_count: int
+ failure_count: int
+
+ odds_ratio: float
+ correlation_type: Literal["success", "failure"]
+
+
+@dataclasses.dataclass
+class EventStats:
+ success_count: int
+ failure_count: int
+
+
+@dataclasses.dataclass
+class EventContingencyTable:
+ """
+ Represents a contingency table for a single event. Note that this isn't a
+ complete contingency table, but rather only includes totals for
+    failure/success, as opposed to including the number of successes for cases
+    where a person _doesn't_ visit an event.
+ """
+
+ event: str
+ visited: EventStats
+
+ success_total: int
+ failure_total: int
+
+
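+# Prior added to every cell of the contingency table (see get_entity_odds_ratio) to avoid division by zero.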
+PRIOR_COUNT = 1
+
+
+class FunnelCorrelationQueryRunner(QueryRunner):
+ TOTAL_IDENTIFIER = "Total_Values_In_Query"
+ ELEMENTS_DIVIDER = "__~~__"
+ AUTOCAPTURE_EVENT_TYPE = "$event_type"
+ MIN_PERSON_COUNT = 25
+ MIN_PERSON_PERCENTAGE = 0.02
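+    # are_results_insignificant (below) drops events seen by fewer persons than
+    # min(MIN_PERSON_COUNT, MIN_PERSON_PERCENTAGE * total success/failure count).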
+
+ query: FunnelCorrelationQuery
+ query_type = FunnelCorrelationQuery
+ funnels_query: FunnelsQuery
+ actors_query: FunnelsActorsQuery
+ correlation_actors_query: Optional[FunnelCorrelationActorsQuery]
+
+ _funnel_actors_generator: FunnelActors | FunnelStrictActors | FunnelUnorderedActors
+
+ def __init__(
+ self,
+ query: FunnelCorrelationQuery | Dict[str, Any],
+ team: Team,
+ timings: Optional[HogQLTimings] = None,
+ modifiers: Optional[HogQLQueryModifiers] = None,
+ limit_context: Optional[LimitContext] = None,
+ ):
+ super().__init__(query, team=team, timings=timings, modifiers=modifiers, limit_context=limit_context)
+ self.actors_query = self.query.source
+ self.funnels_query = self.actors_query.source
+
+        # funnelStep defaults to 1, to give us all people who entered the funnel
+ if self.actors_query.funnelStep is None:
+ self.actors_query.funnelStep = 1
+
+ self.context = FunnelQueryContext(
+ query=self.funnels_query,
+ team=team,
+ timings=timings,
+ modifiers=modifiers,
+ limit_context=limit_context,
+ # NOTE: we want to include the latest timestamp of the `target_step`,
+ # from this we can deduce if the person reached the end of the funnel,
+ # i.e. successful
+ include_timestamp=True,
+ # NOTE: we don't need these as we have all the information we need to
+ # deduce if the person was successful or not
+ include_preceding_timestamp=False,
+ include_properties=self.properties_to_include,
+ # NOTE: we always use the final matching event for the recording because this
+            # is the right event for both drop-off and successful funnels
+ include_final_matching_events=self.actors_query.includeRecordings,
+ )
+ self.context.actorsQuery = self.actors_query
+
+ # Used for generating the funnel persons cte
+ funnel_order_actor_class = get_funnel_actor_class(self.context.funnelsFilter)(context=self.context)
+ assert isinstance(
+ funnel_order_actor_class, (FunnelActors, FunnelStrictActors, FunnelUnorderedActors)
+ ) # for typings
+ self._funnel_actors_generator = funnel_order_actor_class
+
+ def _is_stale(self, cached_result_package):
+ return True
+
+ def _refresh_frequency(self):
+ return timedelta(minutes=1)
+
+ def calculate(self) -> FunnelCorrelationResponse:
+ """
+ Funnel Correlation queries take as input the same as the funnel query,
+ and returns the correlation of person events with a person successfully
+ getting to the end of the funnel. We use Odds Ratios as the correlation
+ metric. See https://en.wikipedia.org/wiki/Odds_ratio for more details.
+
+ Roughly speaking, to calculate the odds ratio, we build a contingency
+ table https://en.wikipedia.org/wiki/Contingency_table for each
+ dimension, then calculate the odds ratio for each.
+
+ For example, take for simplicity the cohort of all people, and the
+ success criteria of having a "signed up" event. First we would build a
+ contingency table like:
+
+ | | success | failure | total |
+ | -----------------: | :-----: | :-----: | :---: |
+ | watched video | 5 | 1 | 6 |
+ | didn't watch video | 2 | 10 | 12 |
+
+
+ Then the odds that a person signs up given they watched the video is 5 /
+ 1.
+
+ And the odds that a person signs up given they didn't watch the video is
+ 2 / 10.
+
+        So we say the odds ratio is 5 / 1 over 2 / 10 = 25. The further away the
+ odds ratio is from 1, the greater the correlation.
+
+ Requirements:
+
+        - Initially we only need to consider the names of events that a cohort
+          person has emitted. So we are explicitly not interested in e.g.
+ correlating properties, although this will be a follow-up.
+
+ Non-functional requirements:
+
+ - there can be perhaps millions of people in a cohort, so we should
+          consider this when writing the algorithm, e.g. we should probably
+          avoid pulling all people across the wire.
+ - there can be an order of magnitude more events than people, so we
+ should avoid pulling all events across the wire.
+ - there may be a large but not huge number of distinct events, let's say
+ 100 different names for events. We should avoid n+1 queries for the
+          event names dimension.
+
+        Contingency tables are something we can pull out of the db, so we can
+ have a query that:
+
+ 1. filters people by the cohort criteria
+ 2. groups these people by the success criteria
+ 3. groups people by our criterion with which we want to test
+ correlation, e.g. "watched video"
+
+ --
+
+        For each event emitted by a person that started going through the funnel,
+        we get stats for how many of these users are successful and how many are
+        unsuccessful.
+
+        It's a partial table, as it doesn't include counts for the negation of the
+        event, but it does include the total success/failure numbers, which is enough
+ for us to calculate the odds ratio.
+ """
+ if not self.funnels_query.series:
+ return FunnelCorrelationResponse(results=FunnelCorrelationResult(events=[], skewed=False))
+
+ events, skewed_totals, hogql, response = self._calculate()
+
+ return FunnelCorrelationResponse(
+ results=FunnelCorrelationResult(
+ events=[self.serialize_event_odds_ratio(odds_ratio=odds_ratio) for odds_ratio in events],
+ skewed=skewed_totals,
+ ),
+ timings=response.timings,
+ hogql=hogql,
+ columns=response.columns,
+ types=response.types,
+ hasMore=response.hasMore,
+ limit=response.limit,
+ offset=response.offset,
+ )
+
+ def _calculate(self) -> tuple[List[EventOddsRatio], bool, str, HogQLQueryResponse]:
+ query = self.to_query()
+
+ hogql = to_printed_hogql(query, self.team)
+
+ response = execute_hogql_query(
+ query_type="FunnelsQuery",
+ query=query,
+ team=self.team,
+ timings=self.timings,
+ modifiers=self.modifiers,
+ )
+ assert response.results
+
+ # Get the total success/failure counts from the results
+ results = [result for result in response.results if result[0] != self.TOTAL_IDENTIFIER]
+ _, success_total, failure_total = [result for result in response.results if result[0] == self.TOTAL_IDENTIFIER][
+ 0
+ ]
+
+ # Add a little structure, and keep it close to the query definition so it's
+ # obvious what's going on with result indices.
+ event_contingency_tables = [
+ EventContingencyTable(
+ event=result[0],
+ visited=EventStats(success_count=result[1], failure_count=result[2]),
+ success_total=success_total,
+ failure_total=failure_total,
+ )
+ for result in results
+ ]
+
+ success_total = int(correct_result_for_sampling(success_total, self.funnels_query.samplingFactor))
+ failure_total = int(correct_result_for_sampling(failure_total, self.funnels_query.samplingFactor))
+
+ if not success_total or not failure_total:
+ return [], True, hogql, response
+
+ skewed_totals = False
+
+ # If the ratio is greater than 1:10, then we have a skewed result, so we should
+ # warn the user.
+ if success_total / failure_total > 10 or failure_total / success_total > 10:
+ skewed_totals = True
+
+ odds_ratios = [
+ get_entity_odds_ratio(event_stats, PRIOR_COUNT)
+ for event_stats in event_contingency_tables
+ if not self.are_results_insignificant(event_stats)
+ ]
+
+ positively_correlated_events = sorted(
+ [odds_ratio for odds_ratio in odds_ratios if odds_ratio["correlation_type"] == "success"],
+ key=lambda x: x["odds_ratio"],
+ reverse=True,
+ )
+
+ negatively_correlated_events = sorted(
+ [odds_ratio for odds_ratio in odds_ratios if odds_ratio["correlation_type"] == "failure"],
+ key=lambda x: x["odds_ratio"],
+ reverse=False,
+ )
+
+        # Return the top ten positively correlated events, and top ten negatively correlated events
+ events = positively_correlated_events[:10] + negatively_correlated_events[:10]
+ return events, skewed_totals, hogql, response
+
+ def serialize_event_odds_ratio(self, odds_ratio: EventOddsRatio) -> EventOddsRatioSerialized:
+ event_definition = self.serialize_event_with_property(event=odds_ratio["event"])
+ return EventOddsRatioSerialized(
+ success_count=odds_ratio["success_count"],
+ failure_count=odds_ratio["failure_count"],
+ odds_ratio=odds_ratio["odds_ratio"],
+ correlation_type=(
+ CorrelationType.success if odds_ratio["correlation_type"] == "success" else CorrelationType.failure
+ ),
+ event=event_definition,
+ )
+
+ def serialize_event_with_property(self, event: str) -> EventDefinition:
+ """
+ Format the event name for display.
+ """
+ if not self.support_autocapture_elements():
+ return EventDefinition(event=event, properties={}, elements=[])
+
+ event_name, property_name, property_value = event.split("::")
+ if event_name == AUTOCAPTURE_EVENT and property_name == "elements_chain":
+ event_type, elements_chain = property_value.split(self.ELEMENTS_DIVIDER)
+ return EventDefinition(
+ event=event,
+ properties={self.AUTOCAPTURE_EVENT_TYPE: event_type},
+ elements=cast(
+ list,
+ ElementSerializer(chain_to_elements(elements_chain), many=True).data,
+ ),
+ )
+
+ return EventDefinition(event=event, properties={}, elements=[])
+
+ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ """
+        Returns a HogQL query AST, which is used to generate the contingency table.
+        The query returns success and failure counts for event / property values, along with total success and failure counts.
+ """
+ if self.query.funnelCorrelationType == FunnelCorrelationResultsType.properties:
+ return self.get_properties_query()
+
+ if self.query.funnelCorrelationType == FunnelCorrelationResultsType.event_with_properties:
+ return self.get_event_property_query()
+
+ return self.get_event_query()
+
+ def to_actors_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ assert self.correlation_actors_query is not None
+
+ if self.query.funnelCorrelationType == FunnelCorrelationResultsType.properties:
+ # Filtering on persons / groups properties can be pushed down to funnel events query
+ if (
+ self.correlation_actors_query.funnelCorrelationPropertyValues
+ and len(self.correlation_actors_query.funnelCorrelationPropertyValues) > 0
+ ):
+ self.context.query.properties = [
+ *(self.context.query.properties or []),
+ *self.correlation_actors_query.funnelCorrelationPropertyValues,
+ ]
+ return self.properties_actor_query()
+ else:
+ return self.events_actor_query()
+
+ def events_actor_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ assert self.correlation_actors_query is not None
+
+ if not self.correlation_actors_query.funnelCorrelationPersonEntity:
+ raise ValidationError("No entity for persons specified")
+
+ assert isinstance(self.correlation_actors_query.funnelCorrelationPersonEntity, EventsNode)
+
+ target_step = self.context.max_steps
+ target_event = self.correlation_actors_query.funnelCorrelationPersonEntity.event
+ funnel_step_names = self._get_funnel_step_names()
+ funnel_persons_query = self.get_funnel_actors_cte()
+ funnel_event_query = FunnelEventQuery(context=self.context)
+ date_from = funnel_event_query._date_range().date_from_as_hogql()
+ date_to = funnel_event_query._date_range().date_to_as_hogql()
+
+ properties = self.correlation_actors_query.funnelCorrelationPersonEntity.properties
+ prop_query = None
+ if properties is not None and properties != []:
+ prop_query = property_to_expr(properties, self.team)
+
+ conversion_filter = (
+ f'AND funnel_actors.steps {"=" if self.correlation_actors_query.funnelCorrelationPersonConverted else "<>"} target_step'
+ if self.correlation_actors_query.funnelCorrelationPersonConverted is not None
+ else ""
+ )
+
+ event_join_query = self._get_events_join_query()
+
+ recording_event_select_statement = (
+ ", any(funnel_actors.matching_events) AS matching_events" if self.actors_query.includeRecordings else ""
+ )
+
+ query = parse_select(
+ f"""
+ WITH
+ funnel_actors as (
+ {{funnel_persons_query}}
+ ),
+ {{date_from}} AS date_from,
+ {{date_to}} AS date_to,
+ {target_step} AS target_step,
+ {funnel_step_names} AS funnel_step_names
+ SELECT
+ funnel_actors.actor_id AS actor_id
+ {recording_event_select_statement}
+ FROM events AS event
+ {event_join_query}
+ AND event.event = '{target_event}'
+ {conversion_filter}
+ GROUP BY actor_id
+ ORDER BY actor_id
+ """,
+ placeholders={
+ "funnel_persons_query": funnel_persons_query,
+ "date_from": date_from,
+ "date_to": date_to,
+ },
+ )
+
+ if prop_query:
+ assert isinstance(query, ast.SelectQuery)
+ assert isinstance(query.where, ast.And)
+ query.where.exprs = [*query.where.exprs, prop_query]
+
+ return query
+
+ def properties_actor_query(
+ self,
+ ) -> ast.SelectQuery | ast.SelectUnionQuery:
+ assert self.correlation_actors_query is not None
+
+ if not self.correlation_actors_query.funnelCorrelationPropertyValues:
+ raise ValidationError("Property Correlation expects atleast one Property to get persons for")
+
+ target_step = self.context.max_steps
+ funnel_persons_query = self.get_funnel_actors_cte()
+
+ conversion_filter = (
+ f'funnel_actors.steps {"=" if self.correlation_actors_query.funnelCorrelationPersonConverted else "<>"} target_step'
+ if self.correlation_actors_query.funnelCorrelationPersonConverted is not None
+ else ""
+ )
+
+ recording_event_select_statement = (
+ ", any(funnel_actors.matching_events) AS matching_events" if self.actors_query.includeRecordings else ""
+ )
+
+ query = parse_select(
+ f"""
+ WITH
+ funnel_actors as (
+ {{funnel_persons_query}}
+ ),
+ {target_step} AS target_step
+ SELECT
+ funnel_actors.actor_id AS actor_id
+ {recording_event_select_statement}
+ FROM funnel_actors
+ WHERE {conversion_filter}
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id
+ """,
+ placeholders={"funnel_persons_query": funnel_persons_query},
+ )
+
+ return query
+
+ def get_event_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ funnel_persons_query = self.get_funnel_actors_cte()
+ event_join_query = self._get_events_join_query()
+ target_step = self.context.max_steps
+ funnel_step_names = self._get_funnel_step_names()
+ funnel_event_query = FunnelEventQuery(context=self.context)
+ date_from = funnel_event_query._date_range().date_from_as_hogql()
+ date_to = funnel_event_query._date_range().date_to_as_hogql()
+
+ event_correlation_query = parse_select(
+ f"""
+ WITH
+ funnel_actors AS (
+ {{funnel_persons_query}}
+ ),
+ {{date_from}} AS date_from,
+ {{date_to}} AS date_to,
+ {target_step} AS target_step,
+ {funnel_step_names} AS funnel_step_names
+
+ SELECT
+ event.event AS name,
+
+ -- If we have a `person.steps = target_step`, we know the person
+ -- reached the end of the funnel
+ countDistinctIf(
+ funnel_actors.actor_id,
+ funnel_actors.steps = target_step
+ ) AS success_count,
+
+ -- And the converse being for failures
+ countDistinctIf(
+ funnel_actors.actor_id,
+ funnel_actors.steps <> target_step
+ ) AS failure_count
+
+ FROM events AS event
+ {event_join_query}
+ AND event.event NOT IN {self.query.funnelCorrelationExcludeEventNames or []}
+ GROUP BY name
+
+ -- To get the total success/failure numbers, we do an aggregation on
+ -- the funnel people CTE and count distinct actor_ids
+ UNION ALL
+
+ -- :HACKY: HogQL does not have access to a CTE in the second union query, thus
+ -- we're repeating the CTE here. This likely is a big hit on query performance.
+ WITH
+ funnel_actors AS (
+ {{funnel_persons_query}}
+ ),
+ {target_step} AS target_step
+
+ SELECT
+ -- We're not using WITH TOTALS because the resulting queries are
+ -- not runnable in Metabase
+ '{self.TOTAL_IDENTIFIER}' as name,
+
+ countDistinctIf(
+ funnel_actors.actor_id,
+ funnel_actors.steps = target_step
+ ) AS success_count,
+
+ countDistinctIf(
+ funnel_actors.actor_id,
+ funnel_actors.steps <> target_step
+ ) AS failure_count
+ FROM funnel_actors
+ """,
+ placeholders={
+ "funnel_persons_query": funnel_persons_query,
+ "date_from": date_from,
+ "date_to": date_to,
+ },
+ )
+
+ return event_correlation_query
+
+ def get_event_property_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ if not self.query.funnelCorrelationEventNames:
+ raise ValidationError("Event Property Correlation expects atleast one event name to run correlation on")
+
+ funnel_persons_query = self.get_funnel_actors_cte()
+ event_join_query = self._get_events_join_query()
+ target_step = self.context.max_steps
+ funnel_step_names = self._get_funnel_step_names()
+ funnel_event_query = FunnelEventQuery(context=self.context)
+ date_from = funnel_event_query._date_range().date_from_as_hogql()
+ date_to = funnel_event_query._date_range().date_to_as_hogql()
+ event_names = self.query.funnelCorrelationEventNames
+ exclude_property_names = self.query.funnelCorrelationEventExcludePropertyNames or []
+
+ if self.support_autocapture_elements():
+ event_type_expression, _ = get_property_string_expr(
+ "events",
+ self.AUTOCAPTURE_EVENT_TYPE,
+ f"'{self.AUTOCAPTURE_EVENT_TYPE}'",
+ "properties",
+ allow_denormalized_props=False,
+ )
+ array_join_query = f"""
+ 'elements_chain' as prop_key,
+ concat({event_type_expression}, '{self.ELEMENTS_DIVIDER}', elements_chain) as prop_value,
+ tuple(prop_key, prop_value) as prop
+ """
+ else:
+ array_join_query = f"""
+ arrayJoin(JSONExtractKeysAndValues(properties, 'String')) as prop
+ """
+
+ query = parse_select(
+ f"""
+ WITH
+ funnel_actors AS (
+ {{funnel_persons_query}}
+ ),
+ {{date_from}} AS date_from,
+ {{date_to}} AS date_to,
+ {target_step} AS target_step,
+ {funnel_step_names} AS funnel_step_names
+
+ SELECT concat(event_name, '::', prop.1, '::', prop.2) as name,
+ countDistinctIf(actor_id, steps = target_step) as success_count,
+ countDistinctIf(actor_id, steps <> target_step) as failure_count
+ FROM (
+ SELECT
+ funnel_actors.actor_id as actor_id,
+ funnel_actors.steps as steps,
+ event.event as event_name,
+ -- Same as what we do in $all property queries
+ {array_join_query}
+ FROM events AS event
+ {event_join_query}
+ AND event.event IN {event_names}
+ )
+ GROUP BY name
+ -- Discard high cardinality / low hits properties
+ -- This removes the long tail of random properties with empty, null, or very small values
+ HAVING (success_count + failure_count) > 2
+ AND prop.1 NOT IN {exclude_property_names}
+
+ UNION ALL
+ -- To get the total success/failure numbers, we do an aggregation on
+ -- the funnel people CTE and count distinct actor_ids
+
+ -- :HACKY: HogQL does not have access to a CTE in the second union query, thus
+ -- we're repeating the CTE here. This likely is a big hit on query performance.
+ WITH
+ funnel_actors AS (
+ {{funnel_persons_query}}
+ ),
+ {target_step} AS target_step
+
+ SELECT
+ '{self.TOTAL_IDENTIFIER}' as name,
+
+ countDistinctIf(
+ funnel_actors.actor_id,
+ funnel_actors.steps = target_step
+ ) AS success_count,
+
+ countDistinctIf(
+ funnel_actors.actor_id,
+ funnel_actors.steps <> target_step
+ ) AS failure_count
+ FROM funnel_actors
+ """,
+ placeholders={
+ "funnel_persons_query": funnel_persons_query,
+ "date_from": date_from,
+ "date_to": date_to,
+ },
+ )
+
+ return query
+
+ def get_properties_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ if not self.query.funnelCorrelationNames:
+ raise ValidationError("Property Correlation expects atleast one Property to run correlation on")
+
+ funnel_persons_query = self.get_funnel_actors_cte()
+ target_step = self.context.max_steps
+ exclude_property_names = self.query.funnelCorrelationExcludeNames or []
+
+ person_prop_query = self._get_properties_prop_clause()
+ aggregation_join_query = self._get_aggregation_join_query()
+
+ query = parse_select(
+ f"""
+ WITH
+ funnel_actors AS (
+ {{funnel_persons_query}}
+ ),
+ {target_step} AS target_step
+ SELECT
+ concat(prop.1, '::', prop.2) as name,
+ -- We generate a unique identifier for each property value as: PropertyName::Value
+ countDistinctIf(actor_id, steps = target_step) AS success_count,
+ countDistinctIf(actor_id, steps <> target_step) AS failure_count
+ FROM (
+ SELECT
+ actor_id,
+ funnel_actors.steps as steps,
+ /*
+ We can extract multiple property values at the same time, since we're
+ already querying the person table.
+ This gives us something like:
+ --------------------
+ person1, steps, [property_value_0, property_value_1, property_value_2]
+ person2, steps, [property_value_0, property_value_1, property_value_2]
+
+ To group by property name, we need to extract the property from the array. ArrayJoin helps us do that.
+ It transforms the above into:
+
+ --------------------
+
+ person1, steps, property_value_0
+ person1, steps, property_value_1
+ person1, steps, property_value_2
+
+ person2, steps, property_value_0
+ person2, steps, property_value_1
+ person2, steps, property_value_2
+
+ To avoid clashes and clarify the values, we also zip with the property name, to generate
+ tuples like: (property_name, property_value), which we then group by
+ */
+ {person_prop_query}
+ FROM funnel_actors
+ {aggregation_join_query}
+
+ ) aggregation_target_with_props
+ -- Group by the tuple items: (property_name, property_value) generated by zip
+ GROUP BY prop.1, prop.2
+ HAVING prop.1 NOT IN {exclude_property_names}
+
+ UNION ALL
+
+ -- :HACKY: HogQL does not have access to a CTE in the second union query, thus
+ -- we're repeating the CTE here. This likely is a big hit on query performance.
+ WITH
+ funnel_actors AS (
+ {{funnel_persons_query}}
+ ),
+ {target_step} AS target_step
+
+ SELECT
+ '{self.TOTAL_IDENTIFIER}' as name,
+ countDistinctIf(actor_id, steps = target_step) AS success_count,
+ countDistinctIf(actor_id, steps <> target_step) AS failure_count
+ FROM funnel_actors
+ """,
+ placeholders={
+ "funnel_persons_query": funnel_persons_query,
+ },
+ )
+
+ return query
+
+ def get_funnel_actors_cte(self) -> ast.SelectQuery:
+ extra_fields = ["steps", "final_timestamp", "first_timestamp"]
+
+ for prop in self.properties_to_include:
+ extra_fields.append(prop)
+
+ return self._funnel_actors_generator.actor_query(extra_fields=extra_fields)
+
+ def _get_events_join_query(self) -> str:
+ """
+ This query is used to join and filter the events table corresponding to the funnel_actors CTE.
+ It expects the following variables to be present in the CTE expression:
+ - funnel_actors
+ - date_to
+ - date_from
+ - funnel_step_names
+ """
+ windowInterval = self.context.funnelWindowInterval
+ windowIntervalUnit = funnel_window_interval_unit_to_sql(self.context.funnelWindowIntervalUnit)
+
+ return f"""
+ {self._get_aggregation_target_join_query()}
+
+ -- Make sure we're only looking at events before the final step, or
+ -- failing that, date_to
+ WHERE
+ -- add this condition in to ensure we can filter events before
+ -- joining funnel_actors
+ toTimeZone(toDateTime(event.timestamp), 'UTC') >= date_from
+ AND toTimeZone(toDateTime(event.timestamp), 'UTC') < date_to
+
+ AND event.team_id = {self.context.team.pk}
+
+ -- Add in per actor filtering on event time range. We just want
+ -- to include events that happened within the bounds of the
+ -- actors time in the funnel.
+ AND toTimeZone(toDateTime(event.timestamp), 'UTC') > funnel_actors.first_timestamp
+ AND toTimeZone(toDateTime(event.timestamp), 'UTC') < coalesce(
+ funnel_actors.final_timestamp,
+ funnel_actors.first_timestamp + INTERVAL {windowInterval} {windowIntervalUnit},
+ date_to)
+ -- Ensure that the event is not outside the bounds of the funnel conversion window
+
+ -- Exclude funnel steps
+ AND event.event NOT IN funnel_step_names
+ """
+
+ def _get_aggregation_target_join_query(self) -> str:
+ aggregation_person_join = f"""
+ JOIN funnel_actors
+ ON event.person_id = funnel_actors.actor_id
+ """
+
+ aggregation_group_join = f"""
+ JOIN funnel_actors
+ ON funnel_actors.actor_id = event.$group_{self.funnels_query.aggregation_group_type_index}
+ """
+
+ return (
+ aggregation_group_join
+ if self.funnels_query.aggregation_group_type_index is not None
+ else aggregation_person_join
+ )
+
+ def _get_aggregation_join_query(self):
+ if self.funnels_query.aggregation_group_type_index is None:
+ return f"JOIN (SELECT id, properties as person_props FROM persons) persons ON persons.id = funnel_actors.actor_id"
+ else:
+ group_type_index = self.funnels_query.aggregation_group_type_index
+ return f"""
+ LEFT JOIN (
+ SELECT
+ key,
+ properties --AS group_properties_{group_type_index}
+ FROM groups
+ WHERE index = {group_type_index}
+ ) groups_{group_type_index}
+ ON funnel_actors.actor_id == groups_{group_type_index}.key
+ """
+
+ def _get_properties_prop_clause(self):
+ assert self.query.funnelCorrelationNames is not None
+
+ if self.funnels_query.aggregation_group_type_index is None:
+ properties_prefix = "person_props"
+ else:
+ properties_prefix = f"groups_{self.funnels_query.aggregation_group_type_index}.properties"
+ if "$all" in self.query.funnelCorrelationNames:
+ return f"arrayJoin(JSONExtractKeysAndValues({properties_prefix}, 'String')) as prop"
+ else:
+ props = [
+ f"JSONExtractString({properties_prefix}, '{property_name}')"
+ for property_name in self.query.funnelCorrelationNames
+ ]
+ props_str = ", ".join(props)
+ return f"arrayJoin(arrayZip({self.query.funnelCorrelationNames}, [{props_str}])) as prop"
+
+ def _get_funnel_step_names(self) -> List[str]:
+ events: Set[str] = set()
+ for entity in self.funnels_query.series:
+ if isinstance(entity, ActionsNode):
+ action = Action.objects.get(pk=int(entity.id), team=self.context.team)
+ events.update(action.get_step_events())
+ elif isinstance(entity, EventsNode):
+ if entity.event is not None:
+ events.add(entity.event)
+ else:
+ raise ValidationError("Data warehouse nodes are not supported here")
+
+ return sorted(list(events))
+
+ @property
+ def properties_to_include(self) -> List[str]:
+ props_to_include: List[str] = []
+ # TODO: implement or remove
+ # if self.query.funnelCorrelationType == FunnelCorrelationResultsType.properties:
+ # assert self.query.funnelCorrelationNames is not None
+
+ # # When dealing with properties, make sure funnel response comes with properties
+ # # so we don't have to join on persons/groups to get these properties again
+
+ # for property_name in self.query.funnelCorrelationNames:
+ # if self.funnels_query.aggregation_group_type_index is not None:
+ # if "$all" == property_name:
+ # return []
+ # else:
+ # if "$all" == property_name:
+ # return []
+
+ return props_to_include
+
+ def support_autocapture_elements(self) -> bool:
+ if (
+ self.query.funnelCorrelationType == FunnelCorrelationResultsType.event_with_properties
+ and AUTOCAPTURE_EVENT in (self.query.funnelCorrelationEventNames or [])
+ ):
+ return True
+ return False
+
+ @staticmethod
+ def are_results_insignificant(event_contingency_table: EventContingencyTable) -> bool:
+ """
+        Check if the results are insignificant, i.e. if the event was visited by
+        fewer persons than min(MIN_PERSON_COUNT, MIN_PERSON_PERCENTAGE * total),
+        where total is the funnel's overall success + failure count
+ """
+
+ total_count = event_contingency_table.success_total + event_contingency_table.failure_total
+
+ if event_contingency_table.visited.success_count + event_contingency_table.visited.failure_count < min(
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT,
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE * total_count,
+ ):
+ return True
+
+ return False
+
+
+def get_entity_odds_ratio(event_contingency_table: EventContingencyTable, prior_counts: int) -> EventOddsRatio:
+    # Add the prior count to all values to prevent divide-by-zero errors, and introduce a [prior](https://en.wikipedia.org/wiki/Prior_probability)
+ odds_ratio = (
+ (event_contingency_table.visited.success_count + prior_counts)
+ * (event_contingency_table.failure_total - event_contingency_table.visited.failure_count + prior_counts)
+ ) / (
+ (event_contingency_table.success_total - event_contingency_table.visited.success_count + prior_counts)
+ * (event_contingency_table.visited.failure_count + prior_counts)
+ )
+
+ return EventOddsRatio(
+ event=event_contingency_table.event,
+ success_count=event_contingency_table.visited.success_count,
+ failure_count=event_contingency_table.visited.failure_count,
+ odds_ratio=odds_ratio,
+ correlation_type="success" if odds_ratio > 1 else "failure",
+ )
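A worked example of get_entity_odds_ratio, reusing the contingency table from the calculate() docstring ("watched video": 5 successes, 1 failure; column totals: 7 successes, 11 failures) with PRIOR_COUNT = 1. The local dataclasses mirror EventStats / EventContingencyTable so the snippet runs standalone:

from dataclasses import dataclass


@dataclass
class Stats:
    success_count: int
    failure_count: int


@dataclass
class Table:
    visited: Stats
    success_total: int
    failure_total: int


t = Table(visited=Stats(5, 1), success_total=7, failure_total=11)
prior = 1  # PRIOR_COUNT
odds_ratio = ((t.visited.success_count + prior) * (t.failure_total - t.visited.failure_count + prior)) / (
    (t.success_total - t.visited.success_count + prior) * (t.visited.failure_count + prior)
)
print(odds_ratio)  # (6 * 11) / (3 * 2) = 11.0 -> "success" correlation, since > 1

Note how the prior shrinks the docstring's raw 25x ratio down to 11: events with tiny counts are pulled toward 1 instead of producing extreme ratios.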
diff --git a/posthog/hogql_queries/insights/funnels/funnel_event_query.py b/posthog/hogql_queries/insights/funnels/funnel_event_query.py
index ebf4dad02c555..f2d0e115e2d0b 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_event_query.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_event_query.py
@@ -93,7 +93,7 @@ def _sample_expr(self) -> ast.SampleExpr | None:
else:
return ast.SampleExpr(sample_value=ast.RatioExpr(left=ast.Constant(value=query.samplingFactor)))
- def _date_range_expr(self) -> ast.Expr:
+ def _date_range(self) -> QueryDateRange:
team, query, now = self.context.team, self.context.query, self.context.now
date_range = QueryDateRange(
@@ -103,17 +103,20 @@ def _date_range_expr(self) -> ast.Expr:
now=now,
)
+ return date_range
+
+ def _date_range_expr(self) -> ast.Expr:
return ast.And(
exprs=[
ast.CompareOperation(
op=ast.CompareOperationOp.GtEq,
left=ast.Field(chain=[self.EVENT_TABLE_ALIAS, "timestamp"]),
- right=ast.Constant(value=date_range.date_from()),
+ right=ast.Constant(value=self._date_range().date_from()),
),
ast.CompareOperation(
op=ast.CompareOperationOp.LtEq,
left=ast.Field(chain=[self.EVENT_TABLE_ALIAS, "timestamp"]),
- right=ast.Constant(value=date_range.date_to()),
+ right=ast.Constant(value=self._date_range().date_to()),
),
]
)
@@ -131,8 +134,11 @@ def _entity_expr(self, skip_entity_filter: bool) -> ast.Expr | None:
if isinstance(node, EventsNode) or isinstance(node, FunnelExclusionEventsNode):
events.add(node.event)
elif isinstance(node, ActionsNode) or isinstance(node, FunnelExclusionActionsNode):
- action = Action.objects.get(pk=int(node.id), team=team)
- events.update(action.get_step_events())
+ try:
+ action = Action.objects.get(pk=int(node.id), team=team)
+ events.update(action.get_step_events())
+ except Action.DoesNotExist:
+ raise ValidationError(f"Action ID {node.id} does not exist!")
else:
raise ValidationError("Series and exclusions must be compose of action and event nodes")
diff --git a/posthog/hogql_queries/insights/funnels/funnel_persons.py b/posthog/hogql_queries/insights/funnels/funnel_persons.py
index 070e4895ca9ac..68781c6bbd0c8 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_persons.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_persons.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional
from posthog.hogql import ast
from posthog.hogql_queries.insights.funnels.funnel import Funnel
@@ -7,13 +7,13 @@
class FunnelActors(Funnel):
def actor_query(
self,
- # extra_fields: Optional[List[str]] = None,
+ extra_fields: Optional[List[str]] = None,
) -> ast.SelectQuery:
select: List[ast.Expr] = [
ast.Alias(alias="actor_id", expr=ast.Field(chain=["aggregation_target"])),
*self._get_funnel_person_step_events(),
*self._get_timestamp_outer_select(),
- # {extra_fields}
+ *([ast.Field(chain=[field]) for field in extra_fields or []]),
]
select_from = ast.JoinExpr(table=self.get_step_counts_query())
where = self._get_funnel_person_step_condition()
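The restored extra_fields parameter simply appends bare column references to the actor query's SELECT list; this is what lets the correlation runner's get_funnel_actors_cte() pull steps, final_timestamp, and first_timestamp (plus any included properties) out of the funnel actors query. A minimal sketch of the expansion, assuming a PostHog environment:

from posthog.hogql import ast

extra_fields = ["steps", "final_timestamp", "first_timestamp"]
select_tail = [ast.Field(chain=[field]) for field in extra_fields]
# three ast.Field nodes, appended after the timestamp selects in actor_query()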
diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py
index 9c5fa6686c3cc..66a0d28ad3d7f 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_query_context.py
@@ -34,6 +34,11 @@ class FunnelQueryContext(QueryContext):
actorsQuery: FunnelsActorsQuery | None
+ includeTimestamp: Optional[bool]
+ includePrecedingTimestamp: Optional[bool]
+ includeProperties: List[str]
+ includeFinalMatchingEvents: Optional[bool]
+
def __init__(
self,
query: FunnelsQuery,
@@ -41,6 +46,10 @@ def __init__(
timings: Optional[HogQLTimings] = None,
modifiers: Optional[HogQLQueryModifiers] = None,
limit_context: Optional[LimitContext] = None,
+ include_timestamp: Optional[bool] = None,
+ include_preceding_timestamp: Optional[bool] = None,
+ include_properties: Optional[List[str]] = None,
+ include_final_matching_events: Optional[bool] = None,
):
super().__init__(query=query, team=team, timings=timings, modifiers=modifiers, limit_context=limit_context)
@@ -59,6 +68,11 @@ def __init__(
self.funnelsFilter.funnelWindowIntervalUnit or FunnelConversionWindowTimeUnit.day
)
+ self.includeTimestamp = include_timestamp
+ self.includePrecedingTimestamp = include_preceding_timestamp
+ self.includeProperties = include_properties or []
+ self.includeFinalMatchingEvents = include_final_matching_events
+
# the API accepts either:
# a string (single breakdown) in parameter "breakdown"
# a list of numbers (one or more cohorts) in parameter "breakdown"
@@ -89,6 +103,8 @@ def __init__(
else:
self.breakdown = self.breakdownFilter.breakdown # type: ignore
+ self.actorsQuery = None
+
@cached_property
def max_steps(self) -> int:
return len(self.query.series)
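How these flags are consumed: the correlation runner constructs its FunnelQueryContext with include_timestamp=True, which flips _get_timestamp_selects() and _get_timestamp_outer_select() in base.py from returning empty lists to emitting the latest_* fields and argMax(..., steps) aliases. A hedged sketch of the call; the query and team objects are assumed to be in scope:

context = FunnelQueryContext(
    query=funnels_query,  # a FunnelsQuery instance, assumed in scope
    team=team,  # a Team instance, assumed in scope
    include_timestamp=True,  # surface timestamp / final_timestamp / first_timestamp
    include_preceding_timestamp=False,  # checked before include_timestamp in _get_timestamp_selects
    include_properties=[],  # extra person/group property columns to carry through
    include_final_matching_events=False,  # recordings: return the final step's matching event
)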
diff --git a/posthog/hogql_queries/insights/funnels/funnel_strict.py b/posthog/hogql_queries/insights/funnels/funnel_strict.py
index 7da26bfaf1390..1bea66772a6f5 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_strict.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_strict.py
@@ -28,7 +28,7 @@ def get_step_counts_query(self):
max_steps = self.context.max_steps
breakdown_exprs = self._get_breakdown_prop_expr()
inner_timestamps, outer_timestamps = self._get_timestamp_selects()
- person_and_group_properties = self._get_person_and_group_properties()
+ person_and_group_properties = self._get_person_and_group_properties(aggregate=True)
group_by_columns: List[ast.Expr] = [
ast.Field(chain=["aggregation_target"]),
diff --git a/posthog/hogql_queries/insights/funnels/funnel_strict_persons.py b/posthog/hogql_queries/insights/funnels/funnel_strict_persons.py
index d457a50c93758..f55afbd218266 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_strict_persons.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_strict_persons.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional
from posthog.hogql import ast
from posthog.hogql_queries.insights.funnels.funnel_strict import FunnelStrict
@@ -7,13 +7,13 @@
class FunnelStrictActors(FunnelStrict):
def actor_query(
self,
- # extra_fields: Optional[List[str]] = None,
+ extra_fields: Optional[List[str]] = None,
) -> ast.SelectQuery:
select: List[ast.Expr] = [
ast.Alias(alias="actor_id", expr=ast.Field(chain=["aggregation_target"])),
*self._get_funnel_person_step_events(),
*self._get_timestamp_outer_select(),
- # {extra_fields}
+ *([ast.Field(chain=[field]) for field in extra_fields or []]),
]
select_from = ast.JoinExpr(table=self.get_step_counts_query())
where = self._get_funnel_person_step_condition()
diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered.py b/posthog/hogql_queries/insights/funnels/funnel_unordered.py
index e0c7eba870f88..09fc322621eac 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_unordered.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_unordered.py
@@ -62,7 +62,7 @@ def get_step_counts_query(self):
max_steps = self.context.max_steps
breakdown_exprs = self._get_breakdown_prop_expr()
inner_timestamps, outer_timestamps = self._get_timestamp_selects()
- person_and_group_properties = self._get_person_and_group_properties()
+ person_and_group_properties = self._get_person_and_group_properties(aggregate=True)
group_by_columns: List[ast.Expr] = [
ast.Field(chain=["aggregation_target"]),
diff --git a/posthog/hogql_queries/insights/funnels/funnel_unordered_persons.py b/posthog/hogql_queries/insights/funnels/funnel_unordered_persons.py
index 2af375ab1f23d..a378f044b5d56 100644
--- a/posthog/hogql_queries/insights/funnels/funnel_unordered_persons.py
+++ b/posthog/hogql_queries/insights/funnels/funnel_unordered_persons.py
@@ -1,4 +1,4 @@
-from typing import List
+from typing import List, Optional
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr
@@ -19,13 +19,13 @@ def _get_funnel_person_step_events(self) -> List[ast.Expr]:
def actor_query(
self,
- # extra_fields: Optional[List[str]] = None,
+ extra_fields: Optional[List[str]] = None,
) -> ast.SelectQuery:
select: List[ast.Expr] = [
ast.Alias(alias="actor_id", expr=ast.Field(chain=["aggregation_target"])),
*self._get_funnel_person_step_events(),
*self._get_timestamp_outer_select(),
- # {extra_fields}
+ *([ast.Field(chain=[field]) for field in extra_fields or []]),
]
select_from = ast.JoinExpr(table=self.get_step_counts_query())
where = self._get_funnel_person_step_condition()
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr
new file mode 100644
index 0000000000000..5019f0f57f7b7
--- /dev/null
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlation.ambr
@@ -0,0 +1,6814 @@
+# serializer version: 1
+# name: TestClickhouseFunnelCorrelation.test_action_events_are_excluded_from_correlations
+ '''
+ SELECT event.event AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM events AS event
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS event__pdi ON equals(event.distinct_id, event__pdi.distinct_id)
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ if(and(equals(e.event, 'user signed up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(and(equals(e.event, 'paid'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, []))
+ GROUP BY name
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ if(and(equals(e.event, 'user signed up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(and(equals(e.event, 'paid'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(arrayZip(['$browser'], [JSONExtractString(persons.person_props, '$browser')])) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ JOIN
+ (SELECT persons.id AS id,
+ persons.properties AS person_props
+ FROM
+ (SELECT person.id AS id,
+ person.properties AS properties
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons) AS persons ON equals(persons.id, funnel_actors.actor_id)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
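+# Snapshots .1/.3/.5/.7 below fetch the actors behind a correlation cell:
+# events are filtered to persons whose `$browser` is 'Positive' or 'Negative',
+# split into converted (steps = 2) and dropped-off (steps != 2) variants, each
+# collecting up to 10 matching events per step for session-recording lookup.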
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.1
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
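+# Snapshots .2/.4/.6/.8 are the follow-up session-replay existence checks,
+# verifying which of the matched `$session_id`s (here only the empty string)
+# actually have rows in session_replay_events.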
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.2
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.3
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.4
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.5
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.6
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.7
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__pdi__person.`properties___$browser`, 'Negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_basic_funnel_correlation_with_properties.8
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
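+# The next two snapshots cover event-property correlation for a funnel
+# aggregated by group (`$group_1`): events occurring between a group's first
+# and final funnel timestamps are array-joined into (key, value) property
+# tuples and counted per name, keeping only names whose success plus failure
+# count exceeds two.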
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups
+ '''
+ SELECT concat(ifNull(toString(event_name), ''), '::', ifNull(toString((prop).1), ''), '::', ifNull(toString((prop).2), '')) AS name,
+ countIf(actor_id, ifNull(equals(steps, 2), 0)) AS success_count,
+ countIf(actor_id, ifNull(notEquals(steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ event.event AS event_name,
+ arrayJoin(JSONExtractKeysAndValues(event.properties, 'String')) AS prop
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_1` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_1`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), in(event.event, ['positively_related', 'negatively_related'])))
+ GROUP BY name
+ HAVING and(ifNull(greater(plus(success_count, failure_count), 2), 0), ifNull(notIn((prop).1, []), 0))
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_1` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
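+# The _materialized variant appears to produce the same SQL as the snapshot
+# above in this case.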
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_event_properties_and_groups_materialized
+ '''
+ SELECT concat(ifNull(toString(event_name), ''), '::', ifNull(toString((prop).1), ''), '::', ifNull(toString((prop).2), '')) AS name,
+ countIf(actor_id, ifNull(equals(steps, 2), 0)) AS success_count,
+ countIf(actor_id, ifNull(notEquals(steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ event.event AS event_name,
+ arrayJoin(JSONExtractKeysAndValues(event.properties, 'String')) AS prop
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_1` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_1`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), in(event.event, ['positively_related', 'negatively_related'])))
+ GROUP BY name
+ HAVING and(ifNull(greater(plus(success_count, failure_count), 2), 0), ifNull(notIn((prop).1, []), 0))
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_1` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
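+# Event-name correlation for a `$group_0`-aggregated funnel: each event name
+# seen between the funnel boundaries is counted directly, excluding the
+# funnel's own steps ('paid' and 'user signed up').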
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups
+ '''
+ SELECT event.event AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, []))
+ GROUP BY name
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
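+# Snapshot .1 begins the corresponding actors query: group keys from the
+# groups table are joined to the funnel actors and their matching events.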
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.1
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.2
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.3
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.4
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.5
+ '''
+ SELECT event.event AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, []))
+ GROUP BY name
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.6
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups.7
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2
+ '''
+ SELECT event.event AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, []))
+ GROUP BY name
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.1
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(equals(funnel_actors.steps, 2), 0))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.2
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'positively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.3
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.4
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.5
+ '''
+ SELECT event.event AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), notIn(event.event, []))
+ GROUP BY name
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.6
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(equals(funnel_actors.steps, 2), 0))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_events_and_groups_poe_v2.7
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(funnel_actors.actor_id, event.`$group_0`)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-14 23:59:59', 6, 'UTC')))), notIn(event.event, ['paid', 'user signed up']), equals(event.event, 'negatively_related'), ifNull(notEquals(funnel_actors.steps, 2), 1))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.1
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.2
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.3
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.4
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups.5
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.1
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.2
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.3
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.4
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_materialized.5
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
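+# Reviewer note: snapshot .1 below is the drill-down actors query for a correlation
+# cell. It INNER JOINs group keys to the funnel actors and keeps converted actors only
+# (WHERE ifNull(equals(funnel_actors.steps, 2), 0)), collecting up to ten
+# (timestamp, uuid, $session_id, $window_id) tuples per step via groupArray(10)(...).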
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.1
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
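+# Reviewer note: snapshot .2 is identical to .1 except for the final actor filter,
+# which selects drop-offs instead: WHERE ifNull(notEquals(funnel_actors.steps, 2), 1).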
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.2
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
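+# Reviewer note: snapshot .3 matches .1 but filters the events subquery on
+# properties___industry = 'negative' rather than 'positive', still keeping converted actors.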
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.3
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
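+# Reviewer note: snapshot .4 combines the two variations above: industry = 'negative'
+# in the events subquery plus the drop-off filter notEquals(funnel_actors.steps, 2).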
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.4
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
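+# Reviewer note: snapshot .5 below correlates against every group property at once.
+# arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) expands each
+# group's JSON properties into one (key, value) tuple per row before the same
+# success/failure aggregation. Minimal sketch of the expansion (hypothetical literal):
+#   SELECT arrayJoin(JSONExtractKeysAndValues('{"industry":"tech"}', 'String')) AS prop
+#   -- prop = ('industry', 'tech'); (prop).1 is the key, (prop).2 the value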
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events.5
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
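+# Reviewer note: the _materialized variants start here. Since the correlated property
+# lives in groups.group_properties rather than on the event, the generated SQL appears
+# identical to the non-materialized snapshots above; the suffix seems to reflect only
+# the parametrized test run with materialized event columns enabled.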
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
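+# Reviewer note: materialized .1 below is the converted-actors drill-down for
+# industry = 'positive', mirroring the non-materialized .1 snapshot.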
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.1
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
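+# Reviewer note: materialized .2 differs from materialized .1 only in the drop-off
+# filter, WHERE ifNull(notEquals(funnel_actors.steps, 2), 1).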
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.2
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
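+# Reviewer note: materialized .3 keeps converted actors (steps = 2) but filters the
+# events subquery on properties___industry = 'negative'.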
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.3
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.4
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_materialized.5
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(arrayZip(['industry'], [JSONExtractString(groups_0.properties, 'industry')])) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.1
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.2
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'positive'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.3
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.4
+ '''
+ SELECT source.actor_id AS actor_id
+ FROM
+ (SELECT groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up')), ifNull(equals(e__group_0.properties___industry, 'negative'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(groups.key, source.actor_id)
+ ORDER BY source.actor_id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestClickhouseFunnelCorrelation.test_funnel_correlation_with_properties_and_groups_person_on_events_poe_v2.5
+ '''
+ SELECT concat(ifNull(toString((aggregation_target_with_props.prop).1), ''), '::', ifNull(toString((aggregation_target_with_props.prop).2), '')) AS name,
+ countIf(aggregation_target_with_props.actor_id, ifNull(equals(aggregation_target_with_props.steps, 2), 0)) AS success_count,
+ countIf(aggregation_target_with_props.actor_id, ifNull(notEquals(aggregation_target_with_props.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT funnel_actors.actor_id AS actor_id,
+ funnel_actors.steps AS steps,
+ arrayJoin(JSONExtractKeysAndValues(groups_0.properties, 'String')) AS prop
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LEFT JOIN
+ (SELECT groups.key AS key,
+ groups.properties AS properties
+ FROM
+ (SELECT argMax(groups.group_properties, groups._timestamp) AS properties,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE equals(groups.team_id, 2)
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS groups
+ WHERE ifNull(equals(groups.index, 0), 0)) AS groups_0 ON equals(funnel_actors.actor_id, groups_0.key)) AS aggregation_target_with_props
+ GROUP BY (aggregation_target_with_props.prop).1, (aggregation_target_with_props.prop).2
+ HAVING ifNull(notIn((aggregation_target_with_props.prop).1, []), 0)
+ LIMIT 100
+ UNION ALL
+ SELECT 'Total_Values_In_Query' AS name,
+ countIf(funnel_actors.actor_id, ifNull(equals(funnel_actors.steps, 2), 0)) AS success_count,
+ countIf(funnel_actors.actor_id, ifNull(notEquals(funnel_actors.steps, 2), 1)) AS failure_count
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e.`$group_0` AS aggregation_target,
+ if(equals(e.event, 'user signed up'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(equals(e.event, 'paid'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-14 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('paid', 'user signed up'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr
new file mode 100644
index 0000000000000..fe7404ea9b7a1
--- /dev/null
+++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel_correlations_persons.ambr
@@ -0,0 +1,776 @@
+# serializer version: 1
+# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS event__pdi ON equals(event.distinct_id, event__pdi.distinct_id)
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, '$pageview'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed']), equals(event.event, 'insight loaded'), ifNull(equals(funnel_actors.steps, 2), 0))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.1
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.2
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM events AS event
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS event__pdi ON equals(event.distinct_id, event__pdi.distinct_id)
+ JOIN
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ avg(step_2_conversion_time) AS step_2_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ median(step_2_conversion_time) AS step_2_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(step_2_matching_event) AS step_2_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_2, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ step_2_conversion_time AS step_2_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_2 AS latest_2,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ step_2 AS step_2,
+ latest_2 AS latest_2,
+ uuid_2 AS uuid_2,
+ `$session_id_2` AS `$session_id_2`,
+ `$window_id_2` AS `$window_id_2`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0), ifNull(lessOrEquals(latest_1, latest_2), 0), ifNull(lessOrEquals(latest_2, plus(latest_0, toIntervalDay(14))), 0)), 3, if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1)) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ if(and(isNotNull(latest_2), ifNull(lessOrEquals(latest_2, plus(latest_1, toIntervalDay(14))), 0)), dateDiff('second', latest_1, latest_2), NULL) AS step_2_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ tuple(latest_2, uuid_2, `$session_id_2`, `$window_id_2`) AS step_2_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, if(isNull(latest_2), step_1_matching_event, step_2_matching_event))) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ last_value(uuid_2) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
+ last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
+ last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ step_2 AS step_2,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, latest_2) AS latest_2,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, uuid_2) AS uuid_2,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, `$session_id_2`) AS `$session_id_2`,
+ if(ifNull(less(latest_2, latest_1), 0), NULL, `$window_id_2`) AS `$window_id_2`
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`,
+ step_2 AS step_2,
+ min(latest_2) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_2,
+ last_value(uuid_2) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_2,
+ last_value(`$session_id_2`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_2`,
+ last_value(`$window_id_2`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_2`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, '$pageview'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`,
+ if(equals(e.event, 'insight updated'), 1, 0) AS step_2,
+ if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2,
+ if(ifNull(equals(step_2, 1), 0), uuid, NULL) AS uuid_2,
+ if(ifNull(equals(step_2, 1), 0), e.`$session_id`, NULL) AS `$session_id_2`,
+ if(ifNull(equals(step_2, 1), 0), e.`$window_id`, NULL) AS `$window_id_2`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed', 'insight updated'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0)))))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2, 3]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors ON equals(event__pdi.person_id, funnel_actors.actor_id)
+ WHERE and(equals(event.team_id, 2), greaterOrEquals(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-01 00:00:00', 6, 'UTC'))), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC'))), equals(event.team_id, 2), greater(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), funnel_actors.first_timestamp), less(toTimeZone(toDateTime(toTimeZone(event.timestamp, 'UTC'), 'UTC'), 'UTC'), coalesce(funnel_actors.final_timestamp, plus(funnel_actors.first_timestamp, toIntervalDay(14)), assumeNotNull(parseDateTime64BestEffortOrNull('2021-01-08 23:59:59', 6, 'UTC')))), notIn(event.event, ['$pageview', 'insight analyzed', 'insight updated']), equals(event.event, 'insight loaded'), ifNull(notEquals(funnel_actors.steps, 3), 1))
+ GROUP BY actor_id
+ ORDER BY actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_event_with_recordings.3
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS latest_1,
+ last_value(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS uuid_1,
+ last_value(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$session_id_1`,
+ last_value(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, '$pageview'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'insight analyzed')), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0)), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0)))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_funnel_correlation_on_properties_with_recordings.1
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1,
+ min(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1,
+ min(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`,
+ min(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, '$pageview'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(equals(funnel_actors.steps, 2), 0)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.1
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['s2']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.2
+ '''
+ SELECT persons.id,
+ persons.id AS id,
+ source.matching_events AS matching_events
+ FROM
+ (SELECT person.id AS id
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS persons
+ INNER JOIN
+ (SELECT funnel_actors.actor_id AS actor_id,
+ any(funnel_actors.matching_events) AS matching_events
+ FROM
+ (SELECT aggregation_target AS actor_id,
+ final_matching_events AS matching_events,
+ timestamp AS timestamp,
+ steps AS steps,
+ final_timestamp AS final_timestamp,
+ first_timestamp AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ avg(step_1_conversion_time) AS step_1_average_conversion_time_inner,
+ median(step_1_conversion_time) AS step_1_median_conversion_time_inner,
+ groupArray(10)(step_0_matching_event) AS step_0_matching_events,
+ groupArray(10)(step_1_matching_event) AS step_1_matching_events,
+ groupArray(10)(final_matching_event) AS final_matching_events,
+ argMax(latest_0, steps) AS timestamp,
+ argMax(latest_1, steps) AS final_timestamp,
+ argMax(latest_0, steps) AS first_timestamp
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ steps AS steps,
+ max(steps) OVER (PARTITION BY aggregation_target) AS max_steps,
+ step_1_conversion_time AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event,
+ latest_0 AS latest_0,
+ latest_1 AS latest_1,
+ latest_0 AS latest_0
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ latest_1 AS latest_1,
+ uuid_1 AS uuid_1,
+ `$session_id_1` AS `$session_id_1`,
+ `$window_id_1` AS `$window_id_1`,
+ if(and(ifNull(lessOrEquals(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), 2, 1) AS steps,
+ if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(latest_0, toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time,
+ tuple(latest_0, uuid_0, `$session_id_0`, `$window_id_0`) AS step_0_matching_event,
+ tuple(latest_1, uuid_1, `$session_id_1`, `$window_id_1`) AS step_1_matching_event,
+ if(isNull(latest_0), tuple(NULL, NULL, NULL, NULL), if(isNull(latest_1), step_0_matching_event, step_1_matching_event)) AS final_matching_event
+ FROM
+ (SELECT aggregation_target AS aggregation_target,
+ timestamp AS timestamp,
+ step_0 AS step_0,
+ latest_0 AS latest_0,
+ uuid_0 AS uuid_0,
+ `$session_id_0` AS `$session_id_0`,
+ `$window_id_0` AS `$window_id_0`,
+ step_1 AS step_1,
+ min(latest_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS latest_1,
+ min(uuid_1) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS uuid_1,
+ min(`$session_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$session_id_1`,
+ min(`$window_id_1`) OVER (PARTITION BY aggregation_target
+ ORDER BY timestamp DESC ROWS BETWEEN 1 PRECEDING AND 1 PRECEDING) AS `$window_id_1`
+ FROM
+ (SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
+ e__pdi.person_id AS aggregation_target,
+ e.uuid AS uuid,
+ if(equals(e.event, '$pageview'), 1, 0) AS step_0,
+ if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0,
+ if(ifNull(equals(step_0, 1), 0), uuid, NULL) AS uuid_0,
+ if(ifNull(equals(step_0, 1), 0), e.`$session_id`, NULL) AS `$session_id_0`,
+ if(ifNull(equals(step_0, 1), 0), e.`$window_id`, NULL) AS `$window_id_0`,
+ if(equals(e.event, 'insight analyzed'), 1, 0) AS step_1,
+ if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1,
+ if(ifNull(equals(step_1, 1), 0), uuid, NULL) AS uuid_1,
+ if(ifNull(equals(step_1, 1), 0), e.`$session_id`, NULL) AS `$session_id_1`,
+ if(ifNull(equals(step_1, 1), 0), e.`$window_id`, NULL) AS `$window_id_1`
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'foo'), ''), 'null'), '^"|"$', '') AS properties___foo
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2021-01-08 23:59:59.999999', 6, 'UTC'))), ifNull(equals(e__pdi__person.properties___foo, 'bar'), 0))))
+ WHERE ifNull(equals(step_0, 1), 0)))
+ GROUP BY aggregation_target,
+ steps
+ HAVING ifNull(equals(steps, max_steps), isNull(steps)
+ and isNull(max_steps)))
+ WHERE ifNull(in(steps, [1, 2]), 0)
+ ORDER BY aggregation_target ASC) AS funnel_actors
+ WHERE ifNull(notEquals(funnel_actors.steps, 2), 1)
+ GROUP BY funnel_actors.actor_id
+ ORDER BY funnel_actors.actor_id ASC) AS source ON equals(persons.id, source.actor_id)
+ ORDER BY persons.id ASC
+ LIMIT 101
+ OFFSET 0 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestFunnelCorrelationsActors.test_strict_funnel_correlation_with_recordings.3
+ '''
+ SELECT DISTINCT session_replay_events.session_id AS session_id
+ FROM
+ (SELECT session_replay_events.session_id AS session_id
+ FROM session_replay_events
+ WHERE equals(session_replay_events.team_id, 2)
+ GROUP BY session_replay_events.session_id) AS session_replay_events
+ WHERE ifNull(in(session_replay_events.session_id, ['s3']), 0)
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_correlation.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlation.py
new file mode 100644
index 0000000000000..f69eb3c6977b6
--- /dev/null
+++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlation.py
@@ -0,0 +1,2167 @@
+from typing import Any, Dict, cast
+import unittest
+
+from rest_framework.exceptions import ValidationError
+
+from posthog.constants import INSIGHT_FUNNELS
+from posthog.hogql_queries.insights.funnels.funnel_correlation_query_runner import (
+ EventContingencyTable,
+ EventStats,
+ FunnelCorrelationQueryRunner,
+)
+from posthog.hogql_queries.insights.funnels.test.test_funnel_correlations_persons import get_actors
+from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
+from posthog.models.action import Action
+from posthog.models.action_step import ActionStep
+from posthog.models.element import Element
+from posthog.models.group.util import create_group
+from posthog.models.group_type_mapping import GroupTypeMapping
+from posthog.models.instance_setting import override_instance_config
+from posthog.schema import (
+ EventPropertyFilter,
+ EventsNode,
+ FunnelCorrelationQuery,
+ FunnelsActorsQuery,
+ FunnelsQuery,
+ FunnelCorrelationResultsType,
+ GroupPropertyFilter,
+ PersonPropertyFilter,
+ PropertyOperator,
+)
+from posthog.test.base import (
+ APIBaseTest,
+ ClickhouseTestMixin,
+ _create_event,
+ _create_person,
+ also_test_with_materialized_columns,
+ flush_persons_and_events,
+ snapshot_clickhouse_queries,
+ also_test_with_person_on_events_v2,
+)
+from posthog.test.test_journeys import journeys_for
+
+
+def _create_action(**kwargs):
+ team = kwargs.pop("team")
+ name = kwargs.pop("name")
+ properties = kwargs.pop("properties", {})
+ action = Action.objects.create(team=team, name=name)
+ ActionStep.objects.create(action=action, event=name, properties=properties)
+ return action
+
+
+class TestClickhouseFunnelCorrelation(ClickhouseTestMixin, APIBaseTest):
+ maxDiff = None
+
+ def _get_events_for_filters(
+ self,
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.events,
+ funnelCorrelationNames=None,
+ funnelCorrelationExcludeNames=None,
+ funnelCorrelationExcludeEventNames=None,
+ funnelCorrelationEventNames=None,
+ funnelCorrelationEventExcludePropertyNames=None,
+ ):
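+ # Convert the legacy filter dict into a FunnelsQuery, wrap it in a
+ # FunnelsActorsQuery, and run the correlation query runner directly,
+ # returning (results, skewed_totals).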
+ funnels_query = cast(FunnelsQuery, filter_to_query(filters))
+ actors_query = FunnelsActorsQuery(source=funnels_query)
+ correlation_query = FunnelCorrelationQuery(
+ source=actors_query,
+ funnelCorrelationType=funnelCorrelationType,
+ funnelCorrelationNames=funnelCorrelationNames,
+ funnelCorrelationExcludeNames=funnelCorrelationExcludeNames,
+ funnelCorrelationExcludeEventNames=funnelCorrelationExcludeEventNames,
+ funnelCorrelationEventNames=funnelCorrelationEventNames,
+ funnelCorrelationEventExcludePropertyNames=funnelCorrelationEventExcludePropertyNames,
+ )
+ result, skewed_totals, _, _ = FunnelCorrelationQueryRunner(query=correlation_query, team=self.team)._calculate()
+ return result, skewed_totals
+
+ def _get_actors_for_event(self, filters: Dict[str, Any], event_name: str, properties=None, success=True):
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=success,
+ funnelCorrelationPersonEntity=EventsNode(event=event_name, properties=properties),
+ )
+ return [str(row[0]) for row in serialized_actors]
+
+ def _get_actors_for_property(
+ self, filters: Dict[str, Any], property_values: list, success=True, funnelCorrelationNames=None
+ ):
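+ # Each property_values entry is a (key, value, type, group_type_index) tuple;
+ # "person" entries become a PersonPropertyFilter, anything else a GroupPropertyFilter.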
+ funnelCorrelationPropertyValues = [
+ (
+ PersonPropertyFilter(key=prop, value=value, operator=PropertyOperator.exact)
+ if type == "person"
+ else GroupPropertyFilter(
+ key=prop, value=value, group_type_index=group_type_index, operator=PropertyOperator.exact
+ )
+ )
+ for prop, value, type, group_type_index in property_values
+ ]
+
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationNames=funnelCorrelationNames,
+ funnelCorrelationPersonConverted=success,
+ funnelCorrelationPropertyValues=funnelCorrelationPropertyValues,
+ )
+ return [str(row[0]) for row in serialized_actors]
+
+ def test_basic_funnel_correlation_with_events(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ for i in range(10):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="positively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ for i in range(10, 20):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+
+ result, _ = self._get_events_for_filters(filters, funnelCorrelationType=FunnelCorrelationResultsType.events)
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+ expected_odds_ratios = [11, 1 / 11]
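+ # The expected values follow from the +1 prior count used throughout these tests:
+ # all ten first-loop users convert, so success total = failure total = 10, and
+ # positively_related gives ((5 + 1) / (0 + 1)) * ((10 - 0 + 1) / (10 - 5 + 1)) = 6 * 11/6 = 11,
+ # with negatively_related as its mirror image, 1/11.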
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positively_related",
+ "success_count": 5,
+ "failure_count": 0,
+ # "odds_ratio": 11.0,
+ "correlation_type": "success",
+ },
+ {
+ "event": "negatively_related",
+ "success_count": 0,
+ "failure_count": 5,
+ # "odds_ratio": 1 / 11,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ self.assertEqual(len(self._get_actors_for_event(filters, "positively_related")), 5)
+ self.assertEqual(
+ len(self._get_actors_for_event(filters, "positively_related", success=False)),
+ 0,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_event(filters, "negatively_related", success=False)),
+ 5,
+ )
+ self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 0)
+
+ # Now exclude positively_related
+ result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.events,
+ funnelCorrelationExcludeEventNames=["positively_related"],
+ )
+
+ odds_ratio = result[0].pop("odds_ratio")
+ expected_odds_ratio = 1 / 11
+
+ self.assertAlmostEqual(odds_ratio, expected_odds_ratio)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "negatively_related",
+ "success_count": 0,
+ "failure_count": 5,
+ # "odds_ratio": 1 / 11,
+ "correlation_type": "failure",
+ }
+ ],
+ )
+ # Getting specific people isn't affected by exclude_events
+ self.assertEqual(len(self._get_actors_for_event(filters, "positively_related")), 5)
+ self.assertEqual(
+ len(self._get_actors_for_event(filters, "positively_related", success=False)),
+ 0,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_event(filters, "negatively_related", success=False)),
+ 5,
+ )
+ self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 0)
+
+ @snapshot_clickhouse_queries
+ def test_action_events_are_excluded_from_correlations(self):
+ journey = {}
+
+ for i in range(3):
+ person_id = f"user_{i}"
+ events = [
+ {
+ "event": "user signed up",
+ "timestamp": "2020-01-02T14:00:00",
+ "properties": {"key": "val"},
+ },
+ # same event, but missing property, so not part of action.
+ {"event": "user signed up", "timestamp": "2020-01-02T14:10:00"},
+ ]
+ if i % 2 == 0:
+ events.append({"event": "positively_related", "timestamp": "2020-01-03T14:00:00"})
+ events.append(
+ {
+ "event": "paid",
+ "timestamp": "2020-01-04T14:00:00",
+ "properties": {"key": "val"},
+ }
+ )
+
+ journey[person_id] = events
+
+ # one failure needed
+ journey["failure"] = [
+ {
+ "event": "user signed up",
+ "timestamp": "2020-01-02T14:00:00",
+ "properties": {"key": "val"},
+ }
+ ]
+
+ journeys_for(events_by_person=journey, team=self.team) # type: ignore
+
+ sign_up_action = _create_action(
+ name="user signed up",
+ team=self.team,
+ properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}],
+ )
+
+ paid_action = _create_action(
+ name="paid",
+ team=self.team,
+ properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}],
+ )
+ filters = {
+ "events": [],
+ "actions": [
+ {"id": sign_up_action.id, "order": 0},
+ {"id": paid_action.id, "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ "funnel_correlation_type": "events",
+ }
+
+ result, _ = self._get_events_for_filters(filters)
+
+ # missing user signed up and paid from result set, as expected
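+ # Every looped person converts, so success total = 3 and failure total = 1,
+ # giving positively_related an odds ratio of ((2 + 1) / (0 + 1)) * ((1 - 0 + 1) / (3 - 2 + 1)) = 3.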
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positively_related",
+ "success_count": 2,
+ "failure_count": 0,
+ "odds_ratio": 3,
+ "correlation_type": "success",
+ }
+ ],
+ )
+
+ @also_test_with_person_on_events_v2
+ @snapshot_clickhouse_queries
+ def test_funnel_correlation_with_events_and_groups(self):
+ GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0)
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key="org:5",
+ properties={"industry": "finance"},
+ )
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key="org:7",
+ properties={"industry": "finance"},
+ )
+
+ for i in range(10, 20):
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:{i}",
+ properties={},
+ )
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="positively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+ # this event shouldn't show up when dealing with groups
+ _create_event(
+ team=self.team,
+ event="positively_related_without_group",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+
+ # one fail group
+ _create_person(distinct_ids=[f"user_fail"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_fail",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:5"},
+ )
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"$group_0": f"org:5"},
+ )
+
+ # one success group with same filter property
+ _create_person(distinct_ids=[f"user_succ"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:7"},
+ )
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"$group_0": f"org:7"},
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-04T14:00:00Z",
+ properties={"$group_0": f"org:7"},
+ )
+
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ "aggregation_group_type_index": 0,
+ }
+
+ result, _ = self._get_events_for_filters(filters, funnelCorrelationType=FunnelCorrelationResultsType.events)
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+ expected_odds_ratios = [12 / 7, 1 / 11]
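+ # Here every looped org converts, so success total = 11 (orgs 10-19 plus org:7)
+ # and failure total = 1 (org:5): positively_related gives
+ # ((5 + 1) / (0 + 1)) * ((1 - 0 + 1) / (11 - 5 + 1)) = 6 * 2/7 = 12/7, and
+ # negatively_related (one success, one failure) gives 1 * (1 / 11) = 1/11.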
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positively_related",
+ "success_count": 5,
+ "failure_count": 0,
+ # "odds_ratio": 12/7,
+ "correlation_type": "success",
+ },
+ {
+ "event": "negatively_related",
+ "success_count": 1,
+ "failure_count": 1,
+ # "odds_ratio": 1 / 11,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ self.assertEqual(len(self._get_actors_for_event(filters, "positively_related")), 5)
+ self.assertEqual(
+ len(self._get_actors_for_event(filters, "positively_related", success=False)),
+ 0,
+ )
+ self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 1)
+ self.assertEqual(
+ len(self._get_actors_for_event(filters, "negatively_related", success=False)),
+ 1,
+ )
+
+ # Now filter the funnel down to the finance groups, excluding all the positively related orgs
+ excludes = {
+ "properties": [
+ {
+ "key": "industry",
+ "value": "finance",
+ "type": "group",
+ "group_type_index": 0,
+ }
+ ]
+ }
+
+ result, _ = self._get_events_for_filters({**filters, **excludes}) # TODO destructure
+
+ odds_ratio = result[0].pop("odds_ratio")
+ expected_odds_ratio = 1
+ # the success and failure totals also exclude the filtered-out groups
+
+ self.assertAlmostEqual(odds_ratio, expected_odds_ratio)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "negatively_related",
+ "success_count": 1,
+ "failure_count": 1,
+ # "odds_ratio": 1,
+ "correlation_type": "failure",
+ }
+ ],
+ )
+
+ self.assertEqual(len(self._get_actors_for_event(filters, "negatively_related")), 1)
+ self.assertEqual(
+ len(self._get_actors_for_event(filters, "negatively_related", success=False)),
+ 1,
+ )
+
+ # :FIXME: This should also work with materialized columns
+ # @also_test_with_materialized_columns(event_properties=[], person_properties=["$browser"])
+ @snapshot_clickhouse_queries
+ def test_basic_funnel_correlation_with_properties(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ "funnel_correlation_type": "properties",
+ "funnel_correlation_names": ["$browser"],
+ }
+
+ for i in range(10):
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ for i in range(10, 20):
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+
+ # One Positive with failure
+ _create_person(
+ distinct_ids=[f"user_fail"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_fail",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+
+ # One Negative with success
+ _create_person(
+ distinct_ids=[f"user_succ"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ result, _ = self._get_events_for_filters(
+ filters, funnelCorrelationType=FunnelCorrelationResultsType.properties, funnelCorrelationNames=["$browser"]
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+
+ # Success Total = 11, Failure Total = 11
+ #
+ # Browser::Positive
+ # Success: 10
+ # Failure: 1
+
+ # Browser::Negative
+ # Success: 1
+ # Failure: 10
+
+ prior_count = 1
+ expected_odds_ratios = [
+ ((10 + prior_count) / (1 + prior_count)) * ((11 - 1 + prior_count) / (11 - 10 + prior_count)),
+ ((1 + prior_count) / (10 + prior_count)) * ((11 - 10 + prior_count) / (11 - 1 + prior_count)),
+ ]
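+ # i.e. (11/2) * (11/2) = 121/4 for Positive and (2/11) * (2/11) = 4/121 for Negative,
+ # matching the commented odds ratios in the expected result below.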
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "$browser::Positive",
+ "success_count": 10,
+ "failure_count": 1,
+ # "odds_ratio": 121/4,
+ "correlation_type": "success",
+ },
+ {
+ "event": "$browser::Negative",
+ "success_count": 1,
+ "failure_count": 10,
+ # "odds_ratio": 4/121,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ self.assertEqual(
+ len(
+ self._get_actors_for_property(
+ filters, [("$browser", "Positive", "person", None)], funnelCorrelationNames=["$browser"]
+ )
+ ),
+ 10,
+ )
+ self.assertEqual(
+ len(
+ self._get_actors_for_property(
+ filters, [("$browser", "Positive", "person", None)], False, funnelCorrelationNames=["$browser"]
+ )
+ ),
+ 1,
+ )
+ self.assertEqual(
+ len(
+ self._get_actors_for_property(
+ filters, [("$browser", "Negative", "person", None)], funnelCorrelationNames=["$browser"]
+ )
+ ),
+ 1,
+ )
+ self.assertEqual(
+ len(
+ self._get_actors_for_property(
+ filters, [("$browser", "Negative", "person", None)], False, funnelCorrelationNames=["$browser"]
+ )
+ ),
+ 10,
+ )
+
+ # TODO: Delete this test when moved to person-on-events
+ @also_test_with_materialized_columns(
+ event_properties=[], person_properties=["$browser"], verify_no_jsonextract=False
+ )
+ @snapshot_clickhouse_queries
+ def test_funnel_correlation_with_properties_and_groups(self):
+ GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0)
+
+ for i in range(10):
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:{i}",
+ properties={"industry": "positive"},
+ )
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+
+ for i in range(10, 20):
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:{i}",
+ properties={"industry": "negative"},
+ )
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+
+ # One Positive with failure
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:fail",
+ properties={"industry": "positive"},
+ )
+ _create_person(
+ distinct_ids=[f"user_fail"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_fail",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:fail"},
+ )
+
+ # One Negative with success
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:succ",
+ properties={"industry": "negative"},
+ )
+ _create_person(
+ distinct_ids=[f"user_succ"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:succ"},
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-04T14:00:00Z",
+ properties={"$group_0": f"org:succ"},
+ )
+
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ "aggregation_group_type_index": 0,
+ }
+
+ result, _ = self._get_events_for_filters(
+ filters, funnelCorrelationType=FunnelCorrelationResultsType.properties, funnelCorrelationNames=["industry"]
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+
+ # Success Total = 11, Failure Total = 11
+ #
+ # Industry::Positive
+ # Success: 10
+ # Failure: 1
+
+ # Industry::Negative
+ # Success: 1
+ # Failure: 10
+
+ prior_count = 1
+ expected_odds_ratios = [
+ ((10 + prior_count) / (1 + prior_count)) * ((11 - 1 + prior_count) / (11 - 10 + prior_count)),
+ ((1 + prior_count) / (10 + prior_count)) * ((11 - 10 + prior_count) / (11 - 1 + prior_count)),
+ ]
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "industry::positive",
+ "success_count": 10,
+ "failure_count": 1,
+ # "odds_ratio": 121/4,
+ "correlation_type": "success",
+ },
+ {
+ "event": "industry::negative",
+ "success_count": 1,
+ "failure_count": 10,
+ # "odds_ratio": 4/121,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ self.assertEqual(
+ len(
+ self._get_actors_for_property(
+ filters, [("industry", "positive", "group", 0)], funnelCorrelationNames=["industry"]
+ )
+ ),
+ 10,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_property(filters, [("industry", "positive", "group", 0)], False)),
+ 1,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)])),
+ 1,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)], False)),
+ 10,
+ )
+
+ # test with `$all` as property
+ # run property correlation with filter on all properties
+ new_result, _ = self._get_events_for_filters(
+ filters, funnelCorrelationType=FunnelCorrelationResultsType.properties, funnelCorrelationNames=["$all"]
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in new_result]
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(new_result, result)
+
+ @also_test_with_materialized_columns(
+ event_properties=[],
+ person_properties=["$browser"],
+ group_properties=[(0, "industry")],
+ verify_no_jsonextract=False,
+ )
+ @also_test_with_person_on_events_v2
+ @snapshot_clickhouse_queries
+ def test_funnel_correlation_with_properties_and_groups_person_on_events(self):
+ GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0)
+
+ for i in range(10):
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:{i}",
+ properties={"industry": "positive"},
+ )
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+
+ for i in range(10, 20):
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:{i}",
+ properties={"industry": "negative"},
+ )
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"$group_0": f"org:{i}"},
+ )
+
+ # One Positive with failure
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:fail",
+ properties={"industry": "positive"},
+ )
+ _create_person(
+ distinct_ids=[f"user_fail"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_fail",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:fail"},
+ )
+
+ # One Negative with success
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=0,
+ group_key=f"org:succ",
+ properties={"industry": "negative"},
+ )
+ _create_person(
+ distinct_ids=[f"user_succ"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_0": f"org:succ"},
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-04T14:00:00Z",
+ properties={"$group_0": f"org:succ"},
+ )
+
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ "aggregation_group_type_index": 0,
+ }
+
+ with override_instance_config("PERSON_ON_EVENTS_ENABLED", True):
+ result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationNames=["industry"],
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+
+ # Success Total = 11, Failure Total = 11
+ #
+ # Industry::Positive
+ # Success: 10
+ # Failure: 1
+
+ # Industry::Negative
+ # Success: 1
+ # Failure: 10
+
+ prior_count = 1
+ expected_odds_ratios = [
+ ((10 + prior_count) / (1 + prior_count)) * ((11 - 1 + prior_count) / (11 - 10 + prior_count)),
+ ((1 + prior_count) / (10 + prior_count)) * ((11 - 10 + prior_count) / (11 - 1 + prior_count)),
+ ]
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "industry::positive",
+ "success_count": 10,
+ "failure_count": 1,
+ # "odds_ratio": 121/4,
+ "correlation_type": "success",
+ },
+ {
+ "event": "industry::negative",
+ "success_count": 1,
+ "failure_count": 10,
+ # "odds_ratio": 4/121,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ self.assertEqual(
+ len(self._get_actors_for_property(filters, [("industry", "positive", "group", 0)])),
+ 10,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_property(filters, [("industry", "positive", "group", 0)], False)),
+ 1,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)])),
+ 1,
+ )
+ self.assertEqual(
+ len(self._get_actors_for_property(filters, [("industry", "negative", "group", 0)], False)),
+ 10,
+ )
+
+ # test with `$all` as property
+ # run property correlation with filter on all properties
+ new_result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationNames=["$all"],
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in new_result]
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(new_result, result)
+
+ def test_no_divide_by_zero_errors(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ for i in range(2):
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ # failure count for this event is 0
+ _create_event(
+ team=self.team,
+ event="positive",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ for i in range(2, 4):
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ # success count for this event is 0
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+
+ result, skewed_totals = self._get_events_for_filters(filters)
+
+ self.assertFalse(skewed_totals)
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+ expected_odds_ratios = [9, 1 / 3]
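+ # With 2 successes and 2 failures overall, the +1 prior keeps both ratios
+ # finite even though one cell of each table is zero:
+ # "positive" (s=2, f=0): (3 / 1) * (3 / 1) = 9
+ # "negatively_related" (s=0, f=1): (1 / 2) * (2 / 3) = 1/3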
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positive",
+ "success_count": 2,
+ "failure_count": 0,
+ # "odds_ratio": 9.0,
+ "correlation_type": "success",
+ },
+ {
+ "event": "negatively_related",
+ "success_count": 0,
+ "failure_count": 1,
+ # "odds_ratio": 1 / 3,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ def test_correlation_with_properties_raises_validation_error(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ _create_person(
+ distinct_ids=[f"user_1"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_1",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="rick",
+ distinct_id=f"user_1",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_1",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+ flush_persons_and_events()
+
+ with self.assertRaises(ValidationError):
+ self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ # funnelCorrelationNames=["$browser"] -- missing
+ )
+
+ with self.assertRaises(ValidationError):
+ self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.event_with_properties,
+ # "funnelCorrelationEventNames": ["rick"] -- missing
+ )
+
+ @also_test_with_materialized_columns(
+ event_properties=[], person_properties=["$browser"], verify_no_jsonextract=False
+ )
+ def test_correlation_with_multiple_properties(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ # 5 successful people with both properties
+ for i in range(5):
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive", "$nice": "very"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ # 10 successful people with some different properties
+ for i in range(5, 15):
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive", "$nice": "not"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ # 5 Unsuccessful people with some common properties
+ for i in range(15, 20):
+ _create_person(
+ distinct_ids=[f"user_{i}"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative", "$nice": "smh"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+
+ # One Positive with failure, no $nice property
+ _create_person(
+ distinct_ids=[f"user_fail"],
+ team_id=self.team.pk,
+ properties={"$browser": "Positive"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_fail",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+
+ # One Negative with success, no $nice property
+ _create_person(
+ distinct_ids=[f"user_succ"],
+ team_id=self.team.pk,
+ properties={"$browser": "Negative"},
+ )
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_succ",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationNames=["$browser", "$nice"],
+ )
+
+ # Success Total = 5 + 10 + 1 = 16
+ # Failure Total = 5 + 1 = 6
+ # Add 1 for priors
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+ expected_odds_ratios = [
+ (16 / 2) * ((7 - 1) / (17 - 15)),
+ (11 / 1) * ((7 - 0) / (17 - 10)),
+ (6 / 1) * ((7 - 0) / (17 - 5)),
+ (1 / 6) * ((7 - 5) / (17 - 0)),
+ (2 / 6) * ((7 - 5) / (17 - 1)),
+ (2 / 2) * ((7 - 1) / (17 - 1)),
+ ]
+ # each factor applies the +1 prior: ((s + 1) / (f + 1)) * ((F + 1 - f) / (S + 1 - s))
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ expected_result = [
+ {
+ "event": "$browser::Positive",
+ "success_count": 15,
+ "failure_count": 1,
+ # "odds_ratio": 24,
+ "correlation_type": "success",
+ },
+ {
+ "event": "$nice::not",
+ "success_count": 10,
+ "failure_count": 0,
+ # "odds_ratio": 11,
+ "correlation_type": "success",
+ },
+ {
+ "event": "$nice::very",
+ "success_count": 5,
+ "failure_count": 0,
+ # "odds_ratio": 3.5,
+ "correlation_type": "success",
+ },
+ {
+ "event": "$nice::smh",
+ "success_count": 0,
+ "failure_count": 5,
+ # "odds_ratio": 0.0196078431372549,
+ "correlation_type": "failure",
+ },
+ {
+ "event": "$browser::Negative",
+ "success_count": 1,
+ "failure_count": 5,
+ # "odds_ratio": 0.041666666666666664,
+ "correlation_type": "failure",
+ },
+ {
+ "event": "$nice::",
+ "success_count": 1,
+ "failure_count": 1,
+ # "odds_ratio": 0.375,
+ "correlation_type": "failure",
+ },
+ ]
+
+ self.assertEqual(result, expected_result)
+
+ # run property correlation with filter on all properties
+ new_result, _ = self._get_events_for_filters(
+ filters, funnelCorrelationType=FunnelCorrelationResultsType.properties, funnelCorrelationNames=["$all"]
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in new_result]
+
+ new_expected_odds_ratios = expected_odds_ratios[:-1]
+ new_expected_result = expected_result[:-1]
+ # When querying all properties, we don't consider properties that don't exist for part of the data,
+ # since users aren't explicitly asking for that property. Thus, we discard $nice:: because it's an
+ # empty result set.
+
+ for odds, expected_odds in zip(odds_ratios, new_expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(new_result, new_expected_result)
+
+ # search for $all but exclude $browser
+ new_result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationNames=["$all"],
+ funnelCorrelationExcludeNames=["$browser"],
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in new_result]
+
+ new_expected_odds_ratios = expected_odds_ratios[1:4] # choosing the $nice property values
+ new_expected_result = expected_result[1:4]
+
+ for odds, expected_odds in zip(odds_ratios, new_expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(new_result, new_expected_result)
+
+ self.assertEqual(
+ len(
+ self._get_actors_for_property(
+ filters, [("$nice", "not", "person", None)], funnelCorrelationNames=["$browser", "$nice"]
+ )
+ ),
+ 10,
+ )
+ # self.assertEqual(
+ # len(
+ # self._get_actors_for_property(
+ # filters, [("$nice", "", "person", None)], False, funnelCorrelationNames=["$browser", "$nice"]
+ # )
+ # ),
+ # 1,
+ # )
+ self.assertEqual(
+ len(
+ self._get_actors_for_property(
+ filters, [("$nice", "very", "person", None)], funnelCorrelationNames=["$browser", "$nice"]
+ )
+ ),
+ 5,
+ )
+
+ def test_discarding_insignificant_events(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ for i in range(10):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="positively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+ if i % 10 == 0:
+ _create_event(
+ team=self.team,
+ event="low_sig_positively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:20:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ for i in range(10, 20):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+ if i % 5 == 0:
+ _create_event(
+ team=self.team,
+ event="low_sig_negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ )
+
+ # Total 10 positive, 10 negative
+ # low sig counts are 1 and 2, high sig counts are >= 5
+ # Thus, to discard the low sig events, the % threshold must exceed 10%, or the count threshold must exceed 2
+
+ # Discard both due to %
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE = 0.11
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT = 25
+ result, _ = self._get_events_for_filters(filters, funnelCorrelationType=FunnelCorrelationResultsType.events)
+
+ self.assertEqual(len(result), 2)
+
+ def test_events_within_conversion_window_for_correlation(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "funnel_window_interval": "10",
+ "funnel_window_interval_unit": "minute",
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ _create_person(distinct_ids=["user_successful"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id="user_successful",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="positively_related",
+ distinct_id="user_successful",
+ timestamp="2020-01-02T14:02:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id="user_successful",
+ timestamp="2020-01-02T14:06:00Z",
+ )
+
+ _create_person(distinct_ids=["user_dropoff"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id="user_dropoff",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="NOT_negatively_related",
+ distinct_id="user_dropoff",
+ timestamp="2020-01-02T14:15:00Z", # event happened outside conversion window
+ )
+
+ result, _ = self._get_events_for_filters(filters, funnelCorrelationType=FunnelCorrelationResultsType.events)
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+ expected_odds_ratios = [4]
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positively_related",
+ "success_count": 1,
+ "failure_count": 0,
+ # "odds_ratio": 4.0,
+ "correlation_type": "success",
+ }
+ ],
+ )
+
+ @also_test_with_materialized_columns(["blah", "signup_source"], verify_no_jsonextract=False)
+ def test_funnel_correlation_with_event_properties(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ for i in range(10):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="positively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={
+ "signup_source": "facebook" if i % 4 == 0 else "email",
+ "blah": "value_bleh",
+ },
+ )
+ # source: email occurs only twice, so would be discarded from result set
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ for i in range(10, 20):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"signup_source": "shazam" if i % 6 == 0 else "email"},
+ )
+ # source: shazam occurs only once, so would be discarded from result set
+
+ result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.event_with_properties,
+ funnelCorrelationEventNames=[
+ "positively_related",
+ "negatively_related",
+ ],
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+ expected_odds_ratios = [11, 5.5, 2 / 11]
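+ # Same +1-prior formula with 10 successes and 10 failures overall:
+ # blah::value_bleh (s=5, f=0): (6 / 1) * (11 / 6) = 11
+ # signup_source::facebook (s=3, f=0): (4 / 1) * (11 / 8) = 5.5
+ # signup_source::email (s=0, f=3): (1 / 4) * (8 / 11) = 2/11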
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positively_related::blah::value_bleh",
+ "success_count": 5,
+ "failure_count": 0,
+ # "odds_ratio": 11.0,
+ "correlation_type": "success",
+ },
+ {
+ "event": "positively_related::signup_source::facebook",
+ "success_count": 3,
+ "failure_count": 0,
+ # "odds_ratio": 5.5,
+ "correlation_type": "success",
+ },
+ {
+ "event": "negatively_related::signup_source::email",
+ "success_count": 0,
+ "failure_count": 3,
+ # "odds_ratio": 0.18181818181818182,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ self.assertEqual(
+ len(
+ self._get_actors_for_event(
+ filters,
+ "positively_related",
+ [EventPropertyFilter(operator=PropertyOperator.exact, key="blah", value="value_bleh")],
+ )
+ ),
+ 5,
+ )
+ self.assertEqual(
+ len(
+ self._get_actors_for_event(
+ filters,
+ "positively_related",
+ [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="facebook")],
+ )
+ ),
+ 3,
+ )
+ self.assertEqual(
+ len(
+ self._get_actors_for_event(
+ filters,
+ "positively_related",
+ [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="facebook")],
+ False,
+ )
+ ),
+ 0,
+ )
+ self.assertEqual(
+ len(
+ self._get_actors_for_event(
+ filters,
+ "negatively_related",
+ [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="email")],
+ False,
+ )
+ ),
+ 3,
+ )
+
+ @also_test_with_materialized_columns(["blah", "signup_source"], verify_no_jsonextract=False)
+ @snapshot_clickhouse_queries
+ def test_funnel_correlation_with_event_properties_and_groups(self):
+ # same test as test_funnel_correlation_with_event_properties but with events attached to groups
+ GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=1)
+
+ for i in range(10):
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=1,
+ group_key=f"org:{i}",
+ properties={"industry": "positive"},
+ )
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_1": f"org:{i}"},
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="positively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={
+ "signup_source": "facebook" if i % 4 == 0 else "email",
+ "blah": "value_bleh",
+ "$group_1": f"org:{i}",
+ },
+ )
+ # source: email occurs only twice, so would be discarded from result set
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ properties={"$group_1": f"org:{i}"},
+ )
+
+ for i in range(10, 20):
+ create_group(
+ team_id=self.team.pk,
+ group_type_index=1,
+ group_key=f"org:{i}",
+ properties={"industry": "positive"},
+ )
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ properties={"$group_1": f"org:{i}"},
+ )
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="negatively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={
+ "signup_source": "shazam" if i % 6 == 0 else "email",
+ "$group_1": f"org:{i}",
+ },
+ )
+ # source: shazam occurs only once, so would be discarded from result set
+
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ "aggregation_group_type_index": 1,
+ }
+
+ result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.event_with_properties,
+ funnelCorrelationEventNames=[
+ "positively_related",
+ "negatively_related",
+ ],
+ )
+
+ odds_ratios = [item.pop("odds_ratio") for item in result]
+ expected_odds_ratios = [11, 5.5, 2 / 11]
+
+ for odds, expected_odds in zip(odds_ratios, expected_odds_ratios):
+ self.assertAlmostEqual(odds, expected_odds)
+
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positively_related::blah::value_bleh",
+ "success_count": 5,
+ "failure_count": 0,
+ # "odds_ratio": 11.0,
+ "correlation_type": "success",
+ },
+ {
+ "event": "positively_related::signup_source::facebook",
+ "success_count": 3,
+ "failure_count": 0,
+ # "odds_ratio": 5.5,
+ "correlation_type": "success",
+ },
+ {
+ "event": "negatively_related::signup_source::email",
+ "success_count": 0,
+ "failure_count": 3,
+ # "odds_ratio": 0.18181818181818182,
+ "correlation_type": "failure",
+ },
+ ],
+ )
+
+ def test_funnel_correlation_with_event_properties_exclusions(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ # Need more than 2 events to get a correlation
+ for i in range(3):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="positively_related",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"signup_source": "facebook", "blah": "value_bleh"},
+ )
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ # At least one person that fails, to ensure we get results
+ _create_person(distinct_ids=[f"user_fail"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_fail",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+
+ result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.event_with_properties,
+ funnelCorrelationEventNames=["positively_related"],
+ funnelCorrelationEventExcludePropertyNames=["signup_source"],
+ )
+
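+ # With 3 successes and 1 failure overall, blah::value_bleh (s=3, f=0) gives
+ # ((3 + 1) / (0 + 1)) * ((1 - 0 + 1) / (3 - 3 + 1)) = 8, as asserted below.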
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": "positively_related::blah::value_bleh",
+ "success_count": 3,
+ "failure_count": 0,
+ "odds_ratio": 8,
+ "correlation_type": "success",
+ },
+ # missing signup_source, as expected
+ ],
+ )
+
+ self.assertEqual(
+ len(
+ self._get_actors_for_event(
+ filters,
+ "positively_related",
+ [EventPropertyFilter(operator=PropertyOperator.exact, key="blah", value="value_bleh")],
+ )
+ ),
+ 3,
+ )
+
+ # If you search for persons with a specific property, even if excluded earlier, you should get them
+ self.assertEqual(
+ len(
+ self._get_actors_for_event(
+ filters,
+ "positively_related",
+ [EventPropertyFilter(operator=PropertyOperator.exact, key="signup_source", value="facebook")],
+ )
+ ),
+ 3,
+ )
+
+ # :FIXME: This should also work with materialized columns
+ # @also_test_with_materialized_columns(["$event_type", "signup_source"])
+ def test_funnel_correlation_with_event_properties_autocapture(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ # Need a minimum of 3 hits to get a correlation result
+ for i in range(6):
+ _create_person(distinct_ids=[f"user_{i}"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+ _create_event(
+ team=self.team,
+ event="$autocapture",
+ distinct_id=f"user_{i}",
+ elements=[Element(nth_of_type=1, nth_child=0, tag_name="a", href="/movie")],
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"signup_source": "email", "$event_type": "click"},
+ )
+ # Test two different types of autocapture elements, with different counts, so we can accurately test results
+ if i % 2 == 0:
+ _create_event(
+ team=self.team,
+ event="$autocapture",
+ distinct_id=f"user_{i}",
+ elements=[
+ Element(
+ nth_of_type=1,
+ nth_child=0,
+ tag_name="button",
+ text="Pay $10",
+ )
+ ],
+ timestamp="2020-01-03T14:00:00Z",
+ properties={"signup_source": "facebook", "$event_type": "submit"},
+ )
+
+ _create_event(
+ team=self.team,
+ event="paid",
+ distinct_id=f"user_{i}",
+ timestamp="2020-01-04T14:00:00Z",
+ )
+
+ # At least one person that fails, to ensure we get results
+ _create_person(distinct_ids=[f"user_fail"], team_id=self.team.pk)
+ _create_event(
+ team=self.team,
+ event="user signed up",
+ distinct_id=f"user_fail",
+ timestamp="2020-01-02T14:00:00Z",
+ )
+
+ result, _ = self._get_events_for_filters(
+ filters,
+ funnelCorrelationType=FunnelCorrelationResultsType.event_with_properties,
+ funnelCorrelationEventNames=["$autocapture"],
+ )
+
+ # $autocapture results only return elements chain
+ self.assertEqual(
+ result,
+ [
+ {
+ "event": '$autocapture::elements_chain::click__~~__a:href="/movie"nth-child="0"nth-of-type="1"',
+ "success_count": 6,
+ "failure_count": 0,
+ "odds_ratio": 14.0,
+ "correlation_type": "success",
+ },
+ {
+ "event": '$autocapture::elements_chain::submit__~~__button:nth-child="0"nth-of-type="1"text="Pay $10"',
+ "success_count": 3,
+ "failure_count": 0,
+ "odds_ratio": 2.0,
+ "correlation_type": "success",
+ },
+ ],
+ )
+
+ # self.assertEqual(
+ # len(self._get_actors_for_event(filter, "$autocapture", {"signup_source": "facebook"})),
+ # 3,
+ # )
+ # self.assertEqual(
+ # len(self._get_actors_for_event(filter, "$autocapture", {"$event_type": "click"})),
+ # 6,
+ # )
+ # self.assertEqual(
+ # len(
+ # self._get_actors_for_event(
+ # filter,
+ # "$autocapture",
+ # [
+ # {
+ # "key": "tag_name",
+ # "operator": "exact",
+ # "type": "element",
+ # "value": "button",
+ # },
+ # {
+ # "key": "text",
+ # "operator": "exact",
+ # "type": "element",
+ # "value": "Pay $10",
+ # },
+ # ],
+ # )
+ # ),
+ # 3,
+ # )
+ # self.assertEqual(
+ # len(
+ # self._get_actors_for_event(
+ # filter,
+ # "$autocapture",
+ # [
+ # {
+ # "key": "tag_name",
+ # "operator": "exact",
+ # "type": "element",
+ # "value": "a",
+ # },
+ # {
+ # "key": "href",
+ # "operator": "exact",
+ # "type": "element",
+ # "value": "/movie",
+ # },
+ # ],
+ # )
+ # ),
+ # 6,
+ # )
+
+
+class TestCorrelationFunctions(unittest.TestCase):
+ def test_are_results_insignificant(self):
+ # Same setup as above test: test_discarding_insignificant_events
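+ # These expectations are consistent with a table counting as significant when
+ # either threshold passes: person count >= MIN_PERSON_COUNT, or its share of
+ # (success_total + failure_total) >= MIN_PERSON_PERCENTAGE.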
+ contingency_tables = [
+ EventContingencyTable(
+ event="negatively_related",
+ visited=EventStats(success_count=0, failure_count=5),
+ success_total=10,
+ failure_total=10,
+ ),
+ EventContingencyTable(
+ event="positively_related",
+ visited=EventStats(success_count=5, failure_count=0),
+ success_total=10,
+ failure_total=10,
+ ),
+ EventContingencyTable(
+ event="low_sig_negatively_related",
+ visited=EventStats(success_count=0, failure_count=2),
+ success_total=10,
+ failure_total=10,
+ ),
+ EventContingencyTable(
+ event="low_sig_positively_related",
+ visited=EventStats(success_count=1, failure_count=0),
+ success_total=10,
+ failure_total=10,
+ ),
+ ]
+
+ # Discard both low_sig due to %
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE = 0.11
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT = 25
+ result = [
+ 1
+ for contingency_table in contingency_tables
+ if not FunnelCorrelationQueryRunner.are_results_insignificant(contingency_table)
+ ]
+ self.assertEqual(len(result), 2)
+
+ # Discard one low_sig due to %
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE = 0.051
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT = 25
+ result = [
+ 1
+ for contingency_table in contingency_tables
+ if not FunnelCorrelationQueryRunner.are_results_insignificant(contingency_table)
+ ]
+ self.assertEqual(len(result), 3)
+
+ # Discard both due to count
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE = 0.5
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT = 3
+ result = [
+ 1
+ for contingency_table in contingency_tables
+ if not FunnelCorrelationQueryRunner.are_results_insignificant(contingency_table)
+ ]
+ self.assertEqual(len(result), 2)
+
+ # Discard one due to count
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE = 0.5
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT = 2
+ result = [
+ 1
+ for contingency_table in contingency_tables
+ if not FunnelCorrelationQueryRunner.are_results_insignificant(contingency_table)
+ ]
+ self.assertEqual(len(result), 3)
+
+ # Discard everything due to %
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE = 0.5
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT = 100
+ result = [
+ 1
+ for contingency_table in contingency_tables
+ if not FunnelCorrelationQueryRunner.are_results_insignificant(contingency_table)
+ ]
+ self.assertEqual(len(result), 0)
+
+ # Discard everything due to count
+ FunnelCorrelationQueryRunner.MIN_PERSON_PERCENTAGE = 0.5
+ FunnelCorrelationQueryRunner.MIN_PERSON_COUNT = 6
+ result = [
+ 1
+ for contingency_table in contingency_tables
+ if not FunnelCorrelationQueryRunner.are_results_insignificant(contingency_table)
+ ]
+ self.assertEqual(len(result), 0)
diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_correlations_persons.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlations_persons.py
new file mode 100644
index 0000000000000..f324dcfcf7c3a
--- /dev/null
+++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_correlations_persons.py
@@ -0,0 +1,647 @@
+from typing import Any, Dict, Optional, cast
+from datetime import datetime, timedelta
+from uuid import UUID
+
+from django.utils import timezone
+from freezegun import freeze_time
+
+from posthog.constants import INSIGHT_FUNNELS
+from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner
+from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
+from posthog.models.team.team import Team
+from posthog.schema import (
+ ActorsQuery,
+ EventsNode,
+ FunnelCorrelationActorsQuery,
+ FunnelCorrelationQuery,
+ FunnelCorrelationResultsType,
+ FunnelsActorsQuery,
+ FunnelsQuery,
+)
+from posthog.session_recordings.queries.test.session_replay_sql import (
+ produce_replay_summary,
+)
+from posthog.test.base import (
+ APIBaseTest,
+ ClickhouseTestMixin,
+ _create_event,
+ _create_person,
+ snapshot_clickhouse_queries,
+)
+from posthog.test.test_journeys import journeys_for
+
+FORMAT_TIME = "%Y-%m-%d 00:00:00"
+MAX_STEP_COLUMN = 0
+COUNT_COLUMN = 1
+PERSON_ID_COLUMN = 2
+
+
+def get_actors(
+ filters: Dict[str, Any],
+ team: Team,
+ funnelCorrelationType: Optional[FunnelCorrelationResultsType] = FunnelCorrelationResultsType.events,
+ funnelCorrelationNames=None,
+ funnelCorrelationPersonConverted: Optional[bool] = None,
+ funnelCorrelationPersonEntity: Optional[EventsNode] = None,
+ funnelCorrelationPropertyValues=None,
+ includeRecordings: Optional[bool] = True,
+):
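+ """Build and run the ActorsQuery used by the correlation actors tests.
+
+ The source chain nests four levels: FunnelsQuery -> FunnelsActorsQuery ->
+ FunnelCorrelationQuery -> FunnelCorrelationActorsQuery. The ActorsQuery then
+ selects person columns, or just actor_id when the funnel aggregates by group.
+ """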
+ funnels_query = cast(FunnelsQuery, filter_to_query(filters))
+ funnel_actors_query = FunnelsActorsQuery(source=funnels_query, includeRecordings=includeRecordings)
+ correlation_query = FunnelCorrelationQuery(
+ source=funnel_actors_query,
+ funnelCorrelationType=(funnelCorrelationType or FunnelCorrelationResultsType.events),
+ funnelCorrelationNames=funnelCorrelationNames,
+ # funnelCorrelationExcludeNames=funnelCorrelationExcludeNames,
+ # funnelCorrelationExcludeEventNames=funnelCorrelationExcludeEventNames,
+ # funnelCorrelationEventNames=funnelCorrelationEventNames,
+ # funnelCorrelationEventExcludePropertyNames=funnelCorrelationEventExcludePropertyNames,
+ )
+ correlation_actors_query = FunnelCorrelationActorsQuery(
+ source=correlation_query,
+ funnelCorrelationPersonConverted=funnelCorrelationPersonConverted,
+ funnelCorrelationPersonEntity=funnelCorrelationPersonEntity,
+ funnelCorrelationPropertyValues=funnelCorrelationPropertyValues,
+ )
+ persons_select = ["id", "person", *(["matched_recordings"] if includeRecordings else [])]
+ groups_select = ["actor_id"]
+ actors_query = ActorsQuery(
+ source=correlation_actors_query,
+ select=persons_select if funnels_query.aggregation_group_type_index is None else groups_select,
+ )
+ response = ActorsQueryRunner(query=actors_query, team=team).calculate()
+ return response.results
+
+
+class TestFunnelCorrelationsActors(ClickhouseTestMixin, APIBaseTest):
+ maxDiff = None
+
+ def _setup_basic_test(self):
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ success_target_persons = []
+ failure_target_persons = []
+ events_by_person = {}
+ for i in range(10):
+ person_id = f"user_{i}"
+ person = _create_person(distinct_ids=[person_id], team_id=self.team.pk)
+ events_by_person[person_id] = [{"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}]
+
+ if i % 2 == 0:
+ events_by_person[person_id].append(
+ {
+ "event": "positively_related",
+ "timestamp": datetime(2020, 1, 3, 14),
+ }
+ )
+
+ success_target_persons.append(str(person.uuid))
+
+ events_by_person[person_id].append({"event": "paid", "timestamp": datetime(2020, 1, 4, 14)})
+
+ for i in range(10, 20):
+ person_id = f"user_{i}"
+ person = _create_person(distinct_ids=[person_id], team_id=self.team.pk)
+ events_by_person[person_id] = [{"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)}]
+ if i % 2 == 0:
+ events_by_person[person_id].append(
+ {
+ "event": "negatively_related",
+ "timestamp": datetime(2020, 1, 3, 14),
+ }
+ )
+ failure_target_persons.append(str(person.uuid))
+
+ # One positively_related as failure
+ person_fail_id = f"user_fail"
+ person_fail = _create_person(distinct_ids=[person_fail_id], team_id=self.team.pk)
+ events_by_person[person_fail_id] = [
+ {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)},
+ {"event": "positively_related", "timestamp": datetime(2020, 1, 3, 14)},
+ ]
+
+ # One negatively_related as success
+ person_success_id = f"user_succ"
+ person_succ = _create_person(distinct_ids=[person_success_id], team_id=self.team.pk)
+ events_by_person[person_success_id] = [
+ {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)},
+ {"event": "negatively_related", "timestamp": datetime(2020, 1, 3, 14)},
+ {"event": "paid", "timestamp": datetime(2020, 1, 4, 14)},
+ ]
+ journeys_for(events_by_person, self.team, create_people=False)
+
+ return (
+ filters,
+ success_target_persons,
+ failure_target_persons,
+ person_fail,
+ person_succ,
+ )
+
+ def test_basic_funnel_correlation_with_events(self):
+ (
+ filters,
+ success_target_persons,
+ failure_target_persons,
+ person_fail,
+ person_succ,
+ ) = self._setup_basic_test()
+
+ # test positively_related successes
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=True,
+ funnelCorrelationPersonEntity=EventsNode(event="positively_related"),
+ )
+
+ self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], success_target_persons)
+
+ # test negatively_related failures
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=False,
+ funnelCorrelationPersonEntity=EventsNode(event="negatively_related"),
+ )
+
+ self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], failure_target_persons)
+
+ # test positively_related failures
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=False,
+ funnelCorrelationPersonEntity=EventsNode(event="positively_related"),
+ )
+
+ self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], [str(person_fail.uuid)])
+
+ # test negatively_related successes
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=True,
+ funnelCorrelationPersonEntity=EventsNode(event="negatively_related"),
+ )
+
+ self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], [str(person_succ.uuid)])
+
+ # test all positively_related
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=None,
+ funnelCorrelationPersonEntity=EventsNode(event="positively_related"),
+ )
+
+ self.assertCountEqual(
+ [str(val[1]["id"]) for val in serialized_actors],
+ [*success_target_persons, str(person_fail.uuid)],
+ )
+
+ # test all negatively_related
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=None,
+ funnelCorrelationPersonEntity=EventsNode(event="negatively_related"),
+ )
+
+ self.assertCountEqual(
+ [str(val[1]["id"]) for val in serialized_actors],
+ [*failure_target_persons, str(person_succ.uuid)],
+ )
+
+ # @patch("posthog.tasks.calculate_cohort.insert_cohort_from_insight_filter.delay")
+ # def test_create_funnel_correlation_cohort(self, _insert_cohort_from_insight_filter):
+ # (
+ # filter,
+ # success_target_persons,
+ # failure_target_persons,
+ # person_fail,
+ # person_succ,
+ # ) = self._setup_basic_test()
+
+ # params = {
+ # "events": [
+ # {"id": "user signed up", "type": "events", "order": 0},
+ # {"id": "paid", "type": "events", "order": 1},
+ # ],
+ # "insight": INSIGHT_FUNNELS,
+ # "date_from": "2020-01-01",
+ # "date_to": "2020-01-14",
+ # "funnel_correlation_type": "events",
+ # "funnel_correlation_person_entity": {
+ # "id": "positively_related",
+ # "type": "events",
+ # },
+ # "funnel_correlation_person_converted": "TrUe",
+ # }
+
+ # response = self.client.post(
+ # f"/api/projects/{self.team.id}/cohorts/?{urllib.parse.urlencode(params)}",
+ # {"name": "test", "is_static": True},
+ # ).json()
+
+ # cohort_id = response["id"]
+
+ # _insert_cohort_from_insight_filter.assert_called_once_with(
+ # cohort_id,
+ # {
+ # "events": "[{'id': 'user signed up', 'type': 'events', 'order': 0}, {'id': 'paid', 'type': 'events', 'order': 1}]",
+ # "insight": "FUNNELS",
+ # "date_from": "2020-01-01",
+ # "date_to": "2020-01-14",
+ # "funnel_correlation_type": "events",
+ # "funnel_correlation_person_entity": "{'id': 'positively_related', 'type': 'events'}",
+ # "funnel_correlation_person_converted": "TrUe",
+ # },
+ # )
+
+ # insert_cohort_from_insight_filter(cohort_id, params)
+
+ # cohort = Cohort.objects.get(pk=cohort_id)
+ # people = Person.objects.filter(cohort__id=cohort.pk)
+ # self.assertEqual(cohort.errors_calculating, 0)
+ # self.assertEqual(people.count(), 5)
+ # self.assertEqual(cohort.count, 5)
+
+ def test_people_arent_returned_multiple_times(self):
+ people = journeys_for(
+ {
+ "user_1": [
+ {"event": "user signed up", "timestamp": datetime(2020, 1, 2, 14)},
+ {
+ "event": "positively_related",
+ "timestamp": datetime(2020, 1, 3, 14),
+ },
+ # duplicate event
+ {
+ "event": "positively_related",
+ "timestamp": datetime(2020, 1, 3, 14),
+ },
+ {"event": "paid", "timestamp": datetime(2020, 1, 4, 14)},
+ ]
+ },
+ self.team,
+ )
+
+ filters = {
+ "events": [
+ {"id": "user signed up", "type": "events", "order": 0},
+ {"id": "paid", "type": "events", "order": 1},
+ ],
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2020-01-01",
+ "date_to": "2020-01-14",
+ }
+
+ serialized_actors = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=True,
+ funnelCorrelationPersonEntity=EventsNode(event="positively_related"),
+ )
+
+ self.assertCountEqual([str(val[1]["id"]) for val in serialized_actors], [str(people["user_1"].uuid)])
+
+ @snapshot_clickhouse_queries
+ @freeze_time("2021-01-02 00:00:00.000Z")
+ def test_funnel_correlation_on_event_with_recordings(self):
+ p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"})
+ _create_event(
+ event="$pageview",
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=timezone.now(),
+ properties={"$session_id": "s2", "$window_id": "w1"},
+ event_uuid="11111111-1111-1111-1111-111111111111",
+ )
+ _create_event(
+ event="insight loaded",
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=(timezone.now() + timedelta(minutes=2)),
+ properties={"$session_id": "s2", "$window_id": "w2"},
+ event_uuid="31111111-1111-1111-1111-111111111111",
+ )
+ _create_event(
+ event="insight analyzed",
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=(timezone.now() + timedelta(minutes=3)),
+ properties={"$session_id": "s2", "$window_id": "w2"},
+ event_uuid="21111111-1111-1111-1111-111111111111",
+ )
+
+ timestamp = datetime(2021, 1, 2, 0, 0, 0)
+ produce_replay_summary(
+ team_id=self.team.pk,
+ session_id="s2",
+ distinct_id="user_1",
+ first_timestamp=timestamp,
+ last_timestamp=timestamp,
+ )
+
+ # Success filter
+ filters = {
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2021-01-01",
+ "date_to": "2021-01-08",
+ "events": [
+ {"id": "$pageview", "order": 0},
+ {"id": "insight analyzed", "order": 1},
+ ],
+ }
+
+ results = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=True,
+ funnelCorrelationPersonEntity=EventsNode(event="insight loaded"),
+ )
+
+ self.assertEqual(results[0][1]["id"], p1.uuid)
+ self.assertEqual(
+ list(results[0][2]),
+ [
+ {
+ "events": [
+ {
+ "timestamp": timezone.now() + timedelta(minutes=3),
+ "uuid": UUID("21111111-1111-1111-1111-111111111111"),
+ "window_id": "w2",
+ }
+ ],
+ "session_id": "s2",
+ }
+ ],
+ )
+
+ # Drop off filter
+ filters = {
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2021-01-01",
+ "date_to": "2021-01-08",
+ "funnel_correlation_type": "events",
+ "events": [
+ {"id": "$pageview", "order": 0},
+ {"id": "insight analyzed", "order": 1},
+ {"id": "insight updated", "order": 2},
+ ],
+ }
+ results = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationPersonConverted=False,
+ funnelCorrelationPersonEntity=EventsNode(event="insight loaded"),
+ )
+
+ self.assertEqual(results[0][1]["id"], p1.uuid)
+ self.assertEqual(
+ list(results[0][2]),
+ [
+ {
+ "events": [
+ {
+ "timestamp": timezone.now() + timedelta(minutes=3),
+ "uuid": UUID("21111111-1111-1111-1111-111111111111"),
+ "window_id": "w2",
+ }
+ ],
+ "session_id": "s2",
+ }
+ ],
+ )
+
+ @snapshot_clickhouse_queries
+ @freeze_time("2021-01-02 00:00:00.000Z")
+ def test_funnel_correlation_on_properties_with_recordings(self):
+ p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"})
+ _create_event(
+ event="$pageview",
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=timezone.now(),
+ properties={"$session_id": "s2", "$window_id": "w1"},
+ event_uuid="11111111-1111-1111-1111-111111111111",
+ )
+ _create_event(
+ event="insight analyzed",
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=(timezone.now() + timedelta(minutes=3)),
+ properties={"$session_id": "s2", "$window_id": "w2"},
+ event_uuid="21111111-1111-1111-1111-111111111111",
+ )
+
+ timestamp = datetime(2021, 1, 2, 0, 0, 0)
+ produce_replay_summary(
+ team_id=self.team.pk,
+ session_id="s2",
+ distinct_id="user_1",
+ first_timestamp=timestamp,
+ last_timestamp=timestamp,
+ )
+
+ # Success filter
+ filters = {
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2021-01-01",
+ "date_to": "2021-01-08",
+ "events": [
+ {"id": "$pageview", "order": 0},
+ {"id": "insight analyzed", "order": 1},
+ ],
+ }
+ results = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationPersonConverted=True,
+ funnelCorrelationPropertyValues=[
+ {
+ "key": "foo",
+ "value": "bar",
+ "operator": "exact",
+ "type": "person",
+ }
+ ],
+ )
+
+ self.assertEqual(results[0][1]["id"], p1.uuid)
+ self.assertEqual(
+ list(results[0][2]),
+ [
+ {
+ "events": [
+ {
+ "timestamp": timezone.now() + timedelta(minutes=3),
+ "uuid": UUID("21111111-1111-1111-1111-111111111111"),
+ "window_id": "w2",
+ }
+ ],
+ "session_id": "s2",
+ }
+ ],
+ )
+
+ @snapshot_clickhouse_queries
+ @freeze_time("2021-01-02 00:00:00.000Z")
+ def test_strict_funnel_correlation_with_recordings(self):
+ # First user that successfully completes the strict funnel
+ p1 = _create_person(distinct_ids=["user_1"], team=self.team, properties={"foo": "bar"})
+ _create_event(
+ event="$pageview",
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=timezone.now(),
+ properties={"$session_id": "s2", "$window_id": "w1"},
+ event_uuid="11111111-1111-1111-1111-111111111111",
+ )
+ _create_event(
+ event="insight analyzed",
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=(timezone.now() + timedelta(minutes=3)),
+ properties={"$session_id": "s2", "$window_id": "w2"},
+ event_uuid="31111111-1111-1111-1111-111111111111",
+ )
+ _create_event(
+ event="insight analyzed", # Second event should not be returned
+ distinct_id="user_1",
+ team=self.team,
+ timestamp=(timezone.now() + timedelta(minutes=4)),
+ properties={"$session_id": "s2", "$window_id": "w2"},
+ event_uuid="41111111-1111-1111-1111-111111111111",
+ )
+ timestamp = datetime(2021, 1, 2, 0, 0, 0)
+ produce_replay_summary(
+ team_id=self.team.pk,
+ session_id="s2",
+ distinct_id="user_1",
+ first_timestamp=timestamp,
+ last_timestamp=timestamp,
+ )
+
+ # Second user with strict funnel drop off, but completed the step events for a normal funnel
+ p2 = _create_person(distinct_ids=["user_2"], team=self.team, properties={"foo": "bar"})
+ _create_event(
+ event="$pageview",
+ distinct_id="user_2",
+ team=self.team,
+ timestamp=timezone.now(),
+ properties={"$session_id": "s3", "$window_id": "w1"},
+ event_uuid="51111111-1111-1111-1111-111111111111",
+ )
+ _create_event(
+ event="insight loaded", # Interupting event
+ distinct_id="user_2",
+ team=self.team,
+ timestamp=(timezone.now() + timedelta(minutes=3)),
+ properties={"$session_id": "s3", "$window_id": "w2"},
+ event_uuid="61111111-1111-1111-1111-111111111111",
+ )
+ _create_event(
+ event="insight analyzed",
+ distinct_id="user_2",
+ team=self.team,
+ timestamp=(timezone.now() + timedelta(minutes=4)),
+ properties={"$session_id": "s3", "$window_id": "w2"},
+ event_uuid="71111111-1111-1111-1111-111111111111",
+ )
+ timestamp1 = datetime(2021, 1, 2, 0, 0, 0)
+ produce_replay_summary(
+ team_id=self.team.pk,
+ session_id="s3",
+ distinct_id="user_2",
+ first_timestamp=timestamp1,
+ last_timestamp=timestamp1,
+ )
+
+ # Success filter
+ filters = {
+ "insight": INSIGHT_FUNNELS,
+ "date_from": "2021-01-01",
+ "date_to": "2021-01-08",
+ "funnel_order_type": "strict",
+ "events": [
+ {"id": "$pageview", "order": 0},
+ {"id": "insight analyzed", "order": 1},
+ ],
+ }
+
+ results = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationPersonConverted=True,
+ funnelCorrelationPropertyValues=[
+ {
+ "key": "foo",
+ "value": "bar",
+ "operator": "exact",
+ "type": "person",
+ }
+ ],
+ )
+
+ self.assertEqual(len(results), 1)
+ self.assertEqual(results[0][1]["id"], p1.uuid)
+ self.assertEqual(
+ list(results[0][2]),
+ [
+ {
+ "events": [
+ {
+ "timestamp": timezone.now() + timedelta(minutes=3),
+ "uuid": UUID("31111111-1111-1111-1111-111111111111"),
+ "window_id": "w2",
+ }
+ ],
+ "session_id": "s2",
+ }
+ ],
+ )
+
+ # Drop off filter
+ results = get_actors(
+ filters,
+ self.team,
+ funnelCorrelationType=FunnelCorrelationResultsType.properties,
+ funnelCorrelationPersonConverted=False,
+ funnelCorrelationPropertyValues=[
+ {
+ "key": "foo",
+ "value": "bar",
+ "operator": "exact",
+ "type": "person",
+ }
+ ],
+ )
+
+ self.assertEqual(results[0][1]["id"], p2.uuid)
+ self.assertEqual(
+ list(results[0][2]),
+ [
+ {
+ "events": [
+ {
+ "timestamp": timezone.now(),
+ "uuid": UUID("51111111-1111-1111-1111-111111111111"),
+ "window_id": "w1",
+ }
+ ],
+ "session_id": "s3",
+ }
+ ],
+ )
diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py
index ff7a52db0a1f1..47c1487e5fbcc 100644
--- a/posthog/hogql_queries/insights/funnels/utils.py
+++ b/posthog/hogql_queries/insights/funnels/utils.py
@@ -5,8 +5,6 @@
from posthog.schema import FunnelConversionWindowTimeUnit, FunnelVizType, FunnelsFilter, StepOrderValue
from rest_framework.exceptions import ValidationError
-from posthog.settings.ee import EE_AVAILABLE
-
def get_funnel_order_class(funnelsFilter: FunnelsFilter):
from posthog.hogql_queries.insights.funnels import (
@@ -30,20 +28,7 @@ def get_funnel_actor_class(funnelsFilter: FunnelsFilter):
FunnelTrendsActors,
)
- # if filter.correlation_person_entity and EE_AVAILABLE:
- if False:
- if EE_AVAILABLE: # type: ignore
- # from ee.clickhouse.queries.funnels.funnel_correlation_persons import (
- # FunnelCorrelationActors,
- # )
-
- return FunnelActors
- # return FunnelCorrelationActors
- else:
- raise ValueError(
- "Funnel Correlations is not available without an enterprise license and enterprise supported deployment"
- )
- elif funnelsFilter.funnelVizType == FunnelVizType.trends:
+ if funnelsFilter.funnelVizType == FunnelVizType.trends:
return FunnelTrendsActors
else:
if funnelsFilter.funnelOrderType == StepOrderValue.unordered:
@@ -79,11 +64,11 @@ def get_breakdown_expr(
breakdown: List[str | int] | None, properties_column: str, normalize_url: bool | None = False
) -> ast.Expr:
if isinstance(breakdown, str) or isinstance(breakdown, int) or breakdown is None:
- return parse_expr(f"ifNull({properties_column}.{breakdown}, '')")
+ return parse_expr(f"ifNull({properties_column}.\"{breakdown}\", '')")
else:
exprs = []
for b in breakdown:
- expr = parse_expr(normalize_url_breakdown(f"ifNull({properties_column}.{b}, '')", normalize_url))
+ expr = parse_expr(normalize_url_breakdown(f"ifNull({properties_column}.\"{b}\", '')", normalize_url))
exprs.append(expr)
expression = ast.Array(exprs=exprs)
diff --git a/posthog/hogql_queries/insights/insight_actors_query_runner.py b/posthog/hogql_queries/insights/insight_actors_query_runner.py
index 57b1c8630c45f..782dd5b054a0e 100644
--- a/posthog/hogql_queries/insights/insight_actors_query_runner.py
+++ b/posthog/hogql_queries/insights/insight_actors_query_runner.py
@@ -3,6 +3,7 @@
from posthog.hogql import ast
from posthog.hogql.query import execute_hogql_query
+from posthog.hogql_queries.insights.funnels.funnel_correlation_query_runner import FunnelCorrelationQueryRunner
from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner
from posthog.hogql_queries.insights.lifecycle_query_runner import LifecycleQueryRunner
from posthog.hogql_queries.insights.paths_query_runner import PathsQueryRunner
@@ -11,7 +12,15 @@
from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner
from posthog.hogql_queries.query_runner import QueryRunner, get_query_runner
from posthog.models.filters.mixins.utils import cached_property
-from posthog.schema import FunnelsActorsQuery, InsightActorsQuery, HogQLQueryResponse, StickinessQuery, TrendsQuery
+from posthog.schema import (
+ FunnelCorrelationActorsQuery,
+ FunnelCorrelationQuery,
+ FunnelsActorsQuery,
+ InsightActorsQuery,
+ HogQLQueryResponse,
+ StickinessQuery,
+ TrendsQuery,
+)
from posthog.types import InsightActorsQueryNode
@@ -37,6 +46,11 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
funnels_runner = cast(FunnelsQueryRunner, self.source_runner)
funnels_runner.context.actorsQuery = cast(FunnelsActorsQuery, self.query)
return funnels_runner.to_actors_query()
+ elif isinstance(self.source_runner, FunnelCorrelationQueryRunner):
+ funnel_correlation_runner = cast(FunnelCorrelationQueryRunner, self.source_runner)
+ assert isinstance(self.query, FunnelCorrelationActorsQuery)
+ funnel_correlation_runner.correlation_actors_query = self.query
+ return funnel_correlation_runner.to_actors_query()
elif isinstance(self.source_runner, RetentionQueryRunner):
query = cast(InsightActorsQuery, self.query)
retention_runner = cast(RetentionQueryRunner, self.source_runner)
@@ -65,6 +79,11 @@ def group_type_index(self) -> int | None:
if isinstance(self.source_runner, RetentionQueryRunner):
return cast(RetentionQueryRunner, self.source_runner).group_type_index
+ if isinstance(self.source_runner, FunnelCorrelationQueryRunner):
+ assert isinstance(self.query, FunnelCorrelationActorsQuery)
+ assert isinstance(self.query.source, FunnelCorrelationQuery)
+ return self.query.source.source.source.aggregation_group_type_index
+
if (
isinstance(self.source_runner, StickinessQueryRunner) and isinstance(self.query.source, StickinessQuery)
) or (isinstance(self.source_runner, TrendsQueryRunner) and isinstance(self.query.source, TrendsQuery)):
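
The `self.query.source.source.source` chain in the new `group_type_index` branch unwraps three levels of query nesting before reaching the funnel's aggregation setting. A stand-in sketch with plain dataclasses (not the real pydantic models from `posthog.schema`) of what each `.source` hop yields:

from dataclasses import dataclass
from typing import Optional

@dataclass
class FunnelsQuery:  # innermost: carries the group aggregation setting
    aggregation_group_type_index: Optional[int]

@dataclass
class FunnelsActorsQuery:
    source: FunnelsQuery

@dataclass
class FunnelCorrelationQuery:
    source: FunnelsActorsQuery

@dataclass
class FunnelCorrelationActorsQuery:  # outermost: what self.query is here
    source: FunnelCorrelationQuery

query = FunnelCorrelationActorsQuery(
    FunnelCorrelationQuery(FunnelsActorsQuery(FunnelsQuery(aggregation_group_type_index=0)))
)
assert query.source.source.source.aggregation_group_type_index == 0
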
diff --git a/posthog/hogql_queries/insights/trends/aggregation_operations.py b/posthog/hogql_queries/insights/trends/aggregation_operations.py
index 9d60f83f5765d..a0307d625c9a1 100644
--- a/posthog/hogql_queries/insights/trends/aggregation_operations.py
+++ b/posthog/hogql_queries/insights/trends/aggregation_operations.py
@@ -5,6 +5,7 @@
from posthog.models.team.team import Team
from posthog.schema import EventsNode, ActionsNode, DataWarehouseNode
from posthog.models.filters.mixins.utils import cached_property
+from posthog.hogql_queries.insights.data_warehouse_mixin import DataWarehouseInsightQueryMixin
class QueryAlternator:
@@ -48,7 +49,7 @@ def replace_select_from(self, join_expr: ast.JoinExpr) -> None:
self._select_from = join_expr
-class AggregationOperations:
+class AggregationOperations(DataWarehouseInsightQueryMixin):
team: Team
series: Union[EventsNode, ActionsNode, DataWarehouseNode]
query_date_range: QueryDateRange
@@ -155,6 +156,8 @@ def _math_func(self, method: str, override_chain: Optional[List[str | int]]) ->
if self.series.math_property == "$session_duration":
chain = ["session_duration"]
+ elif isinstance(self.series, DataWarehouseNode) and self.series.math_property:
+ chain = [self.series.math_property]
else:
chain = ["properties", self.series.math_property]
@@ -344,6 +347,14 @@ def _events_query(
query = parse_select(
"""
+ SELECT
+ count({id_field}) AS total
+ FROM {table} AS e
+ WHERE {events_where_clause}
+ GROUP BY {person_field}
+ """
+ if isinstance(self.series, DataWarehouseNode)
+ else """
SELECT
count({id_field}) AS total
FROM events AS e
@@ -353,6 +364,7 @@ def _events_query(
""",
placeholders={
"id_field": self._id_field,
+ "table": self._table_expr,
"events_where_clause": where_clause_combined,
"sample": sample_value,
"person_field": ast.Field(
diff --git a/posthog/hogql_queries/insights/trends/breakdown.py b/posthog/hogql_queries/insights/trends/breakdown.py
index af95e0558e09e..45a3a8421e8d8 100644
--- a/posthog/hogql_queries/insights/trends/breakdown.py
+++ b/posthog/hogql_queries/insights/trends/breakdown.py
@@ -1,4 +1,4 @@
-from typing import Dict, List, Optional, Tuple, Union
+from typing import Dict, List, Optional, Tuple, Union, cast
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr
from posthog.hogql.timings import HogQLTimings
@@ -27,7 +27,7 @@ class Breakdown:
timings: HogQLTimings
modifiers: HogQLQueryModifiers
events_filter: ast.Expr
- breakdown_values_override: Optional[List[str | int]]
+ breakdown_values_override: Optional[List[str | int | float]]
def __init__(
self,
@@ -38,7 +38,7 @@ def __init__(
timings: HogQLTimings,
modifiers: HogQLQueryModifiers,
events_filter: ast.Expr,
- breakdown_values_override: Optional[List[str | int]] = None,
+ breakdown_values_override: Optional[List[str | int | float]] = None,
):
self.team = team
self.query = query
@@ -51,7 +51,11 @@ def __init__(
@cached_property
def enabled(self) -> bool:
- return self.query.breakdownFilter is not None and self.query.breakdownFilter.breakdown is not None
+ return (
+ self.query.breakdownFilter is not None
+ and self.query.breakdownFilter.breakdown is not None
+ and self.has_breakdown_values
+ )
@cached_property
def is_session_type(self) -> bool:
@@ -96,7 +100,7 @@ def column_expr(self) -> ast.Expr:
)
# If there's no breakdown values
- if len(self._get_breakdown_values) == 1 and self._get_breakdown_values[0] is None:
+ if len(self._breakdown_values) == 1 and self._breakdown_values[0] is None:
return ast.Alias(alias="breakdown_value", expr=ast.Field(chain=self._properties_chain))
return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_transform_func)
@@ -145,20 +149,30 @@ def events_where_filter(self) -> ast.Expr | None:
left = ast.Field(chain=self._properties_chain)
compare_ops = []
- for v in self._get_breakdown_values:
+ for _value in self._breakdown_values:
+ value: Optional[str | int | float] = _value
# If the value is one of the "other" values, then use the `transform()` func
if (
- v == BREAKDOWN_OTHER_STRING_LABEL
- or v == BREAKDOWN_OTHER_NUMERIC_LABEL
- or v == float(BREAKDOWN_OTHER_NUMERIC_LABEL)
+ value == BREAKDOWN_OTHER_STRING_LABEL
+ or value == BREAKDOWN_OTHER_NUMERIC_LABEL
+ or value == float(BREAKDOWN_OTHER_NUMERIC_LABEL)
):
transform_func = self._get_breakdown_transform_func
compare_ops.append(
- ast.CompareOperation(left=transform_func, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=v))
+ ast.CompareOperation(
+ left=transform_func, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value)
+ )
)
else:
+ if (
+ value == BREAKDOWN_NULL_STRING_LABEL
+ or value == BREAKDOWN_NULL_NUMERIC_LABEL
+ or value == float(BREAKDOWN_NULL_NUMERIC_LABEL)
+ ):
+ value = None
+
compare_ops.append(
- ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=v))
+ ast.CompareOperation(left=left, op=ast.CompareOperationOp.Eq, right=ast.Constant(value=value))
)
if len(compare_ops) == 1:
@@ -170,7 +184,7 @@ def events_where_filter(self) -> ast.Expr | None:
@cached_property
def _get_breakdown_transform_func(self) -> ast.Call:
- values = self._get_breakdown_values
+ values = self._breakdown_values
all_values_are_ints_or_none = all(isinstance(value, int) or value is None for value in values)
all_values_are_floats_or_none = all(isinstance(value, float) or value is None for value in values)
@@ -201,19 +215,20 @@ def _get_breakdown_transform_func(self) -> ast.Call:
def _breakdown_buckets_ast(self) -> ast.Array:
buckets = self._get_breakdown_histogram_buckets()
values = [f"[{t[0]},{t[1]}]" for t in buckets]
+ # TODO: add this only if needed
values.append('["",""]')
return ast.Array(exprs=list(map(lambda v: ast.Constant(value=v), values)))
@cached_property
def _breakdown_values_ast(self) -> ast.Array:
- return ast.Array(exprs=[ast.Constant(value=v) for v in self._get_breakdown_values])
+ return ast.Array(exprs=[ast.Constant(value=v) for v in self._breakdown_values])
@cached_property
- def _get_breakdown_values(self) -> List[str | int]:
+ def _all_breakdown_values(self) -> List[str | int | float | None]:
# Used in the actors query
if self.breakdown_values_override is not None:
- return self.breakdown_values_override
+ return cast(List[str | int | float | None], self.breakdown_values_override)
if self.query.breakdownFilter is None:
return []
@@ -226,12 +241,35 @@ def _get_breakdown_values(self) -> List[str | int]:
chart_display_type=self._trends_display().display_type,
breakdown_filter=self.query.breakdownFilter,
query_date_range=self.query_date_range,
+ modifiers=self.modifiers,
)
- return breakdown.get_breakdown_values()
+ return cast(List[str | int | float | None], breakdown.get_breakdown_values())
+
+ @cached_property
+ def _breakdown_values(self) -> List[str | int | float]:
+ values = self._all_breakdown_values
+ if len(values) == 0 or all(value is None for value in values):
+ return []
+
+ if None in values:
+ all_values_are_ints_or_none = all(isinstance(value, int) or value is None for value in values)
+ all_values_are_floats_or_none = all(isinstance(value, float) or value is None for value in values)
+
+ if all_values_are_ints_or_none:
+ values = [v if v is not None else BREAKDOWN_NULL_NUMERIC_LABEL for v in values]
+ elif all_values_are_floats_or_none:
+ values = [v if v is not None else float(BREAKDOWN_NULL_NUMERIC_LABEL) for v in values]
+ else:
+ values = [v if v is not None else BREAKDOWN_NULL_STRING_LABEL for v in values]
+ return cast(List[str | int | float], values)
+
+ @cached_property
+ def has_breakdown_values(self) -> bool:
+ return len(self._breakdown_values) > 0
def _get_breakdown_histogram_buckets(self) -> List[Tuple[float, float]]:
buckets = []
- values = self._get_breakdown_values
+ values = self._breakdown_values
if len(values) == 1:
values = [values[0], values[0]]
@@ -241,8 +279,8 @@ def _get_breakdown_histogram_buckets(self) -> List[Tuple[float, float]]:
# Since we always `floor(x, 2)` the value, we add 0.01 to the last bucket
# to ensure it's always slightly greater than the maximum value
- lower_bound = values[i]
- upper_bound = values[i + 1] + 0.01 if last_value else values[i + 1]
+ lower_bound = float(values[i])
+ upper_bound = float(values[i + 1]) + 0.01 if last_value else float(values[i + 1])
buckets.append((lower_bound, upper_bound))
return buckets
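
The breakdown changes above implement a round trip for NULL values: `_breakdown_values` swaps `None` for a typed sentinel label (numeric or string, matching the other values), and `events_where_filter` maps the sentinel back to `None` so the generated comparison targets an actual NULL. A minimal sketch, using the string label visible in this diff and a stand-in numeric sentinel (the real numeric constant's value is not shown here):

BREAKDOWN_NULL_STRING_LABEL = "$$_posthog_breakdown_null_$$"
BREAKDOWN_NULL_NUMERIC_LABEL = -1  # stand-in value, illustrative only

def label_nulls(values: list) -> list:
    # Forward direction: pick a sentinel matching the type of the other values.
    if all(isinstance(v, int) or v is None for v in values):
        return [BREAKDOWN_NULL_NUMERIC_LABEL if v is None else v for v in values]
    return [BREAKDOWN_NULL_STRING_LABEL if v is None else v for v in values]

def where_clause_value(value):
    # Reverse direction: compare against a real NULL, not the sentinel.
    return None if value in (BREAKDOWN_NULL_STRING_LABEL, BREAKDOWN_NULL_NUMERIC_LABEL) else value

assert label_nulls(["a", None]) == ["a", BREAKDOWN_NULL_STRING_LABEL]
assert where_clause_value(BREAKDOWN_NULL_STRING_LABEL) is None
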
diff --git a/posthog/hogql_queries/insights/trends/breakdown_values.py b/posthog/hogql_queries/insights/trends/breakdown_values.py
index e97ab79c8e9cf..7b1522d5f25c5 100644
--- a/posthog/hogql_queries/insights/trends/breakdown_values.py
+++ b/posthog/hogql_queries/insights/trends/breakdown_values.py
@@ -8,7 +8,15 @@
from posthog.hogql_queries.insights.trends.utils import get_properties_chain
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.team.team import Team
-from posthog.schema import BreakdownFilter, BreakdownType, ChartDisplayType, ActionsNode, EventsNode, DataWarehouseNode
+from posthog.schema import (
+ BreakdownFilter,
+ BreakdownType,
+ ChartDisplayType,
+ ActionsNode,
+ EventsNode,
+ DataWarehouseNode,
+ HogQLQueryModifiers,
+)
from functools import cached_property
BREAKDOWN_OTHER_STRING_LABEL = "$$_posthog_breakdown_other_$$"
@@ -29,6 +37,7 @@ class BreakdownValues:
hide_other_aggregation: Optional[bool]
breakdown_limit: Optional[int]
query_date_range: QueryDateRange
+ modifiers: HogQLQueryModifiers
def __init__(
self,
@@ -38,6 +47,7 @@ def __init__(
chart_display_type: ChartDisplayType,
breakdown_filter: BreakdownFilter,
query_date_range: QueryDateRange,
+ modifiers: HogQLQueryModifiers,
):
self.team = team
self.series = series
@@ -58,6 +68,7 @@ def __init__(
self.hide_other_aggregation = breakdown_filter.breakdown_hide_other_aggregation
self.breakdown_limit = breakdown_filter.breakdown_limit
self.query_date_range = query_date_range
+ self.modifiers = modifiers
def get_breakdown_values(self) -> List[str | int]:
if self.breakdown_type == "cohort":
@@ -139,7 +150,7 @@ def get_breakdown_values(self) -> List[str | int]:
ORDER BY
count DESC,
value DESC
- LIMIT {breakdown_limit}
+ LIMIT {breakdown_limit_plus_one}
""",
placeholders={
"select_field": select_field,
@@ -147,7 +158,7 @@ def get_breakdown_values(self) -> List[str | int]:
"table": self._table,
"date_filter": date_filter,
"events_where": self.events_filter,
- "breakdown_limit": ast.Constant(value=breakdown_limit),
+ "breakdown_limit_plus_one": ast.Constant(value=breakdown_limit + 1),
},
)
@@ -160,50 +171,69 @@ def get_breakdown_values(self) -> List[str | int]:
):
inner_events_query.order_by[0].order = "ASC"
- query = parse_select(
- """
- SELECT groupArray(value) FROM ({inner_events_query})
- """,
- placeholders={
- "inner_events_query": inner_events_query,
- },
- )
-
+ values: List[Any]
if self.histogram_bin_count is not None:
- query.select = [self._to_bucketing_expression()]
+ query = parse_select(
+ """
+ SELECT {expr} FROM ({inner_events_query})
+ """,
+ placeholders={
+ "inner_events_query": inner_events_query,
+ "expr": self._to_bucketing_expression(),
+ },
+ )
+ response = execute_hogql_query(
+ query_type="TrendsQueryBreakdownValues",
+ query=query,
+ team=self.team,
+ modifiers=self.modifiers,
+ )
+ if response.results and len(response.results) > 0:
+ values = response.results[0][0]
+ else:
+ values = []
+ else:
+ # We're not running this through groupArray, as that eats NULL values.
+ query = inner_events_query
+ response = execute_hogql_query(
+ query_type="TrendsQueryBreakdownValues",
+ query=query,
+ team=self.team,
+ modifiers=self.modifiers,
+ )
+ value_index = (response.columns or []).index("value")
+ values = [row[value_index] for row in response.results or []]
- response = execute_hogql_query(
- query_type="TrendsQueryBreakdownValues",
- query=query,
- team=self.team,
- )
+ needs_other = False
+ if len(values) == breakdown_limit + 1:
+ needs_other = True
+ values = values[:-1]
+
+ # Add "other" value if "other" is not hidden and we're not bucketing numeric values
+ if self.hide_other_aggregation is not True and self.histogram_bin_count is None:
+ all_values_are_ints_or_none = all(isinstance(value, int) or value is None for value in values)
+ all_values_are_floats_or_none = all(isinstance(value, float) or value is None for value in values)
+ all_values_are_string_or_none = all(isinstance(value, str) or value is None for value in values)
- values: List[Any] = response.results[0][0]
+ if all_values_are_string_or_none:
+ values = [BREAKDOWN_NULL_STRING_LABEL if value in (None, "") else value for value in values]
+ if needs_other:
+ values.insert(0, BREAKDOWN_OTHER_STRING_LABEL)
+ elif all_values_are_ints_or_none or all_values_are_floats_or_none:
+ if all_values_are_ints_or_none:
+ values = [BREAKDOWN_NULL_NUMERIC_LABEL if value is None else value for value in values]
+ if needs_other:
+ values.insert(0, BREAKDOWN_OTHER_NUMERIC_LABEL)
+ else:
+ values = [float(BREAKDOWN_NULL_NUMERIC_LABEL) if value is None else value for value in values]
+ if needs_other:
+ values.insert(0, float(BREAKDOWN_OTHER_NUMERIC_LABEL))
if len(values) == 0:
values.insert(0, None)
return values
- # Add "other" value if "other" is not hidden and we're not bucketing numeric values
- if self.hide_other_aggregation is not True and self.histogram_bin_count is None:
- all_values_are_ints_or_none = all(isinstance(value, int) or value is None for value in values)
- all_values_are_floats_or_none = all(isinstance(value, float) or value is None for value in values)
- all_values_are_string_or_none = all(isinstance(value, str) or value is None for value in values)
-
- if all_values_are_ints_or_none or all_values_are_floats_or_none:
- if all_values_are_ints_or_none:
- values = [BREAKDOWN_NULL_NUMERIC_LABEL if value is None else value for value in values]
- values.insert(0, BREAKDOWN_OTHER_NUMERIC_LABEL)
- else:
- values = [float(BREAKDOWN_NULL_NUMERIC_LABEL) if value is None else value for value in values]
- values.insert(0, float(BREAKDOWN_OTHER_NUMERIC_LABEL))
- elif all_values_are_string_or_none:
- values = [BREAKDOWN_NULL_STRING_LABEL if value in (None, "") else value for value in values]
- values.insert(0, BREAKDOWN_OTHER_STRING_LABEL)
-
- breakdown_limit += 1 # Add one to the limit to account for the "other" value
-
- return values[:breakdown_limit]
+ return values
def _to_bucketing_expression(self) -> ast.Expr:
assert isinstance(self.histogram_bin_count, int)
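
The `LIMIT {breakdown_limit_plus_one}` change pairs with the `needs_other` logic above: the query over-fetches by exactly one row, and the presence of that extra row is the signal that values were cut off and an "other" bucket should be prepended after trimming. A minimal sketch of the pattern (the real code also gates on `hide_other_aggregation` and `histogram_bin_count`):

def trim_with_other(values: list, breakdown_limit: int, other_label) -> list:
    needs_other = len(values) == breakdown_limit + 1
    if needs_other:
        values = values[:-1]  # drop the probe row
        values.insert(0, other_label)
    return values

assert trim_with_other(["a", "b", "c"], 2, "other") == ["other", "a", "b"]
assert trim_with_other(["a", "b"], 2, "other") == ["a", "b"]
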
diff --git a/posthog/hogql_queries/insights/trends/data_warehouse_trends_query_builder.py b/posthog/hogql_queries/insights/trends/data_warehouse_trends_query_builder.py
deleted file mode 100644
index ac707c76d7158..0000000000000
--- a/posthog/hogql_queries/insights/trends/data_warehouse_trends_query_builder.py
+++ /dev/null
@@ -1,409 +0,0 @@
-from typing import List, Optional, cast
-from posthog.hogql import ast
-from posthog.hogql.parser import parse_select, parse_expr
-from posthog.hogql.property import property_to_expr
-from posthog.hogql.timings import HogQLTimings
-from posthog.hogql_queries.insights.trends.aggregation_operations import (
- AggregationOperations,
-)
-from posthog.hogql_queries.insights.trends.breakdown import Breakdown
-from posthog.hogql_queries.insights.trends.display import TrendsDisplay
-from posthog.hogql_queries.utils.query_date_range import QueryDateRange
-from posthog.models.filters.mixins.utils import cached_property
-from posthog.models.team.team import Team
-from posthog.schema import HogQLQueryModifiers, TrendsQuery, DataWarehouseNode
-from posthog.hogql_queries.insights.trends.trends_query_builder_abstract import TrendsQueryBuilderAbstract
-
-
-class DataWarehouseTrendsQueryBuilder(TrendsQueryBuilderAbstract):
- query: TrendsQuery
- team: Team
- query_date_range: QueryDateRange
- series: DataWarehouseNode
- timings: HogQLTimings
- modifiers: HogQLQueryModifiers
-
- def __init__(
- self,
- trends_query: TrendsQuery,
- team: Team,
- query_date_range: QueryDateRange,
- series: DataWarehouseNode,
- timings: HogQLTimings,
- modifiers: HogQLQueryModifiers,
- ):
- self.query = trends_query
- self.team = team
- self.query_date_range = query_date_range
- self.series = series
- self.timings = timings
- self.modifiers = modifiers
-
- def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
- breakdown = self._breakdown(is_actors_query=False)
-
- events_query: ast.SelectQuery | ast.SelectUnionQuery
-
- if self._trends_display.should_aggregate_values():
- events_query = self._get_events_subquery(False, is_actors_query=False, breakdown=breakdown)
- else:
- date_subqueries = self._get_date_subqueries(breakdown=breakdown)
- event_query = self._get_events_subquery(False, is_actors_query=False, breakdown=breakdown)
-
- events_query = ast.SelectUnionQuery(select_queries=[*date_subqueries, event_query])
-
- inner_select = self._inner_select_query(inner_query=events_query, breakdown=breakdown)
- full_query = self._outer_select_query(inner_query=inner_select, breakdown=breakdown)
-
- return full_query
-
- def _get_date_subqueries(self, breakdown: Breakdown, ignore_breakdowns: bool = False) -> List[ast.SelectQuery]:
- if not breakdown.enabled or ignore_breakdowns:
- return [
- cast(
- ast.SelectQuery,
- parse_select(
- """
- SELECT
- 0 AS total,
- {date_to_start_of_interval} - {number_interval_period} AS day_start
- FROM
- numbers(
- coalesce(dateDiff({interval}, {date_from}, {date_to}), 0)
- )
- """,
- placeholders={
- **self.query_date_range.to_placeholders(),
- },
- ),
- ),
- cast(
- ast.SelectQuery,
- parse_select(
- """
- SELECT
- 0 AS total,
- {date_from_start_of_interval} AS day_start
- """,
- placeholders={
- **self.query_date_range.to_placeholders(),
- },
- ),
- ),
- ]
-
- return [
- cast(
- ast.SelectQuery,
- parse_select(
- """
- SELECT
- 0 AS total,
- ticks.day_start as day_start,
- breakdown_value
- FROM (
- SELECT
- {date_to_start_of_interval} - {number_interval_period} AS day_start
- FROM
- numbers(
- coalesce(dateDiff({interval}, {date_from}, {date_to}), 0)
- )
- UNION ALL
- SELECT {date_from_start_of_interval} AS day_start
- ) as ticks
- CROSS JOIN (
- SELECT breakdown_value
- FROM (
- SELECT {cross_join_breakdown_values}
- )
- ARRAY JOIN breakdown_value as breakdown_value
- ) as sec
- ORDER BY breakdown_value, day_start
- """,
- placeholders={
- **self.query_date_range.to_placeholders(),
- **breakdown.placeholders(),
- },
- ),
- )
- ]
-
- def _get_events_subquery(
- self,
- no_modifications: Optional[bool],
- is_actors_query: bool,
- breakdown: Breakdown,
- breakdown_values_override: Optional[str | int] = None,
- actors_query_time_frame: Optional[str | int] = None,
- ) -> ast.SelectQuery:
- day_start = ast.Alias(
- alias="day_start",
- expr=ast.Call(
- name=f"toStartOf{self.query_date_range.interval_name.title()}",
- args=[ast.Call(name="toDateTime", args=[ast.Field(chain=[self.series.timestamp_field])])],
- ),
- )
-
- events_filter = self._events_filter(
- ignore_breakdowns=False,
- breakdown=breakdown,
- is_actors_query=is_actors_query,
- )
-
- default_query = cast(
- ast.SelectQuery,
- parse_select(
- """
- SELECT
- {aggregation_operation} AS total
- FROM {table} AS e
- WHERE {events_filter}
- """,
- placeholders={
- "events_filter": events_filter,
- "aggregation_operation": self._aggregation_operation.select_aggregation(),
- "table": self._table_expr,
- },
- ),
- )
-
- default_query.group_by = []
-
- if not self._trends_display.should_aggregate_values() and not is_actors_query:
- default_query.select.append(day_start)
- default_query.group_by.append(ast.Field(chain=["day_start"]))
-
- # TODO: Move this logic into the below branches when working on adding breakdown support for the person modal
- if is_actors_query:
- default_query.select = [ast.Alias(alias="person_id", expr=ast.Field(chain=["e", "person_id"]))]
- default_query.distinct = True
- default_query.group_by = []
-
- # No breakdowns and no complex series aggregation
- if (
- not breakdown.enabled
- and not self._aggregation_operation.requires_query_orchestration()
- and not self._aggregation_operation.aggregating_on_session_duration()
- ) or no_modifications is True:
- return default_query
- # Both breakdowns and complex series aggregation
- elif breakdown.enabled and self._aggregation_operation.requires_query_orchestration():
- raise NotImplementedError(
- "Breakdowns and complex series aggregation are not supported for Data Warehouse queries"
- )
- # Breakdowns and session duration math property
- elif breakdown.enabled and self._aggregation_operation.aggregating_on_session_duration():
- raise NotImplementedError(
- "Breakdowns and session duration math property are not supported for Data Warehouse queries"
- )
- # Just breakdowns
- elif breakdown.enabled:
- if not is_actors_query:
- default_query.select.append(breakdown.column_expr())
- default_query.group_by.append(ast.Field(chain=["breakdown_value"]))
- # Just session duration math property
- elif self._aggregation_operation.aggregating_on_session_duration():
- raise NotImplementedError("Session duration math property is not supported for Data Warehouse queries")
- # Just complex series aggregation
- elif self._aggregation_operation.requires_query_orchestration():
- raise NotImplementedError("Complex series aggregation is not supported for Data Warehouse queries")
-
- return default_query
-
- def _outer_select_query(self, breakdown: Breakdown, inner_query: ast.SelectQuery) -> ast.SelectQuery:
- query = cast(
- ast.SelectQuery,
- parse_select(
- """
- SELECT
- groupArray(day_start) AS date,
- groupArray(count) AS total
- FROM {inner_query}
- """,
- placeholders={"inner_query": inner_query},
- ),
- )
-
- query = self._trends_display.modify_outer_query(
- outer_query=query,
- inner_query=inner_query,
- dates_queries=ast.SelectUnionQuery(
- select_queries=self._get_date_subqueries(ignore_breakdowns=True, breakdown=breakdown)
- ),
- )
-
- query.order_by = [ast.OrderExpr(expr=ast.Call(name="sum", args=[ast.Field(chain=["count"])]), order="DESC")]
-
- if breakdown.enabled:
- query.select.append(
- ast.Alias(
- alias="breakdown_value",
- expr=ast.Call(
- name="ifNull",
- args=[
- ast.Call(name="toString", args=[ast.Field(chain=["breakdown_value"])]),
- ast.Constant(value=""),
- ],
- ),
- )
- )
- query.group_by = [ast.Field(chain=["breakdown_value"])]
- query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC"))
-
- return query
-
- def _inner_select_query(
- self, breakdown: Breakdown, inner_query: ast.SelectQuery | ast.SelectUnionQuery
- ) -> ast.SelectQuery:
- query = cast(
- ast.SelectQuery,
- parse_select(
- """
- SELECT
- sum(total) AS count
- FROM {inner_query}
- """,
- placeholders={"inner_query": inner_query},
- ),
- )
-
- if (
- self.query.trendsFilter is not None
- and self.query.trendsFilter.smoothingIntervals is not None
- and self.query.trendsFilter.smoothingIntervals > 1
- ):
- rolling_average = ast.Alias(
- alias="count",
- expr=ast.Call(
- name="floor",
- args=[
- ast.WindowFunction(
- name="avg",
- args=[ast.Call(name="sum", args=[ast.Field(chain=["total"])])],
- over_expr=ast.WindowExpr(
- order_by=[ast.OrderExpr(expr=ast.Field(chain=["day_start"]), order="ASC")],
- frame_method="ROWS",
- frame_start=ast.WindowFrameExpr(
- frame_type="PRECEDING",
- frame_value=int(self.query.trendsFilter.smoothingIntervals - 1),
- ),
- frame_end=ast.WindowFrameExpr(frame_type="CURRENT ROW"),
- ),
- )
- ],
- ),
- )
- query.select = [rolling_average]
-
- query.group_by = []
- query.order_by = []
-
- if not self._trends_display.should_aggregate_values():
- query.select.append(ast.Field(chain=["day_start"]))
- query.group_by.append(ast.Field(chain=["day_start"]))
- query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["day_start"]), order="ASC"))
-
- if breakdown.enabled:
- query.select.append(ast.Field(chain=["breakdown_value"]))
- query.group_by.append(ast.Field(chain=["breakdown_value"]))
- query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC"))
-
- if self._trends_display.should_wrap_inner_query():
- query = self._trends_display.wrap_inner_query(query, breakdown.enabled)
- if breakdown.enabled:
- query.select.append(ast.Field(chain=["breakdown_value"]))
-
- return query
-
- def _events_filter(
- self,
- is_actors_query: bool,
- breakdown: Breakdown | None,
- ignore_breakdowns: bool = False,
- breakdown_values_override: Optional[str | int] = None,
- actors_query_time_frame: Optional[str | int] = None,
- ) -> ast.Expr:
- series = self.series
- filters: List[ast.Expr] = []
-
- filters.extend(
- [
- parse_expr(
- "{timestamp_field} >= {date_from_with_adjusted_start_of_interval}",
- placeholders={
- "timestamp_field": ast.Call(
- name="toDateTime", args=[ast.Field(chain=[self.series.timestamp_field])]
- ),
- **self.query_date_range.to_placeholders(),
- },
- ),
- parse_expr(
- "{timestamp_field} <= {date_to}",
- placeholders={
- "timestamp_field": ast.Call(
- name="toDateTime", args=[ast.Field(chain=[self.series.timestamp_field])]
- ),
- **self.query_date_range.to_placeholders(),
- },
- ),
- ]
- )
-
- # Properties
- if self.query.properties is not None and self.query.properties != []:
- filters.append(property_to_expr(self.query.properties, self.team))
-
- # Series Filters
- if series.properties is not None and series.properties != []:
- filters.append(property_to_expr(series.properties, self.team))
-
- # Breakdown
- if not ignore_breakdowns and breakdown is not None:
- if breakdown.enabled and not breakdown.is_histogram_breakdown:
- breakdown_filter = breakdown.events_where_filter()
- if breakdown_filter is not None:
- filters.append(breakdown_filter)
-
- if len(filters) == 0:
- return ast.Constant(value=True)
-
- return ast.And(exprs=filters)
-
- def _breakdown(self, is_actors_query: bool, breakdown_values_override: Optional[str | int] = None):
- return Breakdown(
- team=self.team,
- query=self.query,
- series=self.series,
- query_date_range=self.query_date_range,
- timings=self.timings,
- modifiers=self.modifiers,
- events_filter=self._events_filter(
- breakdown=None, # Passing in None because we know we dont actually need it
- ignore_breakdowns=True,
- is_actors_query=is_actors_query,
- breakdown_values_override=breakdown_values_override,
- ),
- breakdown_values_override=[breakdown_values_override] if breakdown_values_override is not None else None,
- )
-
- @cached_property
- def _aggregation_operation(self) -> AggregationOperations:
- if self.series.math is not None and self.series.math != "total":
- raise NotImplementedError("Math types other than total are not supported for Data Warehouse queries")
-
- return AggregationOperations(
- self.team, self.series, self.query_date_range, self._trends_display.should_aggregate_values()
- )
-
- @cached_property
- def _trends_display(self) -> TrendsDisplay:
- display = (
- self.query.trendsFilter.display
- if self.query.trendsFilter is not None and self.query.trendsFilter.display is not None
- else None
- )
- return TrendsDisplay(display)
-
- @cached_property
- def _table_expr(self) -> ast.Field:
- return ast.Field(chain=[self.series.table_name])
diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_data_warehouse_query_builder.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_data_warehouse_query_builder.ambr
index 7e60aa0003dca..47eda0a092ab7 100644
--- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_data_warehouse_query_builder.ambr
+++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_data_warehouse_query_builder.ambr
@@ -1,18 +1,15 @@
# serializer version: 1
# name: TestDataWarehouseQueryBuilder.test_trends_breakdown
'''
- SELECT groupArray(value)
- FROM
- (SELECT e.prop_1 AS value,
- count(e.id) AS count
- FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
- WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e.prop_1 AS value,
+ count(e.id) AS count
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestDataWarehouseQueryBuilder.test_trends_breakdown.1
@@ -35,14 +32,14 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'd', 'c', 'b', 'a'] AS breakdown_value) ARRAY
+ (SELECT ['d', 'c', 'b', 'a'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.id) AS total,
toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start,
- transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'd', 'c', 'b', 'a'], ['$$_posthog_breakdown_other_$$', 'd', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['d', 'c', 'b', 'a'], ['d', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
- WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), or(ifNull(equals(transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'd', 'c', 'b', 'a'], ['$$_posthog_breakdown_other_$$', 'd', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), equals(e.prop_1, 'd'), equals(e.prop_1, 'c'), equals(e.prop_1, 'b'), equals(e.prop_1, 'a')))
+ WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), or(equals(e.prop_1, 'd'), equals(e.prop_1, 'c'), equals(e.prop_1, 'b'), equals(e.prop_1, 'a')))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -57,18 +54,15 @@
# ---
# name: TestDataWarehouseQueryBuilder.test_trends_breakdown_with_property
'''
- SELECT groupArray(value)
- FROM
- (SELECT e.prop_1 AS value,
- count(e.id) AS count
- FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
- WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e.prop_1 AS value,
+ count(e.id) AS count
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestDataWarehouseQueryBuilder.test_trends_breakdown_with_property.1
@@ -91,14 +85,14 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'a'] AS breakdown_value) ARRAY
+ (SELECT ['a'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.id) AS total,
toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start,
- transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'a'], ['$$_posthog_breakdown_other_$$', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['a'], ['a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
- WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), or(ifNull(equals(transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'a'], ['$$_posthog_breakdown_other_$$', 'a'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), equals(e.prop_1, 'a')))
+ WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), equals(e.prop_1, 'a'))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -163,32 +157,6 @@
allow_experimental_object_type=1
'''
# ---
-# name: TestDataWarehouseQueryBuilder.test_trends_other_property_invalid
- '''
- SELECT groupArray(day_start) AS date,
- groupArray(count) AS total
- FROM
- (SELECT sum(total) AS count,
- day_start AS day_start
- FROM
- (SELECT 0 AS total,
- minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
- FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers
- UNION ALL SELECT 0 AS total,
- toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start
- UNION ALL SELECT count(e.id) AS total,
- toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start
- FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
- WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), true)
- GROUP BY day_start)
- GROUP BY day_start
- ORDER BY day_start ASC)
- ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
- '''
-# ---
# name: TestDataWarehouseQueryBuilder.test_trends_property
'''
SELECT groupArray(day_start) AS date,
diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr
index 2f1c4c1de0917..f6eb3748afb2b 100644
--- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr
+++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr
@@ -93,9 +93,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_action_filtering_with_cohort_poe_v2
@@ -180,33 +180,30 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_by_group_props_person_on_events
'''
- SELECT groupArray(value)
- FROM
- (SELECT e__group_0.properties___industry AS value,
- count(e.uuid) AS count
- FROM events AS e
- LEFT JOIN
- (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
- groups.group_type_index AS index,
- groups.group_key AS key
- FROM groups
- WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
- GROUP BY groups.group_type_index,
- groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e__group_0.properties___industry AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_by_group_props_person_on_events.1
@@ -229,12 +226,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'finance', 'technology'] AS breakdown_value) ARRAY
+ (SELECT ['finance', 'technology'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'finance', 'technology'], ['$$_posthog_breakdown_other_$$', 'finance', 'technology'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['finance', 'technology'], ['finance', 'technology'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
LEFT JOIN
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
@@ -244,7 +241,7 @@
WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
GROUP BY groups.group_type_index,
groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'finance', 'technology'], ['$$_posthog_breakdown_other_$$', 'finance', 'technology'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_0.properties___industry, 'technology'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(e__group_0.properties___industry, 'finance'), 0), ifNull(equals(e__group_0.properties___industry, 'technology'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -252,9 +249,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_by_group_props_person_on_events.2
@@ -290,26 +287,23 @@
# ---
# name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events
'''
- SELECT groupArray(value)
- FROM
- (SELECT e__group_0.properties___industry AS value,
- count(e.uuid) AS count
- FROM events AS e
- LEFT JOIN
- (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
- groups.group_type_index AS index,
- groups.group_key AS key
- FROM groups
- WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
- GROUP BY groups.group_type_index,
- groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e__group_0.properties___industry AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_by_group_props_with_person_filter_person_on_events.1
@@ -332,12 +326,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'finance'] AS breakdown_value) ARRAY
+ (SELECT ['finance'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'finance'], ['$$_posthog_breakdown_other_$$', 'finance'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['finance'], ['finance'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
LEFT JOIN
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
@@ -347,7 +341,7 @@
WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
GROUP BY groups.group_type_index,
groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), or(ifNull(equals(transform(ifNull(e__group_0.properties___industry, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'finance'], ['$$_posthog_breakdown_other_$$', 'finance'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'key'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(e__group_0.properties___industry, 'finance'), 0))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -355,25 +349,22 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.1
@@ -396,14 +387,14 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'second url'] AS breakdown_value) ARRAY
+ (SELECT ['second url'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'second url'], ['$$_posthog_breakdown_other_$$', 'second url'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['second url'], ['second url'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'second url'], ['$$_posthog_breakdown_other_$$', 'second url'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'second url'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', ''), 'second url'), 0))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -411,88 +402,68 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_filtering_with_properties_in_new_format.3
'''
SELECT groupArray(day_start) AS date,
- groupArray(count) AS total,
- ifNull(toString(breakdown_value), '') AS breakdown_value
+ groupArray(count) AS total
FROM
(SELECT sum(total) AS count,
- day_start AS day_start,
- breakdown_value AS breakdown_value
+ day_start AS day_start
FROM
(SELECT 0 AS total,
- ticks.day_start AS day_start,
- sec.breakdown_value AS breakdown_value
- FROM
- (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
- FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers
- UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start) AS ticks
- CROSS JOIN
- (SELECT breakdown_value
- FROM
- (SELECT [NULL] AS breakdown_value) ARRAY
- JOIN breakdown_value AS breakdown_value) AS sec
- ORDER BY sec.breakdown_value ASC, day_start ASC
+ minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
+ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), 0)) AS numbers
+ UNION ALL SELECT 0 AS total,
+ toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC'))) AS day_start
UNION ALL SELECT count(e.uuid) AS total,
- toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '') AS breakdown_value
+ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start
FROM events AS e SAMPLE 1
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$current_url'), ''), 'null'), '^"|"$', '')))
- GROUP BY day_start,
- breakdown_value)
- GROUP BY day_start,
- breakdown_value
- ORDER BY day_start ASC, breakdown_value ASC)
- GROUP BY breakdown_value
- ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-22 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Firefox'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Windows'), 0)), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
+ GROUP BY day_start)
+ GROUP BY day_start
+ ORDER BY day_start ASC)
+ ORDER BY sum(count) DESC
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_aggregated
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
- count(DISTINCT e__pdi.person_id) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))), equals(e.event, '$pageview'))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+ count(DISTINCT e__pdi.person_id) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))), equals(e.event, '$pageview'))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_aggregated.1
@@ -509,7 +480,7 @@
CROSS JOIN
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
e__pdi.person_id AS actor_id,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val', 'bor'], ['$$_posthog_breakdown_other_$$', 'val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -518,7 +489,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val', 'bor'], ['$$_posthog_breakdown_other_$$', 'val', 'bor'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
+ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
GROUP BY timestamp, actor_id,
breakdown_value) AS e
WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0))
@@ -528,32 +499,29 @@
ORDER BY d.timestamp ASC)
WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
GROUP BY breakdown_value
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized
'''
- SELECT groupArray(value)
- FROM
- (SELECT nullIf(nullIf(e.mat_key, ''), 'null') AS value,
- count(DISTINCT e__pdi.person_id) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))), equals(e.event, '$pageview'))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT nullIf(nullIf(e.mat_key, ''), 'null') AS value,
+ count(DISTINCT e__pdi.person_id) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC')))), equals(e.event, '$pageview'))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_aggregated_materialized.1
@@ -570,7 +538,7 @@
CROSS JOIN
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
e__pdi.person_id AS actor_id,
- transform(ifNull(nullIf(nullIf(e.mat_key, ''), 'null'), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val', 'bor'], ['$$_posthog_breakdown_other_$$', 'val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(nullIf(nullIf(e.mat_key, ''), 'null'), '$$_posthog_breakdown_null_$$'), ['val', 'bor'], ['val', 'bor'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -579,7 +547,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(transform(ifNull(nullIf(nullIf(e.mat_key, ''), 'null'), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val', 'bor'], ['$$_posthog_breakdown_other_$$', 'val', 'bor'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'val'), 0), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
+ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'val'), 0), ifNull(equals(nullIf(nullIf(e.mat_key, ''), 'null'), 'bor'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
GROUP BY timestamp, actor_id,
breakdown_value) AS e
WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0))
@@ -589,9 +557,9 @@
ORDER BY d.timestamp ASC)
WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 23:59:59', 6, 'UTC'))), 0))
GROUP BY breakdown_value
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action
@@ -616,41 +584,38 @@
# ---
# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
- count(DISTINCT e__pdi.person_id) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- INNER JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id,
- (SELECT cohortpeople.person_id AS person_id
- FROM cohortpeople
- WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
- GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
- HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+ count(DISTINCT e__pdi.person_id) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id,
+ (SELECT cohortpeople.person_id AS person_id
+ FROM cohortpeople
+ WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
+ GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
+ HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_weekly_active_users_daily_based_on_action.3
@@ -673,7 +638,7 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'val'] AS breakdown_value) ARRAY
+ (SELECT ['val'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT counts AS total,
@@ -689,7 +654,7 @@
CROSS JOIN
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
e__pdi.person_id AS actor_id,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], ['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['val'], ['val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -714,7 +679,7 @@
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
- HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], ['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))
+ HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))
GROUP BY timestamp, actor_id,
breakdown_value) AS e
WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0))
@@ -727,33 +692,30 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_with_filter_groups_person_on_events
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- LEFT JOIN
- (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
- groups.group_type_index AS index,
- groups.group_key AS key
- FROM groups
- WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
- GROUP BY groups.group_type_index,
- groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_with_filter_groups_person_on_events.1
@@ -776,12 +738,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'uh', 'oh'] AS breakdown_value) ARRAY
+ (SELECT ['uh', 'oh'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
LEFT JOIN
(SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
@@ -791,7 +753,7 @@
WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
GROUP BY groups.group_type_index,
groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -799,9 +761,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2
@@ -820,26 +782,23 @@
# ---
# name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.1
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- LEFT JOIN
- (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
- groups.group_type_index AS index,
- groups.group_key AS key
- FROM groups
- WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
- GROUP BY groups.group_type_index,
- groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ LEFT JOIN
+ (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), groups._timestamp) AS properties___industry,
+ groups.group_type_index AS index,
+ groups.group_key AS key
+ FROM groups
+ WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
+ GROUP BY groups.group_type_index,
+ groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_breakdown_with_filter_groups_person_on_events_v2.2
@@ -862,12 +821,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'uh', 'oh'] AS breakdown_value) ARRAY
+ (SELECT ['uh', 'oh'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['uh', 'oh'], ['uh', 'oh'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
LEFT OUTER JOIN
(SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
@@ -883,7 +842,7 @@
WHERE and(equals(groups.team_id, 2), ifNull(equals(index, 0), 0))
GROUP BY groups.group_type_index,
groups.group_key) AS e__group_0 ON equals(e.`$group_0`, e__group_0.key)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], ['$$_posthog_breakdown_other_$$', 'uh', 'oh'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__group_0.properties___industry, 'finance'), 0), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'uh'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'oh'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -891,25 +850,22 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.1
@@ -932,12 +888,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'other_value', 'value'] AS breakdown_value) ARRAY
+ (SELECT ['other_value', '$$_posthog_breakdown_null_$$', 'value'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1.0
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -946,7 +902,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -954,25 +910,22 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_dau_with_breakdown_filtering_with_sampling.3
@@ -995,12 +948,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'other_value', 'value'] AS breakdown_value) ARRAY
+ (SELECT ['other_value', '$$_posthog_breakdown_null_$$', 'value'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['other_value', '$$_posthog_breakdown_null_$$', 'value'], ['other_value', '$$_posthog_breakdown_null_$$', 'value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1.0
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -1009,7 +962,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], ['$$_posthog_breakdown_other_$$', 'other_value', 'value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -1017,9 +970,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort
@@ -1101,9 +1054,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_filter_events_by_precalculated_cohort_poe_v2
@@ -1167,9 +1120,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_filtering_by_multiple_groups_person_on_events
@@ -1209,9 +1162,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_filtering_by_multiple_groups_person_on_events.1
@@ -1282,44 +1235,41 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter
'''
- SELECT groupArray(value)
- FROM
- (SELECT e__pdi__person.`properties___$some_prop` AS value,
- count(DISTINCT e__pdi.person_id) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- INNER JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'filter_prop'), ''), 'null'), '^"|"$', '') AS properties___filter_prop
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e__pdi__person.`properties___$some_prop` AS value,
+ count(DISTINCT e__pdi.person_id) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'filter_prop'), ''), 'null'), '^"|"$', '') AS properties___filter_prop
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter.1
@@ -1342,7 +1292,7 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'] AS breakdown_value) ARRAY
+ (SELECT ['some_val2', 'some_val'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT counts AS total,
@@ -1358,7 +1308,7 @@
CROSS JOIN
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
e__pdi.person_id AS actor_id,
- transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -1379,7 +1329,7 @@
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), or(ifNull(equals(transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val2'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
+ WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), or(ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val2'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
GROUP BY timestamp, actor_id,
breakdown_value) AS e
WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0))
@@ -1392,31 +1342,28 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS value,
- count(DISTINCT ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)) AS count
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
- person_overrides.old_person_id AS old_person_id
- FROM person_overrides
- WHERE equals(person_overrides.team_id, 2)
- GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0)))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS value,
+ count(DISTINCT ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)) AS count
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
+ person_overrides.old_person_id AS old_person_id
+ FROM person_overrides
+ WHERE equals(person_overrides.team_id, 2)
+ GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_mau_with_breakdown_filtering_and_prop_filter_poe_v2.1
@@ -1439,7 +1386,7 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'] AS breakdown_value) ARRAY
+ (SELECT ['some_val2', 'some_val'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT counts AS total,
@@ -1455,7 +1402,7 @@
CROSS JOIN
(SELECT toTimeZone(e.timestamp, 'UTC') AS timestamp,
ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id) AS actor_id,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['some_val2', 'some_val'], ['some_val2', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
LEFT OUTER JOIN
(SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
@@ -1463,7 +1410,7 @@
FROM person_overrides
WHERE equals(person_overrides.team_id, 2)
GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
- WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
+ WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, 'filter_prop'), ''), 'null'), '^"|"$', ''), 'filter_val'), 0), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$some_prop'), ''), 'null'), '^"|"$', ''), 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0))
GROUP BY timestamp, actor_id,
breakdown_value) AS e
WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(29))), 0))
@@ -1476,9 +1423,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_non_deterministic_timezones
@@ -1502,9 +1449,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action
@@ -1529,30 +1476,27 @@
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id,
- (SELECT cohortpeople.person_id AS person_id
- FROM cohortpeople
- WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
- GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
- HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id,
+ (SELECT cohortpeople.person_id AS person_id
+ FROM cohortpeople
+ WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
+ GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
+ HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action.3
@@ -1575,12 +1519,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
+ (SELECT ['$$_posthog_breakdown_null_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -1594,7 +1538,7 @@
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
- HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
+ HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -1602,9 +1546,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2
@@ -1629,29 +1573,26 @@
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- LEFT OUTER JOIN
- (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
- person_overrides.old_person_id AS old_person_id
- FROM person_overrides
- WHERE equals(person_overrides.team_id, 2)
- GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id),
- (SELECT cohortpeople.person_id AS person_id
- FROM cohortpeople
- WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
- GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
- HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ LEFT OUTER JOIN
+ (SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
+ person_overrides.old_person_id AS old_person_id
+ FROM person_overrides
+ WHERE equals(person_overrides.team_id, 2)
+ GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id),
+ (SELECT cohortpeople.person_id AS person_id
+ FROM cohortpeople
+ WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
+ GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
+ HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_filtering_in_cohort_in_action_poe_v2.3
@@ -1674,12 +1615,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
+ (SELECT ['$$_posthog_breakdown_null_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
LEFT OUTER JOIN
(SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
@@ -1692,7 +1633,7 @@
FROM cohortpeople
WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 2))
GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version
- HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
+ HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -1700,9 +1641,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_property_filtering
@@ -1744,9 +1685,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_property_filtering_clashing_with_event_property
@@ -1788,9 +1729,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_property_filtering_clashing_with_event_property.1
@@ -1814,9 +1755,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized
@@ -1858,9 +1799,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_property_filtering_clashing_with_event_property_materialized.1
@@ -1884,9 +1825,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_person_property_filtering_materialized
@@ -1928,9 +1869,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_same_day_with_person_on_events_v2
@@ -1968,9 +1909,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_same_day_with_person_on_events_v2.2
@@ -2000,9 +1941,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_same_day_with_person_on_events_v2_latest_override
@@ -2046,9 +1987,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_same_day_with_person_on_events_v2_latest_override.2
@@ -2092,9 +2033,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_same_day_with_person_on_events_v2_latest_override.4
@@ -2138,9 +2079,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily
@@ -2164,9 +2105,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily.1
@@ -2197,9 +2138,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily.2
@@ -2243,9 +2184,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily.3
@@ -2269,25 +2210,22 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily.4
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily.5
@@ -2310,12 +2248,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'Mac'] AS breakdown_value) ARRAY
+ (SELECT ['Mac'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'Mac'], ['$$_posthog_breakdown_other_$$', 'Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -2324,7 +2262,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'Mac'], ['$$_posthog_breakdown_other_$$', 'Mac'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -2332,9 +2270,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_minus_utc
@@ -2358,9 +2296,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_minus_utc.1
@@ -2391,9 +2329,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_minus_utc.2
@@ -2437,9 +2375,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_minus_utc.3
@@ -2463,25 +2401,22 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_minus_utc.4
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_minus_utc.5
@@ -2504,12 +2439,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'Mac'] AS breakdown_value) ARRAY
+ (SELECT ['Mac'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'America/Phoenix')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'Mac'], ['$$_posthog_breakdown_other_$$', 'Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -2518,7 +2453,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'Mac'], ['$$_posthog_breakdown_other_$$', 'Mac'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'America/Phoenix')))), lessOrEquals(toTimeZone(e.timestamp, 'America/Phoenix'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'America/Phoenix'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -2526,9 +2461,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_plus_utc
@@ -2552,9 +2487,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_plus_utc.1
@@ -2585,9 +2520,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_plus_utc.2
@@ -2631,9 +2566,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_plus_utc.3
@@ -2657,25 +2592,22 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_plus_utc.4
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_daily_plus_utc.5
@@ -2698,12 +2630,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'Mac'] AS breakdown_value) ARRAY
+ (SELECT ['Mac'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'Asia/Tokyo')) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'Mac'], ['$$_posthog_breakdown_other_$$', 'Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['Mac'], ['Mac'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -2712,7 +2644,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'Mac'], ['$$_posthog_breakdown_other_$$', 'Mac'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-29 00:00:00', 6, 'Asia/Tokyo')))), lessOrEquals(toTimeZone(e.timestamp, 'Asia/Tokyo'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-05 23:59:59', 6, 'Asia/Tokyo'))), equals(e.event, 'sign up'), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$os'), ''), 'null'), '^"|"$', ''), 'Mac'), 0))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -2720,9 +2652,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_hourly_relative_from
@@ -2753,9 +2685,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_hourly_relative_from.1
@@ -2779,9 +2711,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_hourly_relative_from_minus_utc
@@ -2812,9 +2744,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_hourly_relative_from_minus_utc.1
@@ -2838,9 +2770,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_hourly_relative_from_plus_utc
@@ -2871,9 +2803,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_hourly_relative_from_plus_utc.1
@@ -2897,9 +2829,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_weekly
@@ -2923,9 +2855,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_weekly.1
@@ -2949,9 +2881,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_weekly_minus_utc
@@ -2975,9 +2907,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_weekly_minus_utc.1
@@ -3001,9 +2933,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_weekly_plus_utc
@@ -3027,9 +2959,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_timezones_weekly_plus_utc.1
@@ -3053,45 +2985,42 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns
'''
- SELECT groupArray(value)
- FROM
- (SELECT e__pdi__person.properties___email AS value,
- count(e.uuid) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
- argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- INNER JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0)))))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e__pdi__person.properties___email AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0)))))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.1
@@ -3114,12 +3043,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'] AS breakdown_value) ARRAY
+ (SELECT ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
@@ -3141,7 +3070,7 @@
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(ifNull(equals(transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test@gmail.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test5@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test4@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test3@posthog.com'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test@gmail.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test5@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test4@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test3@posthog.com'), 0)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -3149,45 +3078,42 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT e__pdi__person.properties___email AS value,
- count(e.uuid) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
- argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- INNER JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0))))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e__pdi__person.properties___email AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$os'), ''), 'null'), '^"|"$', '') AS `properties___$os`,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$browser'), ''), 'null'), '^"|"$', '') AS `properties___$browser`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0))))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns.3
@@ -3210,12 +3136,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'] AS breakdown_value) ARRAY
+ (SELECT ['test2@posthog.com'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(e.uuid) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['test2@posthog.com'], ['test2@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
@@ -3237,7 +3163,7 @@
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), or(ifNull(equals(transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -3245,9 +3171,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id
@@ -3271,9 +3197,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id.1
@@ -3315,43 +3241,40 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT e__pdi__person.`properties___$some_prop` AS value,
- count(e.uuid) AS count
- FROM events AS e
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
- argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- INNER JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e__pdi__person.`properties___$some_prop` AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id.3
@@ -3374,12 +3297,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'some_val'] AS breakdown_value) ARRAY
+ (SELECT ['some_val', '$$_posthog_breakdown_null_$$'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e.distinct_id) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['some_val', '$$_posthog_breakdown_null_$$'], ['some_val', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
@@ -3399,7 +3322,7 @@
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0), isNull(e__pdi__person.`properties___$some_prop`)))
GROUP BY day_start,
breakdown_value)
GROUP BY day_start,
@@ -3407,9 +3330,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id.4
@@ -3446,9 +3369,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id.5
@@ -3485,25 +3408,22 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id.6
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_aggregate_by_distinct_id.7
@@ -3526,12 +3446,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT [NULL] AS breakdown_value) ARRAY
+ (SELECT ['$$_posthog_breakdown_null_$$'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e.distinct_id) AS total,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$'], ['$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_prop'), ''), 'null'), '^"|"$', '')))
GROUP BY day_start,
@@ -3541,9 +3461,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_any_event_total_count
@@ -3567,9 +3487,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_any_event_total_count.1
@@ -3593,25 +3513,22 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_cumulative
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_cumulative.1
@@ -3639,12 +3556,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
+ (SELECT ['$$_posthog_breakdown_null_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT e__pdi.person_id) AS total,
min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -3653,7 +3570,7 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
GROUP BY e__pdi.person_id,
breakdown_value)
GROUP BY day_start,
@@ -3661,25 +3578,22 @@
ORDER BY day_start ASC, breakdown_value ASC))
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_cumulative_poe_v2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_cumulative_poe_v2.1
@@ -3707,12 +3621,12 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
+ (SELECT ['$$_posthog_breakdown_null_$$', 'value', 'other_value'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT count(DISTINCT ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)) AS total,
min(toStartOfDay(toTimeZone(e.timestamp, 'UTC'))) AS day_start,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], ['$$_posthog_breakdown_null_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
LEFT OUTER JOIN
(SELECT argMax(person_overrides.override_person_id, person_overrides.version) AS override_person_id,
@@ -3720,7 +3634,7 @@
FROM person_overrides
WHERE equals(person_overrides.team_id, 2)
GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '')), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0)))
GROUP BY ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id),
breakdown_value)
GROUP BY day_start,
@@ -3728,31 +3642,28 @@
ORDER BY day_start ASC, breakdown_value ASC))
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- max(e__session.duration) AS count
- FROM events AS e
- INNER JOIN
- (SELECT events.`$session_id` AS id,
- dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
- FROM events
- WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
- GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ max(e__session.duration) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT events.`$session_id` AS id,
+ dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
+ FROM events
+ WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
+ GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.1
@@ -3761,7 +3672,7 @@
breakdown_value AS breakdown_value
FROM
(SELECT any(e__session.duration) AS session_duration,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', '$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT events.`$session_id` AS id,
@@ -3769,35 +3680,32 @@
FROM events
WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1))
GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))))
GROUP BY e__session.id,
breakdown_value)
GROUP BY breakdown_value
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- max(e__session.duration) AS count
- FROM events AS e
- INNER JOIN
- (SELECT events.`$session_id` AS id,
- dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
- FROM events
- WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
- GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ max(e__session.duration) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT events.`$session_id` AS id,
+ dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
+ FROM events
+ WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
+ GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_breakdown_with_session_property_single_aggregate_math_and_breakdown.3
@@ -3806,7 +3714,7 @@
breakdown_value AS breakdown_value
FROM
(SELECT any(e__session.duration) AS session_duration,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1', '$$_posthog_breakdown_null_$$'], ['value2', 'value1', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT events.`$session_id` AS id,
@@ -3814,13 +3722,13 @@
FROM events
WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1))
GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''))))
GROUP BY e__session.id,
breakdown_value)
GROUP BY breakdown_value
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_compare_day_interval_relative_range
@@ -3844,9 +3752,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_compare_day_interval_relative_range.1
@@ -3870,9 +3778,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_compare_day_interval_relative_range.2
@@ -3896,9 +3804,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_count_per_user_average_aggregated
@@ -3918,9 +3826,9 @@
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))))
GROUP BY e__pdi.person_id))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_count_per_user_average_aggregated_poe_v2
@@ -3939,25 +3847,22 @@
GROUP BY person_overrides.old_person_id) AS e__override ON equals(e.person_id, e__override.old_person_id)
WHERE and(equals(e.team_id, 2), equals(e.event, 'viewed video'), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))))
GROUP BY ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id)))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '') AS value,
- count(e.uuid) AS count
- FROM events AS e
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))), equals(e.event, 'viewed video'))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '') AS value,
+ count(e.uuid) AS count
+ FROM events AS e
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC')))), equals(e.event, 'viewed video'))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling.1
@@ -3969,7 +3874,7 @@
breakdown_value AS breakdown_value
FROM
(SELECT count(e.uuid) AS total,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'red', 'blue'], ['$$_posthog_breakdown_other_$$', 'red', 'blue'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['red', 'blue', '$$_posthog_breakdown_null_$$'], ['red', 'blue', '$$_posthog_breakdown_null_$$'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1.0
INNER JOIN
(SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
@@ -3978,13 +3883,13 @@
WHERE equals(person_distinct_id2.team_id, 2)
GROUP BY person_distinct_id2.distinct_id
HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'red', 'blue'], ['$$_posthog_breakdown_other_$$', 'red', 'blue'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'red'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'blue'), 0))), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))))
+ WHERE and(equals(e.team_id, 2), and(equals(e.event, 'viewed video'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'red'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', ''), 'blue'), 0), isNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'color'), ''), 'null'), '^"|"$', '')))), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(0))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))))
GROUP BY e__pdi.person_id,
breakdown_value)
GROUP BY breakdown_value)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_count_per_user_average_daily
@@ -4023,9 +3928,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_count_per_user_average_daily_poe_v2
@@ -4063,9 +3968,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_groups_per_day
@@ -4089,9 +3994,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_groups_per_day_cumulative
@@ -4119,9 +4024,9 @@
GROUP BY day_start
ORDER BY day_start ASC))
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_per_day_cumulative
@@ -4149,9 +4054,9 @@
GROUP BY day_start
ORDER BY day_start ASC))
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_per_day_dau_cumulative
@@ -4186,49 +4091,46 @@
GROUP BY day_start
ORDER BY day_start ASC))
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown
'''
- SELECT groupArray(value)
- FROM
- (SELECT e__pdi__person.`properties___$some_prop` AS value,
- max(e__session.duration) AS count
- FROM events AS e
- INNER JOIN
- (SELECT events.`$session_id` AS id,
- dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
- FROM events
- WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
- GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- INNER JOIN
- (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
- argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
- person_distinct_id2.distinct_id AS distinct_id
- FROM person_distinct_id2
- WHERE equals(person_distinct_id2.team_id, 2)
- GROUP BY person_distinct_id2.distinct_id
- HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
- INNER JOIN
- (SELECT person.id AS id,
- replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`
- FROM person
- WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
- (SELECT person.id AS id, max(person.version) AS version
- FROM person
- WHERE equals(person.team_id, 2)
- GROUP BY person.id
- HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT e__pdi__person.`properties___$some_prop` AS value,
+ max(e__session.duration) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT events.`$session_id` AS id,
+ dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
+ FROM events
+ WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
+ GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
+ INNER JOIN
+ (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id,
+ argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id,
+ person_distinct_id2.distinct_id AS distinct_id
+ FROM person_distinct_id2
+ WHERE equals(person_distinct_id2.team_id, 2)
+ GROUP BY person_distinct_id2.distinct_id
+ HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id)
+ INNER JOIN
+ (SELECT person.id AS id,
+ replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, '$some_prop'), ''), 'null'), '^"|"$', '') AS `properties___$some_prop`
+ FROM person
+ WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
+ (SELECT person.id AS id, max(person.version) AS version
+ FROM person
+ WHERE equals(person.team_id, 2)
+ GROUP BY person.id
+ HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_person_breakdown_with_session_property_single_aggregate_math_and_breakdown.1
@@ -4237,7 +4139,7 @@
breakdown_value AS breakdown_value
FROM
(SELECT any(e__session.duration) AS session_duration,
- transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val', 'another_val'], ['$$_posthog_breakdown_other_$$', 'some_val', 'another_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['some_val', 'another_val'], ['some_val', 'another_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value
FROM events AS e SAMPLE 1
INNER JOIN
(SELECT events.`$session_id` AS id,
@@ -4263,13 +4165,13 @@
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val', 'another_val'], ['$$_posthog_breakdown_other_$$', 'some_val', 'another_val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'another_val'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'another_val'), 0)))
GROUP BY e__session.id,
breakdown_value)
GROUP BY breakdown_value
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_hogql_math
@@ -4293,9 +4195,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_single_aggregate_math
@@ -4312,9 +4214,9 @@
GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))
GROUP BY e__session.id)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_single_aggregate_math.1
@@ -4331,9 +4233,9 @@
GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))
GROUP BY e__session.id)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_total_volume_math
@@ -4369,9 +4271,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_total_volume_math.1
@@ -4407,31 +4309,28 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- max(e__session.duration) AS count
- FROM events AS e
- INNER JOIN
- (SELECT events.`$session_id` AS id,
- dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
- FROM events
- WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
- GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ max(e__session.duration) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT events.`$session_id` AS id,
+ dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
+ FROM events
+ WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
+ GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.1
@@ -4454,7 +4353,7 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'value2', 'value1'] AS breakdown_value) ARRAY
+ (SELECT ['value2', 'value1'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT quantile(0.5)(session_duration) AS total,
@@ -4462,7 +4361,7 @@
breakdown_value AS breakdown_value
FROM
(SELECT any(e__session.duration) AS session_duration,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value,
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value,
toStartOfWeek(toTimeZone(e.timestamp, 'UTC'), 0) AS day_start
FROM events AS e SAMPLE 1
INNER JOIN
@@ -4471,7 +4370,7 @@
FROM events
WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1))
GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0)))
GROUP BY day_start,
e__session.id,
breakdown_value,
@@ -4483,31 +4382,28 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.2
'''
- SELECT groupArray(value)
- FROM
- (SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
- max(e__session.duration) AS count
- FROM events AS e
- INNER JOIN
- (SELECT events.`$session_id` AS id,
- dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
- FROM events
- WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
- GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
- GROUP BY value
- ORDER BY count DESC, value DESC
- LIMIT 25)
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', '') AS value,
+ max(e__session.duration) AS count
+ FROM events AS e
+ INNER JOIN
+ (SELECT events.`$session_id` AS id,
+ dateDiff('second', min(events.timestamp), max(events.timestamp)) AS duration
+ FROM events
+ WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1))
+ GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
+ WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_trends_with_session_property_total_volume_math_with_breakdowns.3
@@ -4530,7 +4426,7 @@
CROSS JOIN
(SELECT breakdown_value
FROM
- (SELECT ['$$_posthog_breakdown_other_$$', 'value2', 'value1'] AS breakdown_value) ARRAY
+ (SELECT ['value2', 'value1'] AS breakdown_value) ARRAY
JOIN breakdown_value AS breakdown_value) AS sec
ORDER BY sec.breakdown_value ASC, day_start ASC
UNION ALL SELECT quantile(0.5)(session_duration) AS total,
@@ -4538,7 +4434,7 @@
breakdown_value AS breakdown_value
FROM
(SELECT any(e__session.duration) AS session_duration,
- transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value,
+ transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['value2', 'value1'], ['value2', 'value1'], '$$_posthog_breakdown_other_$$') AS breakdown_value,
toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start
FROM events AS e SAMPLE 1
INNER JOIN
@@ -4547,7 +4443,7 @@
FROM events
WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1))
GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id)
- WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], ['$$_posthog_breakdown_other_$$', 'value2', 'value1'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0)))
+ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value2'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value1'), 0)))
GROUP BY day_start,
e__session.id,
breakdown_value,
@@ -4559,9 +4455,9 @@
ORDER BY day_start ASC, breakdown_value ASC)
GROUP BY breakdown_value
ORDER BY sum(count) DESC, breakdown_value ASC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_aggregated_range_narrower_than_week
@@ -4591,9 +4487,9 @@
e.actor_id
ORDER BY d.timestamp ASC)
WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-11 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_aggregated_range_wider_than_week
@@ -4623,9 +4519,9 @@
e.actor_id
ORDER BY d.timestamp ASC)
WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_aggregated_range_wider_than_week_with_sampling
@@ -4655,9 +4551,9 @@
e.actor_id
ORDER BY d.timestamp ASC)
WHERE and(ifNull(greaterOrEquals(timestamp, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-08 23:59:59', 6, 'UTC'))), 0))
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_daily
@@ -4701,9 +4597,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_daily_minus_utc
@@ -4747,9 +4643,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_daily_plus_utc
@@ -4793,9 +4689,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_filtering
@@ -4850,9 +4746,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_filtering_materialized
@@ -4907,9 +4803,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_hourly
@@ -4953,9 +4849,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_weekly
@@ -4999,9 +4895,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_weekly_minus_utc
@@ -5045,9 +4941,9 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
# name: TestTrends.test_weekly_active_users_weekly_plus_utc
@@ -5091,8 +4987,8 @@
GROUP BY day_start
ORDER BY day_start ASC)
ORDER BY sum(count) DESC
- LIMIT 100 SETTINGS readonly=2,
- max_execution_time=60,
- allow_experimental_object_type=1
+ LIMIT 10000 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
'''
# ---
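Note on the regenerated breakdown snapshots above: the `transform()` arrays no longer hard-code a leading '$$_posthog_breakdown_other_$$' entry, and missing properties now surface as '$$_posthog_breakdown_null_$$'. A minimal Python sketch of ClickHouse's transform(value, from, to, default) semantics as these queries use it (illustrative only, not PostHog code):

    BREAKDOWN_NULL = "$$_posthog_breakdown_null_$$"
    BREAKDOWN_OTHER = "$$_posthog_breakdown_other_$$"

    def transform(value, from_values, to_values, default):
        # ClickHouse transform(): map `value` through parallel arrays, else default.
        try:
            return to_values[from_values.index(value)]
        except ValueError:
            return default

    # ifNull(...) in the queries substitutes BREAKDOWN_NULL before transform runs,
    # so a missing property lands in the null bucket, while a value outside the
    # top breakdown values falls through to the "other" default:
    assert transform(BREAKDOWN_NULL, ["red", "blue", BREAKDOWN_NULL],
                     ["red", "blue", BREAKDOWN_NULL], BREAKDOWN_OTHER) == BREAKDOWN_NULL
    assert transform("green", ["red", "blue", BREAKDOWN_NULL],
                     ["red", "blue", BREAKDOWN_NULL], BREAKDOWN_OTHER) == BREAKDOWN_OTHER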
diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr
new file mode 100644
index 0000000000000..db9e8e1d45000
--- /dev/null
+++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr
@@ -0,0 +1,185 @@
+# serializer version: 1
+# name: TestTrendsDataWarehouseQuery.test_trends_breakdown
+ '''
+ SELECT e.prop_1 AS value,
+ count(e.id) AS count
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestTrendsDataWarehouseQuery.test_trends_breakdown.1
+ '''
+ SELECT groupArray(day_start) AS date,
+ groupArray(count) AS total,
+ ifNull(toString(breakdown_value), '') AS breakdown_value
+ FROM
+ (SELECT sum(total) AS count,
+ day_start AS day_start,
+ breakdown_value AS breakdown_value
+ FROM
+ (SELECT 0 AS total,
+ ticks.day_start AS day_start,
+ sec.breakdown_value AS breakdown_value
+ FROM
+ (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
+ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers
+ UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks
+ CROSS JOIN
+ (SELECT breakdown_value
+ FROM
+ (SELECT ['d', 'c', 'b', 'a'] AS breakdown_value) ARRAY
+ JOIN breakdown_value AS breakdown_value) AS sec
+ ORDER BY sec.breakdown_value ASC, day_start ASC
+ UNION ALL SELECT count(e.id) AS total,
+ toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start,
+ transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['d', 'c', 'b', 'a'], ['d', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), or(equals(e.prop_1, 'd'), equals(e.prop_1, 'c'), equals(e.prop_1, 'b'), equals(e.prop_1, 'a')))
+ GROUP BY day_start,
+ breakdown_value)
+ GROUP BY day_start,
+ breakdown_value
+ ORDER BY day_start ASC, breakdown_value ASC)
+ GROUP BY breakdown_value
+ ORDER BY sum(count) DESC, breakdown_value ASC
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property
+ '''
+ SELECT e.prop_1 AS value,
+ count(e.id) AS count
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a')))
+ GROUP BY value
+ ORDER BY count DESC, value DESC
+ LIMIT 26 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestTrendsDataWarehouseQuery.test_trends_breakdown_with_property.1
+ '''
+ SELECT groupArray(day_start) AS date,
+ groupArray(count) AS total,
+ ifNull(toString(breakdown_value), '') AS breakdown_value
+ FROM
+ (SELECT sum(total) AS count,
+ day_start AS day_start,
+ breakdown_value AS breakdown_value
+ FROM
+ (SELECT 0 AS total,
+ ticks.day_start AS day_start,
+ sec.breakdown_value AS breakdown_value
+ FROM
+ (SELECT minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
+ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers
+ UNION ALL SELECT toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start) AS ticks
+ CROSS JOIN
+ (SELECT breakdown_value
+ FROM
+ (SELECT ['a'] AS breakdown_value) ARRAY
+ JOIN breakdown_value AS breakdown_value) AS sec
+ ORDER BY sec.breakdown_value ASC, day_start ASC
+ UNION ALL SELECT count(e.id) AS total,
+ toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start,
+ transform(ifNull(e.prop_1, '$$_posthog_breakdown_null_$$'), ['a'], ['a'], '$$_posthog_breakdown_other_$$') AS breakdown_value
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), equals(e.prop_1, 'a'))
+ GROUP BY day_start,
+ breakdown_value)
+ GROUP BY day_start,
+ breakdown_value
+ ORDER BY day_start ASC, breakdown_value ASC)
+ GROUP BY breakdown_value
+ ORDER BY sum(count) DESC, breakdown_value ASC
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestTrendsDataWarehouseQuery.test_trends_data_warehouse
+ '''
+ SELECT groupArray(day_start) AS date,
+ groupArray(count) AS total
+ FROM
+ (SELECT sum(total) AS count,
+ day_start AS day_start
+ FROM
+ (SELECT 0 AS total,
+ minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
+ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers
+ UNION ALL SELECT 0 AS total,
+ toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start
+ UNION ALL SELECT count(e.id) AS total,
+ toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0))
+ GROUP BY day_start)
+ GROUP BY day_start
+ ORDER BY day_start ASC)
+ ORDER BY sum(count) DESC
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestTrendsDataWarehouseQuery.test_trends_entity_property
+ '''
+ SELECT groupArray(day_start) AS date,
+ groupArray(count) AS total
+ FROM
+ (SELECT sum(total) AS count,
+ day_start AS day_start
+ FROM
+ (SELECT 0 AS total,
+ minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
+ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers
+ UNION ALL SELECT 0 AS total,
+ toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start
+ UNION ALL SELECT count(e.id) AS total,
+ toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'))
+ GROUP BY day_start)
+ GROUP BY day_start
+ ORDER BY day_start ASC)
+ ORDER BY sum(count) DESC
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
+# name: TestTrendsDataWarehouseQuery.test_trends_property
+ '''
+ SELECT groupArray(day_start) AS date,
+ groupArray(count) AS total
+ FROM
+ (SELECT sum(total) AS count,
+ day_start AS day_start
+ FROM
+ (SELECT 0 AS total,
+ minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), toIntervalDay(numbers.number)) AS day_start
+ FROM numbers(coalesce(dateDiff('day', assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) AS numbers
+ UNION ALL SELECT 0 AS total,
+ toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start
+ UNION ALL SELECT count(e.id) AS total,
+ toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start
+ FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e
+ WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'))
+ GROUP BY day_start)
+ GROUP BY day_start
+ ORDER BY day_start ASC)
+ ORDER BY sum(count) DESC
+ LIMIT 100 SETTINGS readonly=2,
+ max_execution_time=60,
+ allow_experimental_object_type=1
+ '''
+# ---
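The inner breakdown-values queries in these snapshots use LIMIT 26 while the outer series query allows LIMIT 10000. A plausible reading (an assumption, not stated in the diff): with a display limit of 25 breakdown values, fetching one extra row is enough to detect whether an overflow "other" bucket would be needed, which is consistent with test_breakdown_filtering_limit below now expecting 25 series rather than 26. A hypothetical sketch:

    BREAKDOWN_DISPLAY_LIMIT = 25  # assumed display limit matching the LIMIT 26 above

    def top_breakdown_values(rows):
        # `rows` is the result of the LIMIT 26 values query, ordered by count.
        values = [value for value, _count in rows]
        # A 26th row means more breakdown values exist than will be displayed.
        overflowed = len(values) > BREAKDOWN_DISPLAY_LIMIT
        return values[:BREAKDOWN_DISPLAY_LIMIT], overflowed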
diff --git a/posthog/hogql_queries/insights/trends/test/test_trends.py b/posthog/hogql_queries/insights/trends/test/test_trends.py
index daab1a808ce81..1ac54e16de629 100644
--- a/posthog/hogql_queries/insights/trends/test/test_trends.py
+++ b/posthog/hogql_queries/insights/trends/test/test_trends.py
@@ -487,7 +487,7 @@ def test_trends_per_day(self):
self.assertEqual(response[0]["data"][5], 1.0)
# just make sure this doesn't error
- def test_no_props(self):
+ def test_no_props_string(self):
PropertyDefinition.objects.create(
team=self.team,
name="$some_property",
@@ -516,6 +516,64 @@ def test_no_props(self):
self.team,
)
+ def test_no_props_numeric(self):
+ PropertyDefinition.objects.create(
+ team=self.team,
+ name="$some_property",
+ property_type="Numeric",
+ type=PropertyDefinition.Type.EVENT,
+ )
+
+ with freeze_time("2020-01-04T13:01:01Z"):
+ self._run(
+ Filter(
+ team=self.team,
+ data={
+ "date_from": "-14d",
+ "breakdown": "$some_property",
+ "events": [
+ {
+ "id": "sign up",
+ "name": "sign up",
+ "type": "events",
+ "order": 0,
+ },
+ {"id": "no events"},
+ ],
+ },
+ ),
+ self.team,
+ )
+
+ def test_no_props_boolean(self):
+ PropertyDefinition.objects.create(
+ team=self.team,
+ name="$some_property",
+ property_type="Boolean",
+ type=PropertyDefinition.Type.EVENT,
+ )
+
+ with freeze_time("2020-01-04T13:01:01Z"):
+ self._run(
+ Filter(
+ team=self.team,
+ data={
+ "date_from": "-14d",
+ "breakdown": "$some_property",
+ "events": [
+ {
+ "id": "sign up",
+ "name": "sign up",
+ "type": "events",
+ "order": 0,
+ },
+ {"id": "no events"},
+ ],
+ },
+ ),
+ self.team,
+ )
+
def test_trends_per_day_48hours(self):
self._create_events()
with freeze_time("2020-01-03T13:00:01Z"):
@@ -663,7 +721,7 @@ def test_trends_breakdown_cumulative(self):
self.team,
)
- self.assertEqual(response[0]["label"], "$$_posthog_breakdown_other_$$")
+ self.assertEqual(response[0]["label"], "$$_posthog_breakdown_null_$$")
self.assertEqual(response[0]["labels"][4], "1-Jan-2020")
self.assertEqual(response[0]["data"], [0.0, 0.0, 0.0, 0.0, 1.0, 1.0, 1.0, 1.0])
@@ -1512,7 +1570,7 @@ def test_trends_breakdown_with_session_property_single_aggregate_math_and_breakd
# empty has: 1 seconds
self.assertEqual(
sorted([resp["breakdown_value"] for resp in daily_response]),
- sorted(["value1", "value2", "$$_posthog_breakdown_other_$$"]),
+ sorted(["value1", "value2", "$$_posthog_breakdown_null_$$"]),
)
self.assertEqual(sorted([resp["aggregated_value"] for resp in daily_response]), sorted([12.5, 10, 1]))
@@ -3032,17 +3090,12 @@ def test_trends_with_session_property_total_volume_math_with_breakdowns(self):
# value1 has 0,5,10 seconds (in second interval)
# value2 has 5,10,15 seconds (in second interval)
- self.assertEqual(
- [resp["breakdown_value"] for resp in daily_response], ["value2", "value1", "$$_posthog_breakdown_other_$$"]
- )
+ self.assertEqual([resp["breakdown_value"] for resp in daily_response], ["value2", "value1"])
self.assertCountEqual(daily_response[0]["labels"], ["22-Dec-2019", "29-Dec-2019"])
self.assertCountEqual(daily_response[0]["data"], [0, 10])
self.assertCountEqual(daily_response[1]["data"], [0, 5])
- self.assertCountEqual(daily_response[2]["data"], [0, 0])
- self.assertEqual(
- [resp["breakdown_value"] for resp in weekly_response], ["value2", "value1", "$$_posthog_breakdown_other_$$"]
- )
+ self.assertEqual([resp["breakdown_value"] for resp in weekly_response], ["value2", "value1"])
self.assertCountEqual(
weekly_response[0]["labels"],
[
@@ -3058,7 +3111,6 @@ def test_trends_with_session_property_total_volume_math_with_breakdowns(self):
)
self.assertCountEqual(weekly_response[0]["data"], [0, 0, 0, 0, 7.5, 15, 0, 0])
self.assertCountEqual(weekly_response[1]["data"], [0, 0, 0, 0, 5, 5, 0, 0])
- self.assertCountEqual(weekly_response[2]["data"], [0, 0, 0, 0, 0, 0, 0, 0])
def test_trends_with_session_property_total_volume_math_with_sessions_spanning_multiple_intervals(self):
self._create_person(
@@ -3420,9 +3472,8 @@ def test_breakdown_with_filter(self):
),
self.team,
)
- self.assertEqual(len(response), 2)
+ self.assertEqual(len(response), 1)
self.assertEqual(response[0]["breakdown_value"], "val")
- self.assertEqual(response[1]["breakdown_value"], "$$_posthog_breakdown_other_$$")
def test_action_filtering(self):
sign_up_action, person = self._create_events()
@@ -4164,7 +4215,7 @@ def test_breakdown_by_person_property(self):
self.assertListEqual(
sorted(res["breakdown_value"] for res in event_response),
- ["$$_posthog_breakdown_other_$$", "person1", "person2", "person3"],
+ ["person1", "person2", "person3"],
)
for response in event_response:
@@ -4205,7 +4256,7 @@ def test_breakdown_by_person_property_for_person_on_events(self):
self.assertListEqual(
sorted(res["breakdown_value"] for res in event_response),
- ["$$_posthog_breakdown_other_$$", "person1", "person2", "person3"],
+ ["person1", "person2", "person3"],
)
for response in event_response:
@@ -4670,7 +4721,7 @@ def test_trends_aggregate_by_distinct_id(self):
self.assertEqual(daily_response[0]["data"][0], 2)
self.assertEqual(daily_response[0]["label"], "some_val")
self.assertEqual(daily_response[1]["data"][0], 1)
- self.assertEqual(daily_response[1]["label"], "$$_posthog_breakdown_other_$$")
+ self.assertEqual(daily_response[1]["label"], "$$_posthog_breakdown_null_$$")
# MAU
with freeze_time("2019-12-31T13:00:03Z"):
@@ -4741,7 +4792,7 @@ def test_breakdown_filtering_limit(self):
),
self.team,
)
- self.assertEqual(len(response), 26)
+ self.assertEqual(len(response), 25)
@also_test_with_materialized_columns(event_properties=["order"], person_properties=["name"])
def test_breakdown_with_person_property_filter(self):
@@ -4816,10 +4867,10 @@ def test_breakdown_filtering(self):
self.team,
)
- self.assertEqual(response[0]["label"], "sign up - $$_posthog_breakdown_other_$$")
+ self.assertEqual(response[0]["label"], "sign up - $$_posthog_breakdown_null_$$")
self.assertEqual(response[1]["label"], "sign up - value")
self.assertEqual(response[2]["label"], "sign up - other_value")
- self.assertEqual(response[3]["label"], "no events - none")
+ self.assertEqual(response[3]["label"], "no events - $$_posthog_breakdown_null_$$")
self.assertEqual(sum(response[0]["data"]), 2)
self.assertEqual(sum(response[1]["data"]), 2)
@@ -4877,7 +4928,7 @@ def test_breakdown_filtering_persons(self):
),
self.team,
)
- self.assertEqual(response[0]["label"], "$$_posthog_breakdown_other_$$")
+ self.assertEqual(response[0]["label"], "$$_posthog_breakdown_null_$$")
self.assertEqual(response[1]["label"], "test@gmail.com")
self.assertEqual(response[2]["label"], "test@posthog.com")
@@ -4935,7 +4986,7 @@ def test_breakdown_filtering_persons_with_action_props(self):
),
self.team,
)
- self.assertEqual(response[0]["label"], "$$_posthog_breakdown_other_$$")
+ self.assertEqual(response[0]["label"], "$$_posthog_breakdown_null_$$")
self.assertEqual(response[1]["label"], "test@gmail.com")
self.assertEqual(response[2]["label"], "test@posthog.com")
@@ -5011,18 +5062,14 @@ def test_breakdown_filtering_with_properties(self):
)
response = sorted(response, key=lambda x: x["label"])
- self.assertEqual(response[0]["label"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[1]["label"], "first url")
- self.assertEqual(response[2]["label"], "second url")
+ self.assertEqual(response[0]["label"], "first url")
+ self.assertEqual(response[1]["label"], "second url")
- self.assertEqual(sum(response[0]["data"]), 0)
- self.assertEqual(response[0]["breakdown_value"], "$$_posthog_breakdown_other_$$")
+ self.assertEqual(sum(response[0]["data"]), 1)
+ self.assertEqual(response[0]["breakdown_value"], "first url")
self.assertEqual(sum(response[1]["data"]), 1)
- self.assertEqual(response[1]["breakdown_value"], "first url")
-
- self.assertEqual(sum(response[2]["data"]), 1)
- self.assertEqual(response[2]["breakdown_value"], "second url")
+ self.assertEqual(response[1]["breakdown_value"], "second url")
@snapshot_clickhouse_queries
def test_breakdown_filtering_with_properties_in_new_format(self):
@@ -5098,13 +5145,10 @@ def test_breakdown_filtering_with_properties_in_new_format(self):
)
response = sorted(response, key=lambda x: x["label"])
- self.assertEqual(response[0]["label"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[1]["label"], "second url")
+ self.assertEqual(response[0]["label"], "second url")
- self.assertEqual(sum(response[0]["data"]), 0)
- self.assertEqual(response[0]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(sum(response[1]["data"]), 1)
- self.assertEqual(response[1]["breakdown_value"], "second url")
+ self.assertEqual(sum(response[0]["data"]), 1)
+ self.assertEqual(response[0]["breakdown_value"], "second url")
# AND filter properties with disjoint set means results should be empty
with freeze_time("2020-01-05T13:01:01Z"):
@@ -5136,7 +5180,7 @@ def test_breakdown_filtering_with_properties_in_new_format(self):
)
response = sorted(response, key=lambda x: x["label"])
- self.assertEqual(response, [])
+ self.assertEqual(len(response), 0)
@also_test_with_person_on_events_v2
@snapshot_clickhouse_queries
@@ -5712,14 +5756,13 @@ def test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns(
self.team,
)
response = sorted(response, key=lambda item: item["breakdown_value"])
- self.assertEqual(len(response), 6)
+ self.assertEqual(len(response), 5)
# person1 shouldn't be selected because it doesn't match the filter
- self.assertEqual(response[0]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[1]["breakdown_value"], "test2@posthog.com")
- self.assertEqual(response[2]["breakdown_value"], "test3@posthog.com")
- self.assertEqual(response[3]["breakdown_value"], "test4@posthog.com")
- self.assertEqual(response[4]["breakdown_value"], "test5@posthog.com")
- self.assertEqual(response[5]["breakdown_value"], "test@gmail.com")
+ self.assertEqual(response[0]["breakdown_value"], "test2@posthog.com")
+ self.assertEqual(response[1]["breakdown_value"], "test3@posthog.com")
+ self.assertEqual(response[2]["breakdown_value"], "test4@posthog.com")
+ self.assertEqual(response[3]["breakdown_value"], "test5@posthog.com")
+ self.assertEqual(response[4]["breakdown_value"], "test@gmail.com")
# now have more strict filters with entity props
response = self._run(
@@ -5776,9 +5819,8 @@ def test_trend_breakdown_user_props_with_filter_with_partial_property_pushdowns(
),
self.team,
)
- self.assertEqual(len(response), 2)
+ self.assertEqual(len(response), 1)
self.assertEqual(response[0]["breakdown_value"], "test2@posthog.com")
- self.assertEqual(response[1]["breakdown_value"], "$$_posthog_breakdown_other_$$")
def _create_active_users_events(self):
self._create_person(team_id=self.team.pk, distinct_ids=["p0"], properties={"name": "p1"})
@@ -7552,7 +7594,7 @@ def test_trends_count_per_user_average_with_event_property_breakdown(self):
assert len(daily_response) == 3
assert daily_response[0]["breakdown_value"] == "red"
assert daily_response[1]["breakdown_value"] == "blue"
- assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_other_$$"
+ assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_null_$$"
assert daily_response[0]["days"] == [
"2020-01-01",
"2020-01-02",
@@ -7566,7 +7608,7 @@ def test_trends_count_per_user_average_with_event_property_breakdown(self):
assert daily_response[2]["days"] == daily_response[0]["days"]
assert daily_response[0]["data"] == [1.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0] # red
assert daily_response[1]["data"] == [1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0] # blue
- assert daily_response[2]["data"] == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # $$_posthog_breakdown_other_$$
+ assert daily_response[2]["data"] == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # $$_posthog_breakdown_null_$$
def test_trends_count_per_user_average_with_person_property_breakdown(self):
self._create_event_count_per_actor_events()
@@ -7586,10 +7628,9 @@ def test_trends_count_per_user_average_with_person_property_breakdown(self):
self.team,
)
- assert len(daily_response) == 3
+ assert len(daily_response) == 2
assert daily_response[0]["breakdown_value"] == "mango"
assert daily_response[1]["breakdown_value"] == "tomato"
- assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_other_$$"
assert daily_response[0]["days"] == [
"2020-01-01",
"2020-01-02",
@@ -7600,10 +7641,8 @@ def test_trends_count_per_user_average_with_person_property_breakdown(self):
"2020-01-07",
]
assert daily_response[1]["days"] == daily_response[0]["days"]
- assert daily_response[2]["days"] == daily_response[0]["days"]
assert daily_response[0]["data"] == [2.0, 0.0, 0.0, 1.0, 2.0, 0.0, 0.0] # red
assert daily_response[1]["data"] == [1.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # blue
- assert daily_response[2]["data"] == [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0] # $$_posthog_breakdown_other_$$
def test_trends_count_per_user_average_aggregated_with_event_property_breakdown(self):
self._create_event_count_per_actor_events()
@@ -7625,10 +7664,10 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown(
assert len(daily_response) == 3
assert daily_response[0]["breakdown_value"] == "blue"
assert daily_response[1]["breakdown_value"] == "red"
- assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_other_$$"
+ assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_null_$$"
assert daily_response[0]["aggregated_value"] == 1.0 # blue
assert daily_response[1]["aggregated_value"] == 2.0 # red
- assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_other_$$
+ assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_null_$$
@snapshot_clickhouse_queries
def test_trends_count_per_user_average_aggregated_with_event_property_breakdown_with_sampling(self):
@@ -7652,10 +7691,10 @@ def test_trends_count_per_user_average_aggregated_with_event_property_breakdown_
assert len(daily_response) == 3
assert daily_response[0]["breakdown_value"] == "blue"
assert daily_response[1]["breakdown_value"] == "red"
- assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_other_$$"
+ assert daily_response[2]["breakdown_value"] == "$$_posthog_breakdown_null_$$"
assert daily_response[0]["aggregated_value"] == 1.0 # blue
assert daily_response[1]["aggregated_value"] == 2.0 # red
- assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_other_$$
+ assert daily_response[2]["aggregated_value"] == 1.0 # $$_posthog_breakdown_null_$$
# TODO: Add support for avg_count by group indexes (see this Slack thread for more context: https://posthog.slack.com/archives/C0368RPHLQH/p1700484174374229)
@pytest.mark.skip(reason="support for avg_count_per_actor not included yet")
@@ -7854,13 +7893,11 @@ def test_breakdown_with_filter_groups(self):
self.team,
)
- self.assertEqual(len(response), 3)
+ self.assertEqual(len(response), 2)
self.assertEqual(response[0]["breakdown_value"], "oh")
self.assertEqual(response[0]["count"], 1)
self.assertEqual(response[1]["breakdown_value"], "uh")
self.assertEqual(response[1]["count"], 1)
- self.assertEqual(response[2]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[2]["count"], 0)
@also_test_with_materialized_columns(
event_properties=["key"],
@@ -7921,13 +7958,11 @@ def test_breakdown_with_filter_groups_person_on_events(self):
self.team,
)
- self.assertEqual(len(response), 3)
+ self.assertEqual(len(response), 2)
self.assertEqual(response[0]["breakdown_value"], "oh")
self.assertEqual(response[0]["count"], 1)
self.assertEqual(response[1]["breakdown_value"], "uh")
self.assertEqual(response[1]["count"], 1)
- self.assertEqual(response[2]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[2]["count"], 0)
@override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=True)
@snapshot_clickhouse_queries
@@ -7999,13 +8034,11 @@ def test_breakdown_with_filter_groups_person_on_events_v2(self):
self.team,
)
- self.assertEqual(len(response), 3)
+ self.assertEqual(len(response), 2)
self.assertEqual(response[0]["breakdown_value"], "oh")
self.assertEqual(response[0]["count"], 1)
self.assertEqual(response[1]["breakdown_value"], "uh")
self.assertEqual(response[1]["count"], 1)
- self.assertEqual(response[2]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[2]["count"], 0)
# TODO: Delete this test when moved to person-on-events
def test_breakdown_by_group_props(self):
@@ -8050,13 +8083,11 @@ def test_breakdown_by_group_props(self):
)
response = self._run(filter, self.team)
- self.assertEqual(len(response), 3)
+ self.assertEqual(len(response), 2)
self.assertEqual(response[0]["breakdown_value"], "finance")
self.assertEqual(response[0]["count"], 2)
self.assertEqual(response[1]["breakdown_value"], "technology")
self.assertEqual(response[1]["count"], 1)
- self.assertEqual(response[2]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[2]["count"], 0)
filter = filter.shallow_clone(
{
@@ -8118,13 +8149,11 @@ def test_breakdown_by_group_props_person_on_events(self):
with override_instance_config("PERSON_ON_EVENTS_ENABLED", True):
response = self._run(filter, self.team)
- self.assertEqual(len(response), 3)
+ self.assertEqual(len(response), 2)
self.assertEqual(response[0]["breakdown_value"], "finance")
self.assertEqual(response[0]["count"], 2)
self.assertEqual(response[1]["breakdown_value"], "technology")
self.assertEqual(response[1]["count"], 1)
- self.assertEqual(response[2]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[2]["count"], 0)
filter = filter.shallow_clone(
{
@@ -8178,11 +8207,9 @@ def test_breakdown_by_group_props_with_person_filter(self):
response = self._run(filter, self.team)
- self.assertEqual(len(response), 2)
+ self.assertEqual(len(response), 1)
self.assertEqual(response[0]["breakdown_value"], "finance")
self.assertEqual(response[0]["count"], 1)
- self.assertEqual(response[1]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[1]["count"], 0)
# TODO: Delete this test when moved to person-on-events
def test_filtering_with_group_props(self):
@@ -8339,11 +8366,9 @@ def test_breakdown_by_group_props_with_person_filter_person_on_events(self):
with override_instance_config("PERSON_ON_EVENTS_ENABLED", True):
response = self._run(filter, self.team)
- self.assertEqual(len(response), 2)
+ self.assertEqual(len(response), 1)
self.assertEqual(response[0]["breakdown_value"], "finance")
self.assertEqual(response[0]["count"], 1)
- self.assertEqual(response[1]["breakdown_value"], "$$_posthog_breakdown_other_$$")
- self.assertEqual(response[1]["count"], 0)
@also_test_with_materialized_columns(
person_properties=["key"],
diff --git a/posthog/hogql_queries/insights/trends/test/test_data_warehouse_query_builder.py b/posthog/hogql_queries/insights/trends/test/test_trends_data_warehouse_query.py
similarity index 80%
rename from posthog/hogql_queries/insights/trends/test/test_data_warehouse_query_builder.py
rename to posthog/hogql_queries/insights/trends/test/test_trends_data_warehouse_query.py
index 19ecbca78bc28..e47de87fbada1 100644
--- a/posthog/hogql_queries/insights/trends/test/test_data_warehouse_query_builder.py
+++ b/posthog/hogql_queries/insights/trends/test/test_trends_data_warehouse_query.py
@@ -4,7 +4,7 @@
from posthog.hogql.query import execute_hogql_query
from posthog.hogql.timings import HogQLTimings
-from posthog.hogql_queries.insights.trends.data_warehouse_trends_query_builder import DataWarehouseTrendsQueryBuilder
+from posthog.hogql_queries.insights.trends.trends_query_builder import TrendsQueryBuilder
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.schema import (
BreakdownFilter,
@@ -12,6 +12,7 @@
ChartDisplayType,
DateRange,
DataWarehouseNode,
+ DataWarehouseEventsModifier,
TrendsQuery,
TrendsFilter,
)
@@ -41,7 +42,7 @@
TEST_BUCKET = "test_storage_bucket-posthog.hogql.datawarehouse.trendquery"
-class TestDataWarehouseQueryBuilder(ClickhouseTestMixin, BaseTest):
+class TestTrendsDataWarehouseQuery(ClickhouseTestMixin, BaseTest):
def teardown_method(self, method) -> None:
s3 = resource(
"s3",
@@ -66,7 +67,16 @@ def get_response(self, trends_query: TrendsQuery):
modifiers = create_default_modifiers_for_team(self.team)
if isinstance(trends_query.series[0], DataWarehouseNode):
- query_builder = DataWarehouseTrendsQueryBuilder(
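+ # The dedicated DataWarehouseTrendsQueryBuilder is gone; warehouse series
+ # are now described to the shared TrendsQueryBuilder through
+ # DataWarehouseEventsModifier entries, mirroring what the runner does.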
+ series = trends_query.series[0]
+ modifiers.dataWarehouseEventsModifiers = [
+ DataWarehouseEventsModifier(
+ table_name=series.table_name,
+ timestamp_field=series.timestamp_field,
+ id_field=series.id_field,
+ distinct_id_field=series.distinct_id_field,
+ )
+ ]
+ query_builder = TrendsQueryBuilder(
trends_query=trends_query,
team=self.team,
query_date_range=query_date_range,
@@ -84,6 +94,7 @@ def get_response(self, trends_query: TrendsQuery):
query=query,
team=self.team,
timings=timings,
+ modifiers=modifiers,
)
def create_parquet_file(self):
@@ -143,7 +154,15 @@ def test_trends_data_warehouse(self):
trends_query = TrendsQuery(
kind="TrendsQuery",
dateRange=DateRange(date_from="2023-01-01"),
- series=[DataWarehouseNode(id=table_name, table_name=table_name, id_field="id", timestamp_field="created")],
+ series=[
+ DataWarehouseNode(
+ id=table_name,
+ table_name=table_name,
+ id_field="id",
+ distinct_id_field="customer_email",
+ timestamp_field="created",
+ )
+ ],
)
with freeze_time("2023-01-07"):
@@ -166,6 +185,7 @@ def test_trends_entity_property(self):
table_name=table_name,
id_field="id",
timestamp_field="created",
+ distinct_id_field="customer_email",
properties=clean_entity_properties([{"key": "prop_1", "value": "a", "type": "data_warehouse"}]),
)
],
@@ -185,7 +205,15 @@ def test_trends_property(self):
trends_query = TrendsQuery(
kind="TrendsQuery",
dateRange=DateRange(date_from="2023-01-01"),
- series=[DataWarehouseNode(id=table_name, table_name=table_name, id_field="id", timestamp_field="created")],
+ series=[
+ DataWarehouseNode(
+ id=table_name,
+ table_name=table_name,
+ id_field="id",
+ distinct_id_field="customer_email",
+ timestamp_field="created",
+ )
+ ],
properties=clean_entity_properties([{"key": "prop_1", "value": "a", "type": "data_warehouse"}]),
)
@@ -203,7 +231,15 @@ def test_trends_breakdown(self):
trends_query = TrendsQuery(
kind="TrendsQuery",
dateRange=DateRange(date_from="2023-01-01"),
- series=[DataWarehouseNode(id=table_name, table_name=table_name, id_field="id", timestamp_field="created")],
+ series=[
+ DataWarehouseNode(
+ id=table_name,
+ table_name=table_name,
+ id_field="id",
+ distinct_id_field="customer_email",
+ timestamp_field="created",
+ )
+ ],
breakdownFilter=BreakdownFilter(breakdown_type=BreakdownType.data_warehouse, breakdown="prop_1"),
)
@@ -212,6 +248,7 @@ def test_trends_breakdown(self):
assert response.columns is not None
assert set(response.columns).issubset({"date", "total", "breakdown_value"})
+ assert len(response.results) == 4
assert response.results[0][1] == [1, 0, 0, 0, 0, 0, 0]
assert response.results[0][2] == "a"
@@ -224,9 +261,6 @@ def test_trends_breakdown(self):
assert response.results[3][1] == [0, 0, 0, 1, 0, 0, 0]
assert response.results[3][2] == "d"
- assert response.results[4][1] == [0, 0, 0, 0, 0, 0, 0]
- assert response.results[4][2] == "$$_posthog_breakdown_other_$$"
-
@snapshot_clickhouse_queries
def test_trends_breakdown_with_property(self):
table_name = self.create_parquet_file()
@@ -234,7 +268,15 @@ def test_trends_breakdown_with_property(self):
trends_query = TrendsQuery(
kind="TrendsQuery",
dateRange=DateRange(date_from="2023-01-01"),
- series=[DataWarehouseNode(id=table_name, table_name=table_name, id_field="id", timestamp_field="created")],
+ series=[
+ DataWarehouseNode(
+ id=table_name,
+ table_name=table_name,
+ id_field="id",
+ distinct_id_field="customer_email",
+ timestamp_field="created",
+ )
+ ],
properties=clean_entity_properties([{"key": "prop_1", "value": "a", "type": "data_warehouse"}]),
breakdownFilter=BreakdownFilter(breakdown_type=BreakdownType.data_warehouse, breakdown="prop_1"),
)
@@ -244,12 +286,10 @@ def test_trends_breakdown_with_property(self):
assert response.columns is not None
assert set(response.columns).issubset({"date", "total", "breakdown_value"})
+ assert len(response.results) == 1
assert response.results[0][1] == [1, 0, 0, 0, 0, 0, 0]
assert response.results[0][2] == "a"
- assert response.results[1][1] == [0, 0, 0, 0, 0, 0, 0]
- assert response.results[1][2] == "$$_posthog_breakdown_other_$$"
-
def assert_column_names_with_display_type(self, display_type: ChartDisplayType):
# KLUDGE: creating data on every variant
table_name = self.create_parquet_file()
@@ -257,7 +297,15 @@ def assert_column_names_with_display_type(self, display_type: ChartDisplayType):
trends_query = TrendsQuery(
kind="TrendsQuery",
dateRange=DateRange(date_from="2023-01-01"),
- series=[DataWarehouseNode(id=table_name, table_name=table_name, id_field="id", timestamp_field="created")],
+ series=[
+ DataWarehouseNode(
+ id=table_name,
+ table_name=table_name,
+ id_field="id",
+ distinct_id_field="customer_email",
+ timestamp_field="created",
+ )
+ ],
trendsFilter=TrendsFilter(display=display_type),
)
diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py
index 97c4526ddcf0c..104e232a01406 100644
--- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py
+++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py
@@ -3,6 +3,8 @@
from unittest.mock import patch
from django.test import override_settings
from freezegun import freeze_time
+from posthog.hogql import ast
+from posthog.hogql.constants import MAX_SELECT_RETURNED_ROWS
from posthog.hogql.modifiers import create_default_modifiers_for_team
from posthog.hogql_queries.insights.trends.trends_query_runner import TrendsQueryRunner
from posthog.models.cohort.cohort import Cohort
@@ -539,18 +541,16 @@ def test_trends_breakdowns(self):
breakdown_labels = [result["breakdown_value"] for result in response.results]
- assert len(response.results) == 5
- assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari", "$$_posthog_breakdown_other_$$"]
+ assert len(response.results) == 4
+ assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari"]
assert response.results[0]["label"] == "Chrome"
assert response.results[1]["label"] == "Firefox"
assert response.results[2]["label"] == "Edge"
assert response.results[3]["label"] == "Safari"
- assert response.results[4]["label"] == "$$_posthog_breakdown_other_$$"
assert response.results[0]["count"] == 6
assert response.results[1]["count"] == 2
assert response.results[2]["count"] == 1
assert response.results[3]["count"] == 1
- assert response.results[4]["count"] == 0
def test_trends_breakdowns_boolean(self):
self._create_test_events()
@@ -566,16 +566,14 @@ def test_trends_breakdowns_boolean(self):
breakdown_labels = [result["breakdown_value"] for result in response.results]
- assert len(response.results) == 3
- assert breakdown_labels == ["true", "false", "$$_posthog_breakdown_other_$$"]
+ assert len(response.results) == 2
+ assert breakdown_labels == ["true", "false"]
assert response.results[0]["label"] == f"true"
assert response.results[1]["label"] == f"false"
- assert response.results[2]["label"] == f"$pageview - Other"
assert response.results[0]["count"] == 7
assert response.results[1]["count"] == 3
- assert response.results[2]["count"] == 0
def test_trends_breakdowns_histogram(self):
self._create_test_events()
@@ -671,18 +669,16 @@ def test_trends_breakdowns_hogql(self):
breakdown_labels = [result["breakdown_value"] for result in response.results]
- assert len(response.results) == 5
- assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari", "$$_posthog_breakdown_other_$$"]
+ assert len(response.results) == 4
+ assert breakdown_labels == ["Chrome", "Firefox", "Edge", "Safari"]
assert response.results[0]["label"] == "Chrome"
assert response.results[1]["label"] == "Firefox"
assert response.results[2]["label"] == "Edge"
assert response.results[3]["label"] == "Safari"
- assert response.results[4]["label"] == "$$_posthog_breakdown_other_$$"
assert response.results[0]["count"] == 6
assert response.results[1]["count"] == 2
assert response.results[2]["count"] == 1
assert response.results[3]["count"] == 1
- assert response.results[4]["count"] == 0
def test_trends_breakdowns_multiple_hogql(self):
self._create_test_events()
@@ -698,39 +694,33 @@ def test_trends_breakdowns_multiple_hogql(self):
breakdown_labels = [result["breakdown_value"] for result in response.results]
- assert len(response.results) == 10
+ assert len(response.results) == 8
assert breakdown_labels == [
"Chrome",
"Firefox",
"Edge",
"Safari",
- "$$_posthog_breakdown_other_$$",
"Chrome",
"Edge",
"Firefox",
"Safari",
- "$$_posthog_breakdown_other_$$",
]
assert response.results[0]["label"] == f"$pageview - Chrome"
assert response.results[1]["label"] == f"$pageview - Firefox"
assert response.results[2]["label"] == f"$pageview - Edge"
assert response.results[3]["label"] == f"$pageview - Safari"
- assert response.results[4]["label"] == f"$pageview - $$_posthog_breakdown_other_$$"
- assert response.results[5]["label"] == f"$pageleave - Chrome"
- assert response.results[6]["label"] == f"$pageleave - Edge"
- assert response.results[7]["label"] == f"$pageleave - Firefox"
- assert response.results[8]["label"] == f"$pageleave - Safari"
- assert response.results[9]["label"] == f"$pageleave - $$_posthog_breakdown_other_$$"
+ assert response.results[4]["label"] == f"$pageleave - Chrome"
+ assert response.results[5]["label"] == f"$pageleave - Edge"
+ assert response.results[6]["label"] == f"$pageleave - Firefox"
+ assert response.results[7]["label"] == f"$pageleave - Safari"
assert response.results[0]["count"] == 6
assert response.results[1]["count"] == 2
assert response.results[2]["count"] == 1
assert response.results[3]["count"] == 1
- assert response.results[4]["count"] == 0
- assert response.results[5]["count"] == 3
+ assert response.results[4]["count"] == 3
+ assert response.results[5]["count"] == 1
assert response.results[6]["count"] == 1
assert response.results[7]["count"] == 1
- assert response.results[8]["count"] == 1
- assert response.results[9]["count"] == 0
def test_trends_breakdowns_and_compare(self):
self._create_test_events()
@@ -746,48 +736,38 @@ def test_trends_breakdowns_and_compare(self):
breakdown_labels = [result["breakdown_value"] for result in response.results]
- assert len(response.results) == 7
+ assert len(response.results) == 5
assert breakdown_labels == [
"Chrome",
"Safari",
- "$$_posthog_breakdown_other_$$",
"Chrome",
"Firefox",
"Edge",
- "$$_posthog_breakdown_other_$$",
]
assert response.results[0]["label"] == f"Chrome"
assert response.results[1]["label"] == f"Safari"
- assert response.results[2]["label"] == f"$$_posthog_breakdown_other_$$"
- assert response.results[3]["label"] == f"Chrome"
- assert response.results[4]["label"] == f"Firefox"
- assert response.results[5]["label"] == f"Edge"
- assert response.results[6]["label"] == f"$$_posthog_breakdown_other_$$"
+ assert response.results[2]["label"] == f"Chrome"
+ assert response.results[3]["label"] == f"Firefox"
+ assert response.results[4]["label"] == f"Edge"
assert response.results[0]["count"] == 3
assert response.results[1]["count"] == 1
- assert response.results[2]["count"] == 0
- assert response.results[3]["count"] == 3
- assert response.results[4]["count"] == 2
- assert response.results[5]["count"] == 1
- assert response.results[6]["count"] == 0
+ assert response.results[2]["count"] == 3
+ assert response.results[3]["count"] == 2
+ assert response.results[4]["count"] == 1
assert response.results[0]["compare_label"] == "current"
assert response.results[1]["compare_label"] == "current"
- assert response.results[2]["compare_label"] == "current"
+ assert response.results[2]["compare_label"] == "previous"
assert response.results[3]["compare_label"] == "previous"
assert response.results[4]["compare_label"] == "previous"
- assert response.results[5]["compare_label"] == "previous"
- assert response.results[6]["compare_label"] == "previous"
assert response.results[0]["compare"] is True
assert response.results[1]["compare"] is True
assert response.results[2]["compare"] is True
assert response.results[3]["compare"] is True
assert response.results[4]["compare"] is True
- assert response.results[5]["compare"] is True
- assert response.results[6]["compare"] is True
def test_trends_breakdown_and_aggregation_query_orchestration(self):
self._create_test_events()
@@ -803,13 +783,12 @@ def test_trends_breakdown_and_aggregation_query_orchestration(self):
breakdown_labels = [result["breakdown_value"] for result in response.results]
- assert len(response.results) == 5
- assert breakdown_labels == ["Chrome", "Firefox", "Safari", "Edge", "$$_posthog_breakdown_other_$$"]
+ assert len(response.results) == 4
+ assert breakdown_labels == ["Chrome", "Firefox", "Safari", "Edge"]
assert response.results[0]["label"] == "Chrome"
assert response.results[1]["label"] == "Firefox"
assert response.results[2]["label"] == "Safari"
assert response.results[3]["label"] == "Edge"
- assert response.results[4]["label"] == "$$_posthog_breakdown_other_$$"
assert response.results[0]["data"] == [
0,
@@ -867,20 +846,6 @@ def test_trends_breakdown_and_aggregation_query_orchestration(self):
0,
0,
]
- assert response.results[4]["data"] == [
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- 0,
- ]
def test_trends_aggregation_hogql(self):
self._create_test_events()
@@ -1152,7 +1117,7 @@ def test_breakdown_values_limit(self):
def test_breakdown_values_world_map_limit(self):
PropertyDefinition.objects.create(team=self.team, name="breakdown_value", property_type="String")
- for value in list(range(30)):
+ for value in list(range(250)):
_create_event(
team=self.team,
event="$pageview",
@@ -1161,7 +1126,7 @@ def test_breakdown_values_world_map_limit(self):
properties={"breakdown_value": f"{value}"},
)
- response = self._run_trends_query(
+ query_runner = self._create_query_runner(
"2020-01-09",
"2020-01-20",
IntervalType.day,
@@ -1169,8 +1134,11 @@ def test_breakdown_values_world_map_limit(self):
TrendsFilter(display=ChartDisplayType.WorldMap),
BreakdownFilter(breakdown="breakdown_value", breakdown_type=BreakdownType.event),
)
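+ # The world map display should no longer be capped at the breakdown limit;
+ # the runner is expected to bump the SELECT limit to MAX_SELECT_RETURNED_ROWS.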
+ query = query_runner.to_queries()[0]
+ assert isinstance(query, ast.SelectQuery) and query.limit == ast.Constant(value=MAX_SELECT_RETURNED_ROWS)
- assert len(response.results) == 30
+ response = query_runner.calculate()
+ assert len(response.results) == 250
def test_previous_period_with_number_display(self):
self._create_test_events()
diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder.py b/posthog/hogql_queries/insights/trends/trends_query_builder.py
index 0f72802f10405..ed5d867b48b75 100644
--- a/posthog/hogql_queries/insights/trends/trends_query_builder.py
+++ b/posthog/hogql_queries/insights/trends/trends_query_builder.py
@@ -3,6 +3,7 @@
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.property import action_to_expr, property_to_expr
from posthog.hogql.timings import HogQLTimings
+from posthog.hogql_queries.insights.data_warehouse_mixin import DataWarehouseInsightQueryMixin
from posthog.hogql_queries.insights.trends.aggregation_operations import (
AggregationOperations,
)
@@ -13,15 +14,21 @@
from posthog.models.action.action import Action
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.team.team import Team
-from posthog.schema import ActionsNode, EventsNode, HogQLQueryModifiers, TrendsQuery, ChartDisplayType
-from posthog.hogql_queries.insights.trends.trends_query_builder_abstract import TrendsQueryBuilderAbstract
+from posthog.schema import (
+ ActionsNode,
+ DataWarehouseNode,
+ EventsNode,
+ HogQLQueryModifiers,
+ TrendsQuery,
+ ChartDisplayType,
+)
-class TrendsQueryBuilder(TrendsQueryBuilderAbstract):
+class TrendsQueryBuilder(DataWarehouseInsightQueryMixin):
query: TrendsQuery
team: Team
query_date_range: QueryDateRange
- series: EventsNode | ActionsNode
+ series: EventsNode | ActionsNode | DataWarehouseNode
timings: HogQLTimings
modifiers: HogQLQueryModifiers
@@ -30,7 +37,7 @@ def __init__(
trends_query: TrendsQuery,
team: Team,
query_date_range: QueryDateRange,
- series: EventsNode | ActionsNode,
+ series: EventsNode | ActionsNode | DataWarehouseNode,
timings: HogQLTimings,
modifiers: HogQLQueryModifiers,
):
@@ -179,13 +186,21 @@ def _get_events_subquery(
ast.SelectQuery,
parse_select(
"""
- SELECT
- {aggregation_operation} AS total
- FROM events AS e
- SAMPLE {sample}
- WHERE {events_filter}
- """,
+ SELECT
+ {aggregation_operation} AS total
+ FROM {table} AS e
+ WHERE {events_filter}
+ """
+ if isinstance(self.series, DataWarehouseNode)
+ else """
+ SELECT
+ {aggregation_operation} AS total
+ FROM {table} AS e
+ SAMPLE {sample}
+ WHERE {events_filter}
+ """,
placeholders={
+ "table": self._table_expr,
"events_filter": events_filter,
"aggregation_operation": self._aggregation_operation.select_aggregation(),
"sample": self._sample_value(),
diff --git a/posthog/hogql_queries/insights/trends/trends_query_builder_abstract.py b/posthog/hogql_queries/insights/trends/trends_query_builder_abstract.py
deleted file mode 100644
index d321fc0a77ecf..0000000000000
--- a/posthog/hogql_queries/insights/trends/trends_query_builder_abstract.py
+++ /dev/null
@@ -1,52 +0,0 @@
-import abc
-from posthog.hogql import ast
-from typing import List, Optional
-from posthog.hogql_queries.insights.trends.breakdown import Breakdown
-
-
-class TrendsQueryBuilderAbstract(metaclass=abc.ABCMeta):
- @abc.abstractmethod
- def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
- pass
-
- # Private functions not really necessary but keeping here for uniformity for now
-
- @abc.abstractmethod
- def _get_date_subqueries(self, breakdown: Breakdown, ignore_breakdowns: bool = False) -> List[ast.SelectQuery]:
- pass
-
- @abc.abstractmethod
- def _get_events_subquery(
- self,
- no_modifications: Optional[bool],
- is_actors_query: bool,
- breakdown: Breakdown,
- breakdown_values_override: Optional[str | int] = None,
- actors_query_time_frame: Optional[str | int] = None,
- ) -> ast.SelectQuery:
- pass
-
- @abc.abstractmethod
- def _outer_select_query(self, breakdown: Breakdown, inner_query: ast.SelectQuery) -> ast.SelectQuery:
- pass
-
- @abc.abstractmethod
- def _inner_select_query(
- self, breakdown: Breakdown, inner_query: ast.SelectQuery | ast.SelectUnionQuery
- ) -> ast.SelectQuery:
- pass
-
- @abc.abstractmethod
- def _events_filter(
- self,
- is_actors_query: bool,
- breakdown: Breakdown | None,
- ignore_breakdowns: bool = False,
- breakdown_values_override: Optional[str | int] = None,
- actors_query_time_frame: Optional[str | int] = None,
- ) -> ast.Expr:
- pass
-
- @abc.abstractmethod
- def _breakdown(self, is_actors_query: bool, breakdown_values_override: Optional[str | int] = None):
- pass
diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py
index c386b5e3a7696..6cf84dcb5357b 100644
--- a/posthog/hogql_queries/insights/trends/trends_query_runner.py
+++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py
@@ -18,7 +18,7 @@
from posthog.caching.utils import is_stale
from posthog.hogql import ast
-from posthog.hogql.constants import LimitContext
+from posthog.hogql.constants import LimitContext, MAX_SELECT_RETURNED_ROWS
from posthog.hogql.printer import to_printed_hogql
from posthog.hogql.query import execute_hogql_query
from posthog.hogql.timings import HogQLTimings
@@ -29,9 +29,7 @@
BREAKDOWN_OTHER_STRING_LABEL,
)
from posthog.hogql_queries.insights.trends.display import TrendsDisplay
-from posthog.hogql_queries.insights.trends.trends_query_builder_abstract import TrendsQueryBuilderAbstract
from posthog.hogql_queries.insights.trends.trends_query_builder import TrendsQueryBuilder
-from posthog.hogql_queries.insights.trends.data_warehouse_trends_query_builder import DataWarehouseTrendsQueryBuilder
from posthog.hogql_queries.insights.trends.series_with_extras import SeriesWithExtras
from posthog.hogql_queries.query_runner import QueryRunner
from posthog.hogql_queries.utils.formula_ast import FormulaAST
@@ -61,6 +59,7 @@
TrendsQuery,
TrendsQueryResponse,
HogQLQueryModifiers,
+ DataWarehouseEventsModifier,
)
from posthog.utils import format_label_date
@@ -122,28 +121,21 @@ def to_queries(self) -> List[ast.SelectQuery | ast.SelectUnionQuery]:
else:
query_date_range = self.query_previous_date_range
- query_builder: TrendsQueryBuilderAbstract
-
- if isinstance(series.series, DataWarehouseNode):
- query_builder = DataWarehouseTrendsQueryBuilder(
- trends_query=series.overriden_query or self.query,
- team=self.team,
- query_date_range=query_date_range,
- series=series.series,
- timings=self.timings,
- modifiers=self.modifiers,
- )
- else:
- query_builder = TrendsQueryBuilder(
- trends_query=series.overriden_query or self.query,
- team=self.team,
- query_date_range=query_date_range,
- series=series.series,
- timings=self.timings,
- modifiers=self.modifiers,
- )
+ query_builder = TrendsQueryBuilder(
+ trends_query=series.overriden_query or self.query,
+ team=self.team,
+ query_date_range=query_date_range,
+ series=series.series,
+ timings=self.timings,
+ modifiers=self.modifiers,
+ )
+ query = query_builder.build_query()
- queries.append(query_builder.build_query())
+ # Get around the default 100-row limit by bumping to the max of 10000.
+ # This is useful for the world map view and other cases with many breakdowns.
+ if isinstance(query, ast.SelectQuery) and query.limit is None:
+ query.limit = ast.Constant(value=MAX_SELECT_RETURNED_ROWS)
+ queries.append(query)
return queries
@@ -237,9 +229,10 @@ def to_actors_query_options(self) -> InsightActorsQueryOptionsResponse:
if is_histogram_breakdown:
buckets = breakdown._get_breakdown_histogram_buckets()
breakdown_values = [f"[{t[0]},{t[1]}]" for t in buckets]
+ # TODO: append this only if needed
breakdown_values.append('["",""]')
else:
- breakdown_values = breakdown._get_breakdown_values
+ breakdown_values = breakdown._breakdown_values
for value in breakdown_values:
if self.query.breakdownFilter is not None and self.query.breakdownFilter.breakdown_type == "cohort":
@@ -367,7 +360,8 @@ def get_value(name: str, val: Any):
raise Exception("Column not found in hogql results")
if response.columns is None:
raise Exception("No columns returned from hogql results")
-
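+ # A column may legitimately be absent for some series (e.g. breakdown_value
+ # without a breakdown); return None instead of raising on .index() below.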
+ if name not in response.columns:
+ return None
index = response.columns.index(name)
return val[index]
@@ -562,6 +556,20 @@ def update_hogql_modifiers(self) -> None:
):
self.modifiers.inCohortVia = InCohortVia.leftjoin_conjoined
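+ # Build one modifier per DataWarehouseNode series so HogQL resolution knows
+ # each warehouse table's timestamp/id/distinct_id field mapping.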
+ datawarehouse_modifiers = []
+ for series in self.query.series:
+ if isinstance(series, DataWarehouseNode):
+ datawarehouse_modifiers.append(
+ DataWarehouseEventsModifier(
+ table_name=series.table_name,
+ timestamp_field=series.timestamp_field,
+ id_field=series.id_field,
+ distinct_id_field=series.distinct_id_field,
+ )
+ )
+
+ self.modifiers.dataWarehouseEventsModifiers = datawarehouse_modifiers
+
def setup_series(self) -> List[SeriesWithExtras]:
series_with_extras = [
SeriesWithExtras(
diff --git a/posthog/hogql_queries/legacy_compatibility/filter_to_query.py b/posthog/hogql_queries/legacy_compatibility/filter_to_query.py
index fc68d3f26c2f4..2b8f59f88a421 100644
--- a/posthog/hogql_queries/legacy_compatibility/filter_to_query.py
+++ b/posthog/hogql_queries/legacy_compatibility/filter_to_query.py
@@ -25,6 +25,7 @@
StickinessQuery,
TrendsFilter,
TrendsQuery,
+ FunnelVizType,
)
from posthog.types import InsightQueryNode
@@ -392,9 +393,15 @@ def _insight_filter(filter: Dict):
)
}
elif _insight_type(filter) == "FUNNELS":
+ funnel_viz_type = filter.get("funnel_viz_type")
+ # Backwards compatibility
+ # Before Filter.funnel_viz_type existed, funnel trends were indicated by Filter.display being TRENDS_LINEAR
+ if funnel_viz_type is None and filter.get("display") == "ActionsLineGraph":
+ funnel_viz_type = FunnelVizType.trends
+
insight_filter = {
"funnelsFilter": FunnelsFilter(
- funnelVizType=filter.get("funnel_viz_type"),
+ funnelVizType=funnel_viz_type,
funnelOrderType=filter.get("funnel_order_type"),
funnelFromStep=filter.get("funnel_from_step"),
funnelToStep=filter.get("funnel_to_step"),
@@ -416,12 +423,16 @@ def _insight_filter(filter: Dict):
retentionType=filter.get("retention_type"),
retentionReference=filter.get("retention_reference"),
totalIntervals=filter.get("total_intervals"),
- returningEntity=to_base_entity_dict(filter.get("returning_entity"))
- if filter.get("returning_entity") is not None
- else None,
- targetEntity=to_base_entity_dict(filter.get("target_entity"))
- if filter.get("target_entity") is not None
- else None,
+ returningEntity=(
+ to_base_entity_dict(filter.get("returning_entity"))
+ if filter.get("returning_entity") is not None
+ else None
+ ),
+ targetEntity=(
+ to_base_entity_dict(filter.get("target_entity"))
+ if filter.get("target_entity") is not None
+ else None
+ ),
period=filter.get("period"),
)
}
diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py
index b9d179c19ea51..3cd3d1e68bf9a 100644
--- a/posthog/hogql_queries/query_runner.py
+++ b/posthog/hogql_queries/query_runner.py
@@ -17,6 +17,8 @@
from posthog.metrics import LABEL_TEAM_ID
from posthog.models import Team
from posthog.schema import (
+ FunnelCorrelationActorsQuery,
+ FunnelCorrelationQuery,
FunnelsActorsQuery,
TrendsQuery,
FunnelsQuery,
@@ -93,6 +95,8 @@ class CachedQueryResponse(QueryResponse):
HogQLQuery,
InsightActorsQuery,
FunnelsActorsQuery,
+ FunnelCorrelationQuery,
+ FunnelCorrelationActorsQuery,
InsightActorsQueryOptions,
SessionsTimelineQuery,
WebOverviewQuery,
@@ -196,11 +200,11 @@ def get_query_runner(
limit_context=limit_context,
modifiers=modifiers,
)
- if kind == "InsightActorsQuery":
+ if kind == "InsightActorsQuery" or kind == "FunnelsActorsQuery" or kind == "FunnelCorrelationActorsQuery":
from .insights.insight_actors_query_runner import InsightActorsQueryRunner
return InsightActorsQueryRunner(
- query=cast(InsightActorsQuery | FunnelsActorsQuery | Dict[str, Any], query),
+ query=cast(InsightActorsQuery | Dict[str, Any], query),
team=team,
timings=timings,
limit_context=limit_context,
@@ -216,6 +220,16 @@ def get_query_runner(
limit_context=limit_context,
modifiers=modifiers,
)
+ if kind == "FunnelCorrelationQuery":
+ from .insights.funnels.funnel_correlation_query_runner import FunnelCorrelationQueryRunner
+
+ return FunnelCorrelationQueryRunner(
+ query=cast(FunnelCorrelationQuery | Dict[str, Any], query),
+ team=team,
+ timings=timings,
+ limit_context=limit_context,
+ modifiers=modifiers,
+ )
if kind == "HogQLQuery":
from .hogql_query_runner import HogQLQueryRunner
diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py
index a7ced15c87b33..f15505365ba66 100644
--- a/posthog/hogql_queries/web_analytics/stats_table.py
+++ b/posthog/hogql_queries/web_analytics/stats_table.py
@@ -1,5 +1,6 @@
from posthog.hogql import ast
from posthog.hogql.constants import LimitContext
+from posthog.hogql.database.schema.channel_type import create_channel_type_expr
from posthog.hogql.parser import parse_select, parse_expr
from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator
from posthog.hogql_queries.web_analytics.ctes import (
@@ -273,72 +274,17 @@ def to_channel_query(self):
(SELECT
- multiIf(
- match(initial_utm_campaign, 'cross-network'),
- 'Cross Network',
-
- (
- match(initial_utm_medium, '^(.*cp.*|ppc|retargeting|paid.*)$') OR
- initial_gclid IS NOT NULL OR
- initial_gad_source IS NOT NULL
- ),
- coalesce(
- hogql_lookupPaidSourceType(initial_utm_source),
- hogql_lookupPaidDomainType(initial_referring_domain),
- if(
- match(initial_utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
- 'Paid Shopping',
- NULL
- ),
- hogql_lookupPaidMediumType(initial_utm_medium),
- multiIf (
- initial_gad_source = '1',
- 'Paid Search',
-
- match(initial_utm_campaign, '^(.*video.*)$'),
- 'Paid Video',
-
- 'Paid Other'
- )
- ),
-
- (
- initial_referring_domain = '$direct'
- AND (initial_utm_medium IS NULL OR initial_utm_medium = '')
- AND (initial_utm_source IS NULL OR initial_utm_source IN ('', '(direct)', 'direct'))
- ),
- 'Direct',
-
- coalesce(
- hogql_lookupOrganicSourceType(initial_utm_source),
- hogql_lookupOrganicDomainType(initial_referring_domain),
- if(
- match(initial_utm_campaign, '^(.*(([^a-df-z]|^)shop|shopping).*)$'),
- 'Organic Shopping',
- NULL
- ),
- hogql_lookupOrganicMediumType(initial_utm_medium),
- multiIf(
- match(initial_utm_campaign, '^(.*video.*)$'),
- 'Organic Video',
-
- match(initial_utm_medium, 'push$'),
- 'Push',
-
- 'Other'
- )
- )
- ) AS breakdown_value,
+ {channel_type} AS breakdown_value,
count() as total_pageviews,
uniq(pid) as unique_visitors
FROM
(SELECT
- person.properties.$initial_utm_campaign AS initial_utm_campaign,
- person.properties.$initial_utm_medium AS initial_utm_medium,
- person.properties.$initial_utm_source AS initial_utm_source,
- person.properties.$initial_referring_domain AS initial_referring_domain,
- person.properties.$initial_gclid AS initial_gclid,
- person.properties.$initial_gad_source AS initial_gad_source,
+ toString(person.properties.$initial_utm_campaign) AS initial_utm_campaign,
+ toString(person.properties.$initial_utm_medium) AS initial_utm_medium,
+ toString(person.properties.$initial_utm_source) AS initial_utm_source,
+ toString(person.properties.$initial_referring_domain) AS initial_referring_domain,
+ toString(person.properties.$initial_gclid) AS initial_gclid,
+ toString(person.properties.$initial_gad_source) AS initial_gad_source,
person_id AS pid
FROM events
SAMPLE {sample_rate}
@@ -361,6 +307,16 @@ def to_channel_query(self):
"counts_where": self.events_where(),
"where_breakdown": self.where_breakdown(),
"sample_rate": self._sample_ratio,
+ "channel_type": create_channel_type_expr(
+ campaign=ast.Call(name="toString", args=[ast.Field(chain=["initial_utm_campaign"])]),
+ medium=ast.Call(name="toString", args=[ast.Field(chain=["initial_utm_medium"])]),
+ source=ast.Call(name="toString", args=[ast.Field(chain=["initial_utm_source"])]),
+ referring_domain=ast.Call(
+ name="toString", args=[ast.Field(chain=["initial_referring_domain"])]
+ ),
+ gclid=ast.Call(name="toString", args=[ast.Field(chain=["initial_gclid"])]),
+ gad_source=ast.Call(name="toString", args=[ast.Field(chain=["initial_gad_source"])]),
+ ),
},
)
diff --git a/posthog/hogql_queries/web_analytics/test/test_web_overview.py b/posthog/hogql_queries/web_analytics/test/test_web_overview.py
index e4fc03121ab1b..63a26ffea9233 100644
--- a/posthog/hogql_queries/web_analytics/test/test_web_overview.py
+++ b/posthog/hogql_queries/web_analytics/test/test_web_overview.py
@@ -1,4 +1,5 @@
from freezegun import freeze_time
+from parameterized import parameterized
from posthog.hogql_queries.web_analytics.web_overview import WebOverviewQueryRunner
from posthog.schema import WebOverviewQuery, DateRange
@@ -35,20 +36,25 @@ def _create_events(self, data, event="$pageview"):
)
return person_result
- def _run_web_overview_query(self, date_from, date_to, compare=True):
+ def _run_web_overview_query(self, date_from, date_to, use_sessions_table=False, compare=True):
query = WebOverviewQuery(
dateRange=DateRange(date_from=date_from, date_to=date_to),
properties=[],
compare=compare,
+ useSessionsTable=use_sessions_table,
)
runner = WebOverviewQueryRunner(team=self.team, query=query)
return runner.calculate()
- def test_no_crash_when_no_data(self):
- results = self._run_web_overview_query("2023-12-08", "2023-12-15").results
+ @parameterized.expand([(True,), (False,)])
+ def test_no_crash_when_no_data(self, use_sessions_table):
+ results = self._run_web_overview_query(
+ "2023-12-08", "2023-12-15", use_sessions_table=use_sessions_table
+ ).results
self.assertEqual(5, len(results))
- def test_increase_in_users(self):
+ @parameterized.expand([(True,), (False,)])
+ def test_increase_in_users(self, use_sessions_table):
self._create_events(
[
("p1", [("2023-12-02", "s1a"), ("2023-12-03", "s1a"), ("2023-12-12", "s1b")]),
@@ -56,7 +62,9 @@ def test_increase_in_users(self):
]
)
- results = self._run_web_overview_query("2023-12-08", "2023-12-15").results
+ results = self._run_web_overview_query(
+ "2023-12-08", "2023-12-15", use_sessions_table=use_sessions_table
+ ).results
visitors = results[0]
self.assertEqual("visitors", visitors.key)
@@ -88,7 +96,8 @@ def test_increase_in_users(self):
self.assertEqual(0, bounce.previous)
self.assertEqual(None, bounce.changeFromPreviousPct)
- def test_all_time(self):
+ @parameterized.expand([(True,), (False,)])
+ def test_all_time(self, use_sessions_table):
self._create_events(
[
("p1", [("2023-12-02", "s1a"), ("2023-12-03", "s1a"), ("2023-12-12", "s1b")]),
@@ -96,7 +105,9 @@ def test_all_time(self):
]
)
- results = self._run_web_overview_query("all", "2023-12-15", compare=False).results
+ results = self._run_web_overview_query(
+ "all", "2023-12-15", compare=False, use_sessions_table=use_sessions_table
+ ).results
visitors = results[0]
self.assertEqual("visitors", visitors.key)
@@ -128,11 +139,14 @@ def test_all_time(self):
self.assertEqual(None, bounce.previous)
self.assertEqual(None, bounce.changeFromPreviousPct)
- def test_filter_test_accounts(self):
+ @parameterized.expand([(True,), (False,)])
+ def test_filter_test_accounts(self, use_sessions_table):
# Create 1 test account
self._create_events([("test", [("2023-12-02", "s1"), ("2023-12-03", "s1")])])
- results = self._run_web_overview_query("2023-12-01", "2023-12-03").results
+ results = self._run_web_overview_query(
+ "2023-12-01", "2023-12-03", use_sessions_table=use_sessions_table
+ ).results
visitors = results[0]
self.assertEqual(0, visitors.value)
@@ -149,3 +163,25 @@ def test_filter_test_accounts(self):
bounce = results[4]
self.assertEqual("bounce rate", bounce.key)
self.assertEqual(None, bounce.value)
+
+ @parameterized.expand([(True,), (False,)])
+ def test_correctly_counts_pageviews_in_long_running_session(self, use_sessions_table):
+ # This test is important when using the sessions table, as the raw sessions table will have 3 entries (one per day)
+ self._create_events(
+ [
+ ("p1", [("2023-12-01", "s1"), ("2023-12-02", "s1"), ("2023-12-03", "s1")]),
+ ]
+ )
+
+ results = self._run_web_overview_query(
+ "2023-12-01", "2023-12-03", use_sessions_table=use_sessions_table
+ ).results
+
+ visitors = results[0]
+ self.assertEqual(1, visitors.value)
+
+ views = results[1]
+ self.assertEqual(3, views.value)
+
+ sessions = results[2]
+ self.assertEqual(1, sessions.value)
diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
index e20a2810274a9..da4f98edcbf32 100644
--- a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
+++ b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py
@@ -101,6 +101,22 @@ def session_having(self, include_previous_period: Optional[bool] = None):
self.team,
)
+ def sessions_table_properties(self, include_previous_period: Optional[bool] = None):
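+ # Restrict sessions by their start time; widen to the previous period when a
+ # comparison is requested so a single query covers both ranges.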
+ properties = [
+ parse_expr(
+ "sessions.min_timestamp >= {date_from}",
+ placeholders={
+ "date_from": self.query_date_range.previous_period_date_from_as_hogql()
+ if include_previous_period
+ else self.query_date_range.date_from_as_hogql(),
+ },
+ )
+ ]
+ return property_to_expr(
+ properties,
+ self.team,
+ )
+
def events_where(self):
properties = [self.events_where_data_range(), self.query.properties, self._test_account_filters]
diff --git a/posthog/hogql_queries/web_analytics/web_overview.py b/posthog/hogql_queries/web_analytics/web_overview.py
index 2019803faf78a..38388315c8f0b 100644
--- a/posthog/hogql_queries/web_analytics/web_overview.py
+++ b/posthog/hogql_queries/web_analytics/web_overview.py
@@ -19,6 +19,9 @@ class WebOverviewQueryRunner(WebAnalyticsQueryRunner):
query_type = WebOverviewQuery
def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ if self.query.useSessionsTable:
+ return self.to_query_with_session_table()
+
with self.timings.measure("date_expr"):
start = self.query_date_range.previous_period_date_from_as_hogql()
mid = self.query_date_range.date_from_as_hogql()
@@ -168,6 +171,113 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
},
)
+ def to_query_with_session_table(self) -> ast.SelectQuery | ast.SelectUnionQuery:
+ with self.timings.measure("date_expr"):
+ start = self.query_date_range.previous_period_date_from_as_hogql()
+ mid = self.query_date_range.date_from_as_hogql()
+ end = self.query_date_range.date_to_as_hogql()
+
+ if self.query.compare:
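+ # One scan over pageview events joined to sessions: current vs previous
+ # period metrics come from conditional aggregates keyed on each
+ # session's min_timestamp.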
+ return parse_select(
+ """
+SELECT
+ uniq(if(min_timestamp >= {mid} AND min_timestamp < {end}, person_id, NULL)) AS unique_users,
+ uniq(if(min_timestamp >= {start} AND min_timestamp < {mid}, person_id, NULL)) AS previous_unique_users,
+ sumIf(filtered_pageview_count, min_timestamp >= {mid} AND min_timestamp < {end}) AS current_pageviews,
+ sumIf(filtered_pageview_count, min_timestamp >= {start} AND min_timestamp < {mid}) AS previous_pageviews,
+ uniq(if(min_timestamp >= {mid} AND min_timestamp < {end}, session_id, NULL)) AS unique_sessions,
+ uniq(if(min_timestamp >= {start} AND min_timestamp < {mid}, session_id, NULL)) AS previous_unique_sessions,
+ avg(if(min_timestamp >= {mid}, duration, NULL)) AS avg_duration_s,
+ avg(if(min_timestamp < {mid}, duration, NULL)) AS prev_avg_duration_s,
+ avg(if(min_timestamp >= {mid}, is_bounce, NULL)) AS bounce_rate,
+ avg(if(min_timestamp < {mid}, is_bounce, NULL)) AS prev_bounce_rate
+FROM (
+ SELECT
+ any(events.person_id) as person_id,
+ events.`$session_id` as session_id,
+ min(sessions.min_timestamp) as min_timestamp,
+ any(sessions.duration) as duration,
+ any(sessions.pageview_count) as session_pageview_count,
+ any(sessions.autocapture_count) as session_autocapture_count,
+ count() as filtered_pageview_count,
+ and(
+ duration < 30,
+ session_pageview_count = 1,
+ session_autocapture_count = 0
+ ) as is_bounce
+ FROM events
+ JOIN sessions
+ ON events.`$session_id` = sessions.session_id
+ WHERE and(
+ `$session_id` IS NOT NULL,
+ event = '$pageview',
+ timestamp >= {start},
+ timestamp < {end},
+ {event_properties}
+ )
+ GROUP BY `$session_id`
+ HAVING and(
+ min_timestamp >= {start},
+ min_timestamp < {end}
+ )
+)
+
+ """,
+ placeholders={
+ "start": start,
+ "mid": mid,
+ "end": end,
+ "event_properties": self.event_properties(),
+ },
+ )
+ else:
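+ # No compare period requested: the previous_* columns come back as NULLs
+ # so the response shape matches the compare variant above.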
+ return parse_select(
+ """
+ SELECT
+ uniq(person_id) AS unique_users,
+ NULL as previous_unique_users,
+ sum(filtered_pageview_count) AS current_pageviews,
+ NULL as previous_pageviews,
+ uniq(session_id) AS unique_sessions,
+ NULL as previous_unique_sessions,
+ avg(duration) AS avg_duration_s,
+ NULL as prev_avg_duration_s,
+ avg(is_bounce) AS bounce_rate,
+ NULL as prev_bounce_rate
+FROM (
+ SELECT
+ any(events.person_id) as person_id,
+ events.`$session_id` as session_id,
+ min(sessions.min_timestamp) as min_timestamp,
+ any(sessions.duration) as duration,
+ any(sessions.pageview_count) as session_pageview_count,
+ any(sessions.autocapture_count) as session_autocapture_count,
+ count() as filtered_pageview_count,
+ and(
+ duration < 30,
+ session_pageview_count = 1,
+ session_autocapture_count = 0
+ ) as is_bounce
+ FROM events
+ JOIN sessions
+ ON events.`$session_id` = sessions.session_id
+ WHERE and(
+ `$session_id` IS NOT NULL,
+ event = '$pageview',
+ timestamp >= {mid},
+ timestamp < {end},
+ {event_properties}
+ )
+ GROUP BY `$session_id`
+ HAVING and(
+ min_timestamp >= {mid},
+ min_timestamp < {end}
+ )
+)
+ """,
+ placeholders={"mid": mid, "end": end, "event_properties": self.event_properties()},
+ )
+
def calculate(self):
response = execute_hogql_query(
query_type="overview_stats_pages_query",
diff --git a/posthog/management/commands/compare_hogql_insights.py b/posthog/management/commands/compare_hogql_insights.py
index 792a981ebc16a..44bab4f6e7127 100644
--- a/posthog/management/commands/compare_hogql_insights.py
+++ b/posthog/management/commands/compare_hogql_insights.py
@@ -19,11 +19,18 @@ def handle(self, *args, **options):
.order_by("created_at")
.all()
)
- # insights = [i for i in insights if "breakdown" not in i.filters]
+ # len(insights)
+ insights = [i for i in insights if "breakdown" in i.filters]
+ len(insights)
# insights = [i for i in insights if "formula" not in i.filters]
- # insights = [i for i in insights if i.filters.get("display") == "ActionsLineGraph"]
- # insights = [i for i in insights if i.id == 1133835]
- for insight in insights[-100:]:
+ # len(insights)
+ insights = [i for i in insights if i.filters.get("display") != "ActionsLineGraph"]
+ len(insights)
+ # insights = [i for i in insights if i.filters.get("display") == "ActionsLineGraphCumulative"]
+ # len(insights)
+ # insights = [i for i in insights if i.id > 1134855]
+ # len(insights)
+ for insight in insights[0:500]:
for event in insight.filters.get("events", []):
if event.get("math") in ("median", "p90", "p95", "p99"):
event["math"] = "sum"
@@ -35,7 +42,7 @@ def handle(self, *args, **options):
insight_type = insight.filters.get("insight")
print( # noqa: T201
f"Checking {insight_type} Insight {insight.id} {insight.short_id} - {insight.name} "
- f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}"
+ f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}. {insight.filters.get('display')}"
)
if insight.filters.get("aggregation_group_type_index", None) is not None:
del insight.filters["aggregation_group_type_index"]
@@ -61,13 +68,16 @@ def handle(self, *args, **options):
continue
try:
all_ok = True
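+ # Legacy rows label the overflow bucket "Other"; map it onto the HogQL
+ # sentinel so both result lists sort identically before comparing rows.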
+ sorter = lambda x: (
+ "$$_posthog_breakdown_other_$$" if x.get("breakdown_value") == "Other" else x.get("breakdown_value")
+ )
sorted_legacy_results = sorted(
legacy_results,
- key=lambda x: "$$_posthog_breakdown_other_$$" if x.get("label") == "Other" else x.get("label"), # type: ignore
+ key=sorter,
)
- sorted_hogql_results = sorted(hogql_results, key=lambda x: x.get("label"))
+ sorted_hogql_results = sorted(hogql_results, key=lambda x: x.get("breakdown_value"))
for legacy_result, hogql_result in zip(sorted_legacy_results, sorted_hogql_results):
- fields = ["label", "count", "data", "labels", "days"]
+ fields = ["label", "count", "aggregated_value", "data", "labels", "days"]
for field in fields:
legacy_value = legacy_result.get(field)
hogql_value = hogql_result.get(field)
@@ -76,10 +86,11 @@ def handle(self, *args, **options):
hogql_value = int(hogql_value)
if field == "data":
legacy_value = [int(x) for x in legacy_value or []]
- hogql_value = [int(x) for x in hogql_value]
+ hogql_value = [int(x) for x in hogql_value or []]
if legacy_value != hogql_value:
if (
- (field == "labels" and insight.filters.get("interval") == "month")
+ (field == "days" and hogql_value == [])
+ or (field == "labels" and insight.filters.get("interval") == "month")
or (field == "labels" and legacy_value == [] and hogql_value is None)
or (
field == "label"
@@ -93,7 +104,7 @@ def handle(self, *args, **options):
f" ({insight.id}). MISMATCH in {legacy_result.get('status')} row, field {field}"
)
print("Legacy:", legacy_value) # noqa: T201
- print("HogQL:", hogql_value) # noqa: T201
+ print("HogQL: ", hogql_value) # noqa: T201
print(json.dumps(insight.filters)) # noqa: T201
print("") # noqa: T201
all_ok = False
diff --git a/posthog/management/commands/migrate_team.py b/posthog/management/commands/migrate_team.py
new file mode 100644
index 0000000000000..67215dd42aa9c
--- /dev/null
+++ b/posthog/management/commands/migrate_team.py
@@ -0,0 +1,303 @@
+import datetime as dt
+import logging
+
+from django.core.management.base import BaseCommand, CommandError
+from django.db import transaction
+
+from posthog.batch_exports.models import BATCH_EXPORT_INTERVALS
+from posthog.batch_exports.service import (
+ backfill_export,
+ disable_and_delete_export,
+ sync_batch_export,
+)
+from posthog.models import (
+ BatchExport,
+ BatchExportBackfill,
+ BatchExportDestination,
+ BatchExportRun,
+ Team,
+)
+from posthog.temporal.common.client import sync_connect
+
+logger = logging.getLogger(__name__)
+logger.setLevel(logging.INFO)
+
+EXPORT_NAME = "PostHog HTTP Migration"
+VALID_INTERVALS = {i[0] for i in BATCH_EXPORT_INTERVALS}
+REGION_URLS = {
+ "us": "https://app.posthog.com/batch",
+ "eu": "https://eu.posthog.com/batch",
+}
+
+
+class Command(BaseCommand):
+ help = "Creates an HTTP batch export for a team to migrate data to another PostHog instance, \
+ or another team within the same instance."
+
+ def add_arguments(self, parser):
+ parser.add_argument("--team-id", default=None, type=int, help="Team ID to migrate from (on this instance)")
+ parser.add_argument("--interval", default=None, type=str, help="Interval to use for the batch export")
+ parser.add_argument(
+ "--start-at",
+ default=None,
+ type=str,
+ help="Timestamp to start the backfill from in UTC, 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'",
+ )
+ parser.add_argument(
+ "--delete-existing", default=False, type=bool, help="Delete existing batch export if it exists"
+ )
+ parser.add_argument("--dest-token", default=None, type=str, help="Destination Project API Key (token)")
+ parser.add_argument("--dest-region", default=None, type=str, help="Destination region")
+ parser.add_argument(
+ "--end-days-from-now",
+ default=30,
+ type=int,
+ help="Number of days from now to automatically end the ongoing export at, the default is usually fine",
+ )
+ parser.add_argument(
+ "--exclude-event",
+ "-e",
+ nargs="+",
+ dest="exclude_events",
+ required=False,
+ type=str,
+ help="Event to exclude from migration. Can be used multiple times.",
+ )
+ parser.add_argument(
+ "--include-event",
+ "-i",
+ nargs="+",
+ dest="include_events",
+ required=False,
+ type=str,
+ help="Event to include in migration. Can be used multiple times.",
+ )
+
+ def handle(self, **options):
+ team_id = options["team_id"]
+ interval = options["interval"]
+ start_at = options["start_at"]
+ dest_token = options["dest_token"]
+ dest_region = options["dest_region"]
+ verbose = options["verbosity"] > 1
+ exclude_events = options["exclude_events"]
+ include_events = options["include_events"]
+
+ create_args = [
+ interval,
+ start_at,
+ dest_token,
+ dest_region,
+ ]
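+ # Providing any of these optional flags means the user wants to create a
+ # new migration rather than just inspect or delete an existing one.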
+ create_requested = any(create_args)
+
+ if not team_id:
+ raise CommandError("source Team ID is required")
+
+ team = Team.objects.select_related("organization").get(id=team_id)
+
+ display(
+ "Team",
+ name=team.name,
+ organization=team.organization.name,
+ )
+
+ try:
+ existing_export = BatchExport.objects.get(
+ team=team, destination__type="HTTP", name=EXPORT_NAME, deleted=False
+ )
+
+ display_existing(existing_export=existing_export, verbose=verbose)
+
+ if options["delete_existing"]:
+ result = input("Enter [y] to continue deleting the existing migration (Ctrl+C to cancel) ")
+ if result.lower() != "y":
+ raise CommandError("Didn't receive 'y', exiting")
+ print() # noqa: T201
+
+ disable_and_delete_export(existing_export)
+ existing_export = None
+ display("Deleted existing batch export and backfill")
+ except BatchExport.DoesNotExist:
+ existing_export = None
+ display("No existing migration was found")
+ except BatchExport.MultipleObjectsReturned:
+ raise CommandError(
+ "More than one existing migration found! This should never happen if the management command is used, we don't know enough to proceed"
+ )
+
+ if not create_requested:
+ # User didn't provide any arguments to create a migration, so they must have just wanted
+ # to check the status and/or delete the existing migration.
+ return
+ elif existing_export:
+ display(
+ "Existing migration job already exists and it wasn't deleted, exiting without creating a new batch export"
+ )
+ return
+
+ end_days_from_now = options["end_days_from_now"]
+
+ create_migration(
+ team_id=team_id,
+ interval=interval,
+ start_at=start_at,
+ dest_token=dest_token,
+ dest_region=dest_region,
+ end_days_from_now=end_days_from_now,
+ exclude_events=exclude_events,
+ include_events=include_events,
+ )
+
+
+def display_existing(*, existing_export: BatchExport, verbose: bool):
+ existing_backfill = BatchExportBackfill.objects.get(batch_export=existing_export)
+ most_recent_run = BatchExportRun.objects.filter(batch_export=existing_export).order_by("-created_at").first()
+
+ if verbose:
+ display(
+ "Existing migration batch export (verbose details)",
+ batch_export_id=existing_export.id,
+ paused=existing_export.paused,
+ interval=existing_export.interval,
+ created_at=existing_export.created_at,
+ last_updated_at=existing_export.last_updated_at,
+ exclude_events=existing_export.destination.exclude_events,
+ include_events=existing_export.destination.include_events,
+ )
+ display(
+ "Existing migration backfill (verbose details)",
+ backfill_id=existing_backfill.id,
+ status=existing_backfill.status,
+ start_at=existing_backfill.start_at,
+ created_at=existing_backfill.created_at,
+ last_updated_at=existing_backfill.last_updated_at,
+ )
+
+ if not most_recent_run:
+ display("No batch export runs found, is the migration brand new?")
+ else:
+ most_recent_completed_run = (
+ BatchExportRun.objects.filter(batch_export=existing_export, status=BatchExportRun.Status.COMPLETED)
+ .order_by("-finished_at")
+ .first()
+ )
+
+ if most_recent_completed_run:
+ data_start_at = existing_backfill.start_at
+ data_end_at = most_recent_completed_run.data_interval_end
+ display(
+ "Found an existing migration, range of data migrated:",
+ start=data_start_at,
+ end=data_end_at,
+ interval=existing_export.interval,
+ )
+ if existing_export.paused:
+ display("The batch export backfill is still catching up to realtime")
+ else:
+ display(
+ "The batch export is unpaused, meaning the primary backfill completed and this is now in realtime export mode",
+ )
+
+ if not most_recent_completed_run or verbose:
+ display(
+ "Most recent run (verbose details)",
+ run_id=most_recent_run.id,
+ status=most_recent_run.status,
+ error=most_recent_run.latest_error,
+ data_interval_start=most_recent_run.data_interval_start,
+ data_interval_end=most_recent_run.data_interval_end,
+ created_at=most_recent_run.created_at,
+ last_updated_at=most_recent_run.last_updated_at,
+ )
+
+
+def create_migration(
+ *,
+ team_id: int,
+ interval: str,
+ start_at: str,
+ dest_token: str,
+ dest_region: str,
+ end_days_from_now: int,
+ include_events: list[str] | None = None,
+ exclude_events: list[str] | None = None,
+):
+ if interval not in VALID_INTERVALS:
+ raise CommandError("invalid interval, choices are: %s" % VALID_INTERVALS)
+
+ if not dest_token.startswith("phc_"):
+ raise CommandError("invalid destination token, must start with 'phc_'")
+
+ dest_region = dest_region.lower()
+ if dest_region not in REGION_URLS:
+ raise CommandError("invalid destination region, choices are: 'us', 'eu'")
+ url = REGION_URLS[dest_region]
+
+ try:
+ start_at_datetime = parse_to_utc(start_at)
+ except ValueError as e:
+ raise CommandError("couldn't parse start_at: %s" % e)
+
+ display(
+ "Creating migration",
+ interval=interval,
+ start_at=start_at_datetime,
+ dest_token=dest_token,
+ dest_region=dest_region,
+ url=url,
+ exclude_events=exclude_events,
+ include_events=include_events,
+ )
+ result = input("Enter [y] to continue creating a new migration (Ctrl+C to cancel) ")
+ if result.lower() != "y":
+ raise CommandError("Didn't receive 'y', exiting")
+ print() # noqa: T201
+
+ now = dt.datetime.now(dt.timezone.utc)
+ # This is a precaution so we don't accidentally leave the export running indefinitely.
+ end_at = now + dt.timedelta(days=end_days_from_now)
+
+ destination = BatchExportDestination(
+ type=BatchExportDestination.Destination.HTTP,
+ config={"url": url, "token": dest_token, "include_events": include_events, "exclude_events": exclude_events},
+ )
+ batch_export = BatchExport(
+ team_id=team_id,
+ destination=destination,
+ name=EXPORT_NAME,
+ interval=interval,
+ paused=True,
+ end_at=end_at,
+ )
+ sync_batch_export(batch_export, created=True)
+
+ with transaction.atomic():
+ destination.save()
+ batch_export.save()
+
+ temporal = sync_connect()
+ backfill_id = backfill_export(temporal, str(batch_export.pk), team_id, start_at_datetime, end_at=None)
+ display("Backfill started", batch_export_id=batch_export.id, backfill_id=backfill_id)
+
+
+def display(message, **kwargs):
+ print(message) # noqa: T201
+ for key, value in kwargs.items():
+ if isinstance(value, dt.datetime):
+ value = value.strftime("%Y-%m-%d %H:%M:%S")
+ print(f" {key} = {value}") # noqa: T201
+ print() # noqa: T201
+
+
+def parse_to_utc(date_str: str) -> dt.datetime:
+ try:
+ parsed_datetime = dt.datetime.strptime(date_str, "%Y-%m-%d")
+ except ValueError:
+ try:
+ parsed_datetime = dt.datetime.strptime(date_str, "%Y-%m-%d %H:%M:%S")
+ except ValueError:
+ raise ValueError("Invalid date format. Expected 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")
+
+ utc_datetime = parsed_datetime.replace(tzinfo=dt.timezone.utc)
+ return utc_datetime
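
For reference, the date parsing above accepts exactly two formats and pins the result to UTC. A behavior-equivalent sketch (not part of the patch) that collapses the nested try/except into a loop:

import datetime as dt

def parse_to_utc_sketch(date_str: str) -> dt.datetime:
    # Try each accepted format in turn; the first match wins.
    for fmt in ("%Y-%m-%d", "%Y-%m-%d %H:%M:%S"):
        try:
            return dt.datetime.strptime(date_str, fmt).replace(tzinfo=dt.timezone.utc)
        except ValueError:
            continue
    raise ValueError("Invalid date format. Expected 'YYYY-MM-DD' or 'YYYY-MM-DD HH:MM:SS'.")

assert parse_to_utc_sketch("2024-03-08").tzinfo == dt.timezone.utc
assert parse_to_utc_sketch("2024-03-08 12:30:00").hour == 12
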
diff --git a/posthog/migrations/0396_projects_and_environments.py b/posthog/migrations/0396_projects_and_environments.py
new file mode 100644
index 0000000000000..c571200b4ec51
--- /dev/null
+++ b/posthog/migrations/0396_projects_and_environments.py
@@ -0,0 +1,59 @@
+# Generated by Django 4.1.13 on 2024-03-08 22:47
+
+import django.core.validators
+from django.db import migrations, models
+import django.db.models.deletion
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("posthog", "0395_alter_batchexportbackfill_end_at"),
+ ]
+
+ operations = [
+ migrations.AlterField(
+ model_name="team",
+ name="name",
+ field=models.CharField(
+ default="Default project",
+ max_length=200,
+ validators=[django.core.validators.MinLengthValidator(1, "Project must have a name!")],
+ ),
+ ),
+ migrations.CreateModel(
+ name="Project",
+ fields=[
+ ("id", models.BigIntegerField(primary_key=True, serialize=False, verbose_name="ID")),
+ (
+ "name",
+ models.CharField(
+ default="Default project",
+ max_length=200,
+ validators=[django.core.validators.MinLengthValidator(1, "Project must have a name!")],
+ ),
+ ),
+ ("created_at", models.DateTimeField(auto_now_add=True)),
+ (
+ "organization",
+ models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="projects",
+ related_query_name="project",
+ to="posthog.organization",
+ ),
+ ),
+ ],
+ ),
+ migrations.AddField(
+ model_name="team",
+ name="project",
+ field=models.ForeignKey(
+ null=True,
+ blank=False,
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="teams",
+ related_query_name="team",
+ to="posthog.project",
+ ),
+ ),
+ ]
diff --git a/posthog/migrations/0397_projects_backfill.py b/posthog/migrations/0397_projects_backfill.py
new file mode 100644
index 0000000000000..5b3da405af116
--- /dev/null
+++ b/posthog/migrations/0397_projects_backfill.py
@@ -0,0 +1,41 @@
+# Generated by Django 4.1.13 on 2024-03-12 23:14
+
+import django.db.models.deletion
+from django.db import migrations, models
+
+
+class Migration(migrations.Migration):
+ dependencies = [
+ ("posthog", "0396_projects_and_environments"),
+ ]
+
+ operations = [
+ migrations.SeparateDatabaseAndState(
+ database_operations=[
+ migrations.RunSQL(
+ sql="""
+ -- For each team without a parent project, create such a project
+ INSERT INTO posthog_project (id, name, created_at, organization_id)
+ SELECT id, name, created_at, organization_id
+ FROM posthog_team
+ WHERE project_id IS NULL;
+ -- At this point, all teams have a parent project, so we can safely set project_id on every team
+ UPDATE posthog_team
+ SET project_id = id;""",
+ reverse_sql=migrations.RunSQL.noop,
+ )
+ ],
+ state_operations=[
+ migrations.AlterField(
+ model_name="team",
+ name="project",
+ field=models.ForeignKey(
+ on_delete=django.db.models.deletion.CASCADE,
+ related_name="teams",
+ related_query_name="team",
+ to="posthog.project",
+ ),
+ ),
+ ],
+ )
+ ]
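
The raw SQL above works because `Project` deliberately reuses the `Team` id space: each backfilled project row is created with its team's id, after which `project_id = id` is safe. A hypothetical ORM rendering of the same backfill, for illustration only (the migration ships the SQL; model and field names are taken from this diff):

def backfill_projects(apps, schema_editor):
    Team = apps.get_model("posthog", "Team")
    Project = apps.get_model("posthog", "Project")
    # Create one project per team that doesn't have one yet, sharing the team's id
    for team in Team.objects.filter(project_id__isnull=True).iterator():
        Project.objects.get_or_create(
            id=team.id,
            defaults={
                "name": team.name,
                "created_at": team.created_at,
                "organization_id": team.organization_id,
            },
        )
        team.project_id = team.id
        team.save(update_fields=["project_id"])
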
diff --git a/posthog/models/__init__.py b/posthog/models/__init__.py
index d9634fbbdd4c1..9bbaa713e53bf 100644
--- a/posthog/models/__init__.py
+++ b/posthog/models/__init__.py
@@ -56,6 +56,7 @@
PluginLogEntry,
PluginSourceFile,
)
+from .project import Project
from .property import Property
from .property_definition import PropertyDefinition
from .sharing_configuration import SharingConfiguration
@@ -122,6 +123,7 @@
"PluginConfig",
"PluginLogEntry",
"PluginSourceFile",
+ "Project",
"Property",
"PropertyDefinition",
"RetentionFilter",
diff --git a/posthog/models/filters/test/__snapshots__/test_filter.ambr b/posthog/models/filters/test/__snapshots__/test_filter.ambr
index 534870c348300..1bd4315507dde 100644
--- a/posthog/models/filters/test/__snapshots__/test_filter.ambr
+++ b/posthog/models/filters/test/__snapshots__/test_filter.ambr
@@ -4,6 +4,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -63,6 +64,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -122,6 +124,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -181,6 +184,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -240,6 +244,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/models/organization.py b/posthog/models/organization.py
index 254ba7ae64d9b..8740a0f34c453 100644
--- a/posthog/models/organization.py
+++ b/posthog/models/organization.py
@@ -60,11 +60,11 @@ def bootstrap(
**kwargs,
) -> Tuple["Organization", Optional["OrganizationMembership"], "Team"]:
"""Instead of doing the legwork of creating an organization yourself, delegate the details with bootstrap."""
- from .team import Team # Avoiding circular import
+ from .project import Project # Avoiding circular import
with transaction.atomic():
organization = Organization.objects.create(**kwargs)
- team = Team.objects.create(organization=organization, **(team_fields or {}))
+ _, team = Project.objects.create_with_team(organization=organization, team_fields=team_fields)
organization_membership: Optional[OrganizationMembership] = None
if user is not None:
organization_membership = OrganizationMembership.objects.create(
diff --git a/posthog/models/person/sql.py b/posthog/models/person/sql.py
index bc3565ab3d722..b9302a5f7cba6 100644
--- a/posthog/models/person/sql.py
+++ b/posthog/models/person/sql.py
@@ -520,3 +520,23 @@
GET_PERSON_COUNT_FOR_TEAM = "SELECT count() AS count FROM person WHERE team_id = %(team_id)s"
GET_PERSON_DISTINCT_ID2_COUNT_FOR_TEAM = "SELECT count() AS count FROM person_distinct_id2 WHERE team_id = %(team_id)s"
+
+
+CREATE_PERSON_DISTINCT_ID_OVERRIDES_DICTIONARY = """
+CREATE OR REPLACE DICTIONARY {database}.person_distinct_id_overrides_dict ON CLUSTER {cluster} (
+ `team_id` Int64, -- team_id could be made hierarchical to save some space.
+ `distinct_id` String,
+ `person_id` UUID
+)
+PRIMARY KEY team_id, distinct_id
+-- For our own sanity, we explicitly write out the group by query.
+SOURCE(CLICKHOUSE(
+ query 'SELECT team_id, distinct_id, argMax(person_id, version) AS person_id FROM {database}.person_distinct_id_overrides GROUP BY team_id, distinct_id'
+))
+LAYOUT(complex_key_hashed())
+-- ClickHouse will choose a time uniformly within 1 to 5 hours to reload the dictionary (update if necessary to meet SLAs).
+LIFETIME(MIN 3600 MAX 18000)
+""".format(
+ cluster=CLICKHOUSE_CLUSTER,
+ database=CLICKHOUSE_DATABASE,
+)
diff --git a/posthog/models/project.py b/posthog/models/project.py
new file mode 100644
index 0000000000000..c4ead260fb780
--- /dev/null
+++ b/posthog/models/project.py
@@ -0,0 +1,43 @@
+from typing import TYPE_CHECKING, Optional, Tuple
+from django.db import models
+from django.db import transaction
+from django.core.validators import MinLengthValidator
+
+if TYPE_CHECKING:
+ from .team import Team
+
+
+class ProjectManager(models.Manager):
+ def create_with_team(self, team_fields: Optional[dict] = None, **kwargs) -> Tuple["Project", "Team"]:
+ from .team import Team
+
+ with transaction.atomic():
+ common_id = Team.objects.increment_id_sequence()
+ project = self.create(id=common_id, **kwargs)
+ team = Team.objects.create(
+ id=common_id, organization=project.organization, project=project, **(team_fields or {})
+ )
+ return project, team
+
+
+class Project(models.Model):
+ """DO NOT USE YET - you probably mean the `Team` model instead.
+
+ `Project` is part of the environments feature, which is a work in progress.
+ """
+
+ id: models.BigIntegerField = models.BigIntegerField(primary_key=True, verbose_name="ID")
+ organization: models.ForeignKey = models.ForeignKey(
+ "posthog.Organization",
+ on_delete=models.CASCADE,
+ related_name="projects",
+ related_query_name="project",
+ )
+ name: models.CharField = models.CharField(
+ max_length=200,
+ default="Default project",
+ validators=[MinLengthValidator(1, "Project must have a name!")],
+ )
+ created_at: models.DateTimeField = models.DateTimeField(auto_now_add=True)
+
+ objects: ProjectManager = ProjectManager()
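
Usage sketch for the manager above, mirroring the tests later in this diff (`some_organization` is assumed to exist):

project, team = Project.objects.create_with_team(
    organization=some_organization,
    name="Analytics",
    team_fields={"name": "Production"},
)
assert project.id == team.id  # both ids come from the shared posthog_team_id_seq sequence
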
diff --git a/posthog/models/sessions/sql.py b/posthog/models/sessions/sql.py
index 1850827acd75c..ea9ef27034c72 100644
--- a/posthog/models/sessions/sql.py
+++ b/posthog/models/sessions/sql.py
@@ -158,7 +158,7 @@ def source_column(column_name: str) -> str:
sumIf(1, event='$autocapture') as autocapture_count
FROM {database}.sharded_events
-WHERE `$session_id` IS NOT NULL AND `$session_id` != '' AND toStartOfDay(timestamp) >= '2024-03-08'
+WHERE `$session_id` IS NOT NULL AND `$session_id` != ''
GROUP BY `$session_id`, team_id
""".format(
table_name=f"{TABLE_BASE_NAME}_mv",
@@ -188,6 +188,7 @@ def source_column(column_name: str) -> str:
)
)
+
# Distributed engine tables are only created if CLICKHOUSE_REPLICATED
# This table is responsible for writing to sharded_sessions based on a sharding key.
diff --git a/posthog/models/team/team.py b/posthog/models/team/team.py
index c815e75811456..66b4a3ed51415 100644
--- a/posthog/models/team/team.py
+++ b/posthog/models/team/team.py
@@ -13,8 +13,9 @@
MaxValueValidator,
MinValueValidator,
)
-from django.db import models
+from django.db import models, connection
from django.db.models.signals import post_delete, post_save
+from django.db import transaction
from zoneinfo import ZoneInfo
from posthog.clickhouse.query_tagging import tag_queries
from posthog.cloud_utils import is_cloud
@@ -98,9 +99,23 @@ def create_with_data(self, user: Any = None, default_dashboards: bool = True, **
return team
def create(self, *args, **kwargs) -> "Team":
- if kwargs.get("organization") is None and kwargs.get("organization_id") is None:
- raise ValueError("Creating organization-less projects is prohibited")
- return super().create(*args, **kwargs)
+ from ..project import Project
+
+ with transaction.atomic():
+ if "id" not in kwargs:
+ kwargs["id"] = self.increment_id_sequence()
+ if kwargs.get("project") is None and kwargs.get("project_id") is None:
+ # If a parent project is not provided for this team, ensure there is one
+ # This should be removed once environments are fully rolled out
+ project_kwargs = {}
+ if organization := kwargs.get("organization"):
+ project_kwargs["organization"] = organization
+ elif organization_id := kwargs.get("organization_id"):
+ project_kwargs["organization_id"] = organization_id
+ if name := kwargs.get("name"):
+ project_kwargs["name"] = name
+ kwargs["project"] = Project.objects.create(id=kwargs["id"], **project_kwargs)
+ return super().create(*args, **kwargs)
def get_team_from_token(self, token: Optional[str]) -> Optional["Team"]:
if not token:
@@ -125,6 +140,15 @@ def get_team_from_cache_or_token(self, token: Optional[str]) -> Optional["Team"]
except Team.DoesNotExist:
return None
+ def increment_id_sequence(self) -> int:
+ """Increment the `Team.id` field's sequence and return the latest value.
+
+ Use only when actually needed, to avoid wasting sequence values."""
+ cursor = connection.cursor()
+ cursor.execute("SELECT nextval('posthog_team_id_seq')")
+ result = cursor.fetchone()
+ return result[0]
+
def get_default_data_attributes() -> List[str]:
return ["data-attr"]
@@ -146,6 +170,12 @@ class Team(UUIDClassicModel):
related_name="teams",
related_query_name="team",
)
+ project: models.ForeignKey = models.ForeignKey(
+ "posthog.Project",
+ on_delete=models.CASCADE,
+ related_name="teams",
+ related_query_name="team",
+ )
api_token: models.CharField = models.CharField(
max_length=200,
unique=True,
@@ -155,7 +185,7 @@ class Team(UUIDClassicModel):
app_urls: ArrayField = ArrayField(models.CharField(max_length=200, null=True), default=list, blank=True)
name: models.CharField = models.CharField(
max_length=200,
- default="Default Project",
+ default="Default project",
validators=[MinLengthValidator(1, "Project must have a name!")],
)
slack_incoming_webhook: models.CharField = models.CharField(max_length=500, null=True, blank=True)
diff --git a/posthog/models/test/test_project.py b/posthog/models/test/test_project.py
new file mode 100644
index 0000000000000..d6bfe0ed3a36a
--- /dev/null
+++ b/posthog/models/test/test_project.py
@@ -0,0 +1,73 @@
+from unittest import mock
+from posthog.models.project import Project
+from posthog.models.team.team import Team
+from posthog.test.base import BaseTest
+
+
+class TestProject(BaseTest):
+ def test_create_project_with_team_no_team_fields(self):
+ project, team = Project.objects.create_with_team(
+ organization=self.organization,
+ name="Test project",
+ )
+
+ self.assertEqual(project.id, team.id)
+ self.assertEqual(project.name, "Test project")
+ self.assertEqual(project.organization, self.organization)
+
+ self.assertEqual(
+ team.name,
+ "Default project", # TODO: When Environments are rolled out, ensure this says "Default environment"
+ )
+ self.assertEqual(team.organization, self.organization)
+ self.assertEqual(team.project, project)
+
+ def test_create_project_with_team_with_team_fields(self):
+ project, team = Project.objects.create_with_team(
+ organization=self.organization,
+ name="Test project",
+ team_fields={"name": "Test team", "access_control": True},
+ )
+
+ self.assertEqual(project.id, team.id)
+ self.assertEqual(project.name, "Test project")
+ self.assertEqual(project.organization, self.organization)
+
+ self.assertEqual(team.name, "Test team")
+ self.assertEqual(team.organization, self.organization)
+ self.assertEqual(team.project, project)
+ self.assertEqual(team.access_control, True)
+
+ def test_create_project_with_team_uses_team_id_sequence(self):
+ expected_common_id = Team.objects.increment_id_sequence() + 1
+
+ project, team = Project.objects.create_with_team(
+ organization=self.organization,
+ name="Test project",
+ team_fields={"name": "Test team", "access_control": True},
+ )
+
+ self.assertEqual(project.id, expected_common_id)
+ self.assertEqual(project.name, "Test project")
+ self.assertEqual(project.organization, self.organization)
+
+ self.assertEqual(team.id, expected_common_id)
+ self.assertEqual(team.name, "Test team")
+ self.assertEqual(team.organization, self.organization)
+ self.assertEqual(team.project, project)
+ self.assertEqual(team.access_control, True)
+
+ @mock.patch("posthog.models.team.team.Team.objects.create", side_effect=Exception)
+ def test_create_project_with_team_does_not_create_if_team_fails(self, mock_create):
+ initial_team_count = Team.objects.count()
+ initial_project_count = Project.objects.count()
+
+ with self.assertRaises(Exception):
+ Project.objects.create_with_team(
+ organization=self.organization,
+ name="Test project",
+ team_fields={"name": "Test team", "access_control": True},
+ )
+
+ self.assertEqual(Team.objects.count(), initial_team_count)
+ self.assertEqual(Project.objects.count(), initial_project_count)
diff --git a/posthog/permissions.py b/posthog/permissions.py
index 0daa4ec31e2cd..4d42f165020bf 100644
--- a/posthog/permissions.py
+++ b/posthog/permissions.py
@@ -3,6 +3,7 @@
from django.db.models import Model
from django.core.exceptions import ImproperlyConfigured
+from django.views import View
from rest_framework.exceptions import PermissionDenied
from rest_framework.exceptions import NotFound
from rest_framework.permissions import SAFE_METHODS, BasePermission, IsAdminUser
@@ -19,7 +20,15 @@
CREATE_METHODS = ["POST", "PUT"]
-def extract_organization(object: Model) -> Organization:
+def extract_organization(object: Model, view: View) -> Organization:
+ # `filter_rewrite_rules` is set by TeamAndOrgViewSetMixin, so the organization can be resolved for models that are not directly related to one
+ organization_id_rewrite = getattr(view, "filter_rewrite_rules", {}).get("organization_id")
+ if organization_id_rewrite:
+ for part in organization_id_rewrite.split("__"):
+ if part == "organization_id":
+ break
+ object = getattr(object, part)
+
if isinstance(object, Organization):
return object
try:
@@ -89,8 +98,8 @@ def has_permission(self, request: Request, view) -> bool:
return OrganizationMembership.objects.filter(user=cast(User, request.user), organization=organization).exists()
- def has_object_permission(self, request: Request, view, object: Model) -> bool:
- organization = extract_organization(object)
+ def has_object_permission(self, request: Request, view: View, object: Model) -> bool:
+ organization = extract_organization(object, view)
return OrganizationMembership.objects.filter(user=cast(User, request.user), organization=organization).exists()
@@ -119,12 +128,12 @@ def has_permission(self, request: Request, view) -> bool:
>= OrganizationMembership.Level.ADMIN
)
- def has_object_permission(self, request: Request, view, object: Model) -> bool:
+ def has_object_permission(self, request: Request, view: View, object: Model) -> bool:
if request.method in SAFE_METHODS:
return True
# TODO: Optimize so that this computation is only done once, on `OrganizationMemberPermissions`
- organization = extract_organization(object)
+ organization = extract_organization(object, view)
return (
OrganizationMembership.objects.get(user=cast(User, request.user), organization=organization).level
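
The rewrite-rule traversal in `extract_organization` walks attribute segments until it reaches the `organization_id` leaf, leaving `object` pointing at the parent that actually carries the organization. A minimal stand-in sketch of that loop (all names hypothetical):

class Org: ...

class Project:
    organization = Org()

class Resource:
    project = Project()

rule = "project__organization_id"  # assumed shape of a filter_rewrite_rules entry
obj = Resource()
for part in rule.split("__"):
    if part == "organization_id":
        break
    obj = getattr(obj, part)
assert isinstance(obj, Project)  # extraction now continues from the parent project
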
diff --git a/posthog/schema.py b/posthog/schema.py
index 0e1f24c8a1fc1..dc77da163db17 100644
--- a/posthog/schema.py
+++ b/posthog/schema.py
@@ -151,6 +151,16 @@ class CountPerActorMathType(str, Enum):
p99_count_per_actor = "p99_count_per_actor"
+class DataWarehouseEventsModifier(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ distinct_id_field: str
+ id_field: str
+ table_name: str
+ timestamp_field: str
+
+
class DatabaseSchemaQueryResponseField(BaseModel):
model_config = ConfigDict(
extra="forbid",
@@ -210,6 +220,31 @@ class EntityType(str, Enum):
new_entity = "new_entity"
+class EventDefinition(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ elements: List
+ event: str
+ properties: Dict[str, Any]
+
+
+class CorrelationType(str, Enum):
+ success = "success"
+ failure = "failure"
+
+
+class EventOddsRatioSerialized(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ correlation_type: CorrelationType
+ event: EventDefinition
+ failure_count: int
+ odds_ratio: float
+ success_count: int
+
+
class Person(BaseModel):
model_config = ConfigDict(
extra="forbid",
@@ -274,6 +309,20 @@ class FunnelConversionWindowTimeUnit(str, Enum):
month = "month"
+class FunnelCorrelationResult(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ events: List[EventOddsRatioSerialized]
+ skewed: bool
+
+
+class FunnelCorrelationResultsType(str, Enum):
+ events = "events"
+ properties = "properties"
+ event_with_properties = "event_with_properties"
+
+
class FunnelExclusionLegacy(BaseModel):
model_config = ConfigDict(
extra="forbid",
@@ -316,7 +365,7 @@ class FunnelTimeToConvertResults(BaseModel):
model_config = ConfigDict(
extra="forbid",
)
- average_conversion_time: int
+ average_conversion_time: Optional[float] = None
bins: List[List[int]]
@@ -375,6 +424,7 @@ class HogQLQueryModifiers(BaseModel):
model_config = ConfigDict(
extra="forbid",
)
+ dataWarehouseEventsModifiers: Optional[List[DataWarehouseEventsModifier]] = None
inCohortVia: Optional[InCohortVia] = None
materializationMode: Optional[MaterializationMode] = None
personsArgMaxVersion: Optional[PersonsArgMaxVersion] = None
@@ -499,6 +549,8 @@ class NodeKind(str, Enum):
HogQLMetadata = "HogQLMetadata"
HogQLAutocomplete = "HogQLAutocomplete"
ActorsQuery = "ActorsQuery"
+ FunnelsActorsQuery = "FunnelsActorsQuery"
+ FunnelCorrelationActorsQuery = "FunnelCorrelationActorsQuery"
SessionsTimelineQuery = "SessionsTimelineQuery"
DataTableNode = "DataTableNode"
DataVisualizationNode = "DataVisualizationNode"
@@ -512,6 +564,7 @@ class NodeKind(str, Enum):
LifecycleQuery = "LifecycleQuery"
InsightActorsQuery = "InsightActorsQuery"
InsightActorsQueryOptions = "InsightActorsQueryOptions"
+ FunnelCorrelationQuery = "FunnelCorrelationQuery"
WebOverviewQuery = "WebOverviewQuery"
WebTopClicksQuery = "WebTopClicksQuery"
WebStatsTableQuery = "WebStatsTableQuery"
@@ -1131,6 +1184,20 @@ class FeaturePropertyFilter(BaseModel):
value: Optional[Union[str, float, List[Union[str, float]]]] = None
+class FunnelCorrelationResponse(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ columns: Optional[List] = None
+ hasMore: Optional[bool] = None
+ hogql: Optional[str] = None
+ limit: Optional[int] = None
+ offset: Optional[int] = None
+ results: FunnelCorrelationResult
+ timings: Optional[List[QueryTiming]] = None
+ types: Optional[List] = None
+
+
class FunnelsFilterLegacy(BaseModel):
model_config = ConfigDict(
extra="forbid",
@@ -1448,6 +1515,20 @@ class QueryResponseAlternative13(BaseModel):
timings: Optional[List[QueryTiming]] = None
+class QueryResponseAlternative17(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ columns: Optional[List] = None
+ hasMore: Optional[bool] = None
+ hogql: Optional[str] = None
+ limit: Optional[int] = None
+ offset: Optional[int] = None
+ results: FunnelCorrelationResult
+ timings: Optional[List[QueryTiming]] = None
+ types: Optional[List] = None
+
+
class RetentionFilter(BaseModel):
model_config = ConfigDict(
extra="forbid",
@@ -1573,6 +1654,7 @@ class WebAnalyticsQueryBase(BaseModel):
dateRange: Optional[DateRange] = None
properties: List[Union[EventPropertyFilter, PersonPropertyFilter]]
sampling: Optional[Sampling] = None
+ useSessionsTable: Optional[bool] = None
class WebOverviewQuery(BaseModel):
@@ -1585,6 +1667,7 @@ class WebOverviewQuery(BaseModel):
properties: List[Union[EventPropertyFilter, PersonPropertyFilter]]
response: Optional[WebOverviewQueryResponse] = None
sampling: Optional[Sampling] = None
+ useSessionsTable: Optional[bool] = None
class WebStatsTableQuery(BaseModel):
@@ -1601,6 +1684,7 @@ class WebStatsTableQuery(BaseModel):
properties: List[Union[EventPropertyFilter, PersonPropertyFilter]]
response: Optional[WebStatsTableQueryResponse] = None
sampling: Optional[Sampling] = None
+ useSessionsTable: Optional[bool] = None
class WebTopClicksQuery(BaseModel):
@@ -1612,6 +1696,7 @@ class WebTopClicksQuery(BaseModel):
properties: List[Union[EventPropertyFilter, PersonPropertyFilter]]
response: Optional[WebTopClicksQueryResponse] = None
sampling: Optional[Sampling] = None
+ useSessionsTable: Optional[bool] = None
class AnyResponseType(
@@ -1666,6 +1751,7 @@ class DataWarehouseNode(BaseModel):
extra="forbid",
)
custom_name: Optional[str] = None
+ distinct_id_field: str
fixedProperties: Optional[
List[
Union[
@@ -2152,6 +2238,7 @@ class QueryResponseAlternative(
QueryResponseAlternative12,
QueryResponseAlternative13,
QueryResponseAlternative14,
+ QueryResponseAlternative17,
Dict[str, List[DatabaseSchemaQueryResponseField]],
]
]
@@ -2172,6 +2259,7 @@ class QueryResponseAlternative(
QueryResponseAlternative12,
QueryResponseAlternative13,
QueryResponseAlternative14,
+ QueryResponseAlternative17,
Dict[str, List[DatabaseSchemaQueryResponseField]],
]
@@ -2668,7 +2756,7 @@ class FunnelsActorsQuery(BaseModel):
description="Used together with `funnelTrendsDropOff` for funnels time conversion date for the persons modal.",
)
includeRecordings: Optional[bool] = None
- kind: Literal["InsightActorsQuery"] = "InsightActorsQuery"
+ kind: Literal["FunnelsActorsQuery"] = "FunnelsActorsQuery"
response: Optional[ActorsQueryResponse] = None
source: FunnelsQuery
@@ -2697,6 +2785,21 @@ class InsightVizNode(BaseModel):
vizSpecificOptions: Optional[VizSpecificOptions] = None
+class FunnelCorrelationQuery(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ funnelCorrelationEventExcludePropertyNames: Optional[List[str]] = None
+ funnelCorrelationEventNames: Optional[List[str]] = None
+ funnelCorrelationExcludeEventNames: Optional[List[str]] = None
+ funnelCorrelationExcludeNames: Optional[List[str]] = None
+ funnelCorrelationNames: Optional[List[str]] = None
+ funnelCorrelationType: FunnelCorrelationResultsType
+ kind: Literal["FunnelCorrelationQuery"] = "FunnelCorrelationQuery"
+ response: Optional[FunnelCorrelationResponse] = None
+ source: FunnelsActorsQuery
+
+
class InsightActorsQuery(BaseModel):
model_config = ConfigDict(
extra="forbid",
@@ -2717,13 +2820,42 @@ class InsightActorsQuery(BaseModel):
status: Optional[str] = None
+class FunnelCorrelationActorsQuery(BaseModel):
+ model_config = ConfigDict(
+ extra="forbid",
+ )
+ funnelCorrelationPersonConverted: Optional[bool] = None
+ funnelCorrelationPersonEntity: Optional[Union[EventsNode, ActionsNode, DataWarehouseNode]] = None
+ funnelCorrelationPropertyValues: Optional[
+ List[
+ Union[
+ EventPropertyFilter,
+ PersonPropertyFilter,
+ ElementPropertyFilter,
+ SessionPropertyFilter,
+ CohortPropertyFilter,
+ RecordingDurationFilter,
+ GroupPropertyFilter,
+ FeaturePropertyFilter,
+ HogQLPropertyFilter,
+ EmptyPropertyFilter,
+ DataWarehousePropertyFilter,
+ ]
+ ]
+ ] = None
+ includeRecordings: Optional[bool] = None
+ kind: Literal["FunnelCorrelationActorsQuery"] = "FunnelCorrelationActorsQuery"
+ response: Optional[ActorsQueryResponse] = None
+ source: FunnelCorrelationQuery
+
+
class InsightActorsQueryOptions(BaseModel):
model_config = ConfigDict(
extra="forbid",
)
kind: Literal["InsightActorsQueryOptions"] = "InsightActorsQueryOptions"
response: Optional[InsightActorsQueryOptionsResponse] = None
- source: Union[InsightActorsQuery, FunnelsActorsQuery]
+ source: Union[InsightActorsQuery, FunnelsActorsQuery, FunnelCorrelationActorsQuery]
class ActorsQuery(BaseModel):
@@ -2771,7 +2903,7 @@ class ActorsQuery(BaseModel):
response: Optional[ActorsQueryResponse] = Field(default=None, description="Cached query response")
search: Optional[str] = None
select: Optional[List[str]] = None
- source: Optional[Union[InsightActorsQuery, FunnelsActorsQuery, HogQLQuery]] = None
+ source: Optional[Union[InsightActorsQuery, FunnelsActorsQuery, FunnelCorrelationActorsQuery, HogQLQuery]] = None
class DataTableNode(BaseModel):
@@ -2909,6 +3041,7 @@ class QueryRequest(BaseModel):
PathsQuery,
StickinessQuery,
LifecycleQuery,
+ FunnelCorrelationQuery,
DatabaseSchemaQuery,
] = Field(
...,
@@ -2947,6 +3080,7 @@ class QuerySchemaRoot(
PathsQuery,
StickinessQuery,
LifecycleQuery,
+ FunnelCorrelationQuery,
DatabaseSchemaQuery,
]
]
@@ -2978,6 +3112,7 @@ class QuerySchemaRoot(
PathsQuery,
StickinessQuery,
LifecycleQuery,
+ FunnelCorrelationQuery,
DatabaseSchemaQuery,
] = Field(..., discriminator="kind")
diff --git a/posthog/session_recordings/sql/session_replay_embeddings_migrations.py b/posthog/session_recordings/sql/session_replay_embeddings_migrations.py
index 84fa531959ba9..0cb5273303559 100644
--- a/posthog/session_recordings/sql/session_replay_embeddings_migrations.py
+++ b/posthog/session_recordings/sql/session_replay_embeddings_migrations.py
@@ -25,3 +25,29 @@
cluster=settings.CLICKHOUSE_CLUSTER,
)
)
+
+ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN = """
+ ALTER TABLE {table_name} ON CLUSTER '{cluster}'
+ ADD COLUMN IF NOT EXISTS input String
+"""
+
+DISTRIBUTED_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN = (
+ lambda: ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN.format(
+ table_name="session_replay_embeddings",
+ cluster=settings.CLICKHOUSE_CLUSTER,
+ )
+)
+
+WRITEABLE_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN = (
+ lambda: ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN.format(
+ table_name="writable_session_replay_embeddings",
+ cluster=settings.CLICKHOUSE_CLUSTER,
+ )
+)
+
+SHARDED_TABLE_ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN = (
+ lambda: ALTER_SESSION_REPLAY_EMBEDDINGS_ADD_INPUT_COLUMN.format(
+ table_name="sharded_session_replay_embeddings",
+ cluster=settings.CLICKHOUSE_CLUSTER,
+ )
+)
diff --git a/posthog/session_recordings/test/__snapshots__/test_session_recordings.ambr b/posthog/session_recordings/test/__snapshots__/test_session_recordings.ambr
index e681525e1bf50..fad3c08168d0b 100644
--- a/posthog/session_recordings/test/__snapshots__/test_session_recordings.ambr
+++ b/posthog/session_recordings/test/__snapshots__/test_session_recordings.ambr
@@ -4,6 +4,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -63,6 +64,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -122,6 +124,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -181,6 +184,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -240,6 +244,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -330,6 +335,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -458,6 +464,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -627,6 +634,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -779,6 +787,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -838,6 +847,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -897,6 +907,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -956,6 +967,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1015,6 +1027,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1074,6 +1087,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1164,6 +1178,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1412,6 +1427,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1502,6 +1518,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1850,6 +1867,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -1940,6 +1958,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2290,6 +2309,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2391,6 +2411,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2743,6 +2764,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -2833,6 +2855,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3216,6 +3239,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3306,6 +3330,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3401,6 +3426,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3730,6 +3756,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3820,6 +3847,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -3872,6 +3900,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4496,6 +4525,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4586,6 +4616,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -4928,6 +4959,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5018,6 +5050,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5373,6 +5406,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -5463,6 +5497,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/settings/__init__.py b/posthog/settings/__init__.py
index 3593d44f40c57..455b7e8dc34a1 100644
--- a/posthog/settings/__init__.py
+++ b/posthog/settings/__init__.py
@@ -9,6 +9,7 @@
For the full list of settings and their values, see
https://docs.djangoproject.com/en/2.2/ref/settings/
"""
+
# isort: skip_file
import os
@@ -44,6 +45,8 @@
from posthog.settings.utils import get_from_env, str_to_bool
+from posthog.settings.base_variables import DEBUG, TEST
+from posthog.settings.web import INSTALLED_APPS
# Instance configuration preferences
# https://posthog.com/docs/self-host/configure/environment-variables
diff --git a/posthog/settings/sentry.py b/posthog/settings/sentry.py
index 545d4ed94db2e..225e6dc61f51a 100644
--- a/posthog/settings/sentry.py
+++ b/posthog/settings/sentry.py
@@ -73,9 +73,13 @@ def traces_sampler(sampling_context: dict) -> float:
if op == "http.server":
path = sampling_context.get("wsgi_environ", {}).get("PATH_INFO")
+ force_sample = bool(sampling_context.get("wsgi_environ", {}).get("HTTP_FORCE_SAMPLE"))
+ # The force-sample HTTP header was set, so sample this transaction
+ if force_sample:
+ return 1.0 # 100%
# Ingestion endpoints (high volume)
- if path.startswith("/batch"):
+ elif path.startswith("/batch"):
return 0.00000001 # 0.000001%
# Ingestion endpoints (high volume)
elif path.startswith(("/capture", "/track", "/s", "/e")):
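
Under WSGI, a `Force-Sample` request header surfaces as the `HTTP_FORCE_SAMPLE` environ key checked above, so any request can opt into full tracing. A hypothetical client-side example (URL is a placeholder):

import requests

# Any non-empty header value makes bool(...) truthy, forcing a 100% sample rate for this request
requests.get("https://posthog.example.com/batch", headers={"Force-Sample": "1"})
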
diff --git a/posthog/tasks/test/__snapshots__/test_process_scheduled_changes.ambr b/posthog/tasks/test/__snapshots__/test_process_scheduled_changes.ambr
index bf055e2fda0ee..d4ed303a0dda1 100644
--- a/posthog/tasks/test/__snapshots__/test_process_scheduled_changes.ambr
+++ b/posthog/tasks/test/__snapshots__/test_process_scheduled_changes.ambr
@@ -75,6 +75,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -331,6 +332,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/temporal/batch_exports/__init__.py b/posthog/temporal/batch_exports/__init__.py
index 5f39d98c51c38..8debe181fb82f 100644
--- a/posthog/temporal/batch_exports/__init__.py
+++ b/posthog/temporal/batch_exports/__init__.py
@@ -34,7 +34,15 @@
SnowflakeBatchExportWorkflow,
insert_into_snowflake_activity,
)
-from posthog.temporal.batch_exports.squash_person_overrides import *
+from posthog.temporal.batch_exports.squash_person_overrides import (
+ SquashPersonOverridesWorkflow,
+ create_table,
+ drop_table,
+ optimize_person_distinct_id_overrides,
+ submit_mutation,
+ wait_for_mutation,
+ wait_for_table,
+)
WORKFLOWS = [
BackfillBatchExportWorkflow,
diff --git a/posthog/temporal/batch_exports/squash_person_overrides.py b/posthog/temporal/batch_exports/squash_person_overrides.py
index 4ab5fcfef3d9e..0f99f6bbb73ba 100644
--- a/posthog/temporal/batch_exports/squash_person_overrides.py
+++ b/posthog/temporal/batch_exports/squash_person_overrides.py
@@ -5,7 +5,7 @@
import json
import typing
from dataclasses import dataclass, field
-from datetime import datetime, timedelta, timezone
+from datetime import date, datetime, timedelta, timezone
from temporalio import activity, workflow
from temporalio.common import RetryPolicy
@@ -664,9 +664,10 @@ class SquashPersonOverridesInputs:
"""Inputs for the SquashPersonOverrides workflow.
Attributes:
- team_ids: List of team ids to squash. If None, will squash all.
- partition_ids: Partitions to squash, preferred over last_n_months.
- last_n_months: Execute the squash on the partitions for the last_n_months.
+ team_ids: List of team ids to squash. If `None`, will squash all.
+ partition_ids: Partitions to squash, preferred over `last_n_months`.
+ last_n_months: Execute the squash on the partitions of the last N months.
+ offset: Number of months to go back from the current month before generating partitions to squash with `last_n_months`.
delete_grace_period_seconds: Number of seconds until an override can be deleted. This grace
period works on top of checking if the override was applied to all partitions. Defaults
to 24h.
@@ -676,6 +677,7 @@ class SquashPersonOverridesInputs:
team_ids: list[int] = field(default_factory=list)
partition_ids: list[str] | None = None
last_n_months: int = 1
+ offset: int = 0
delete_grace_period_seconds: int = 24 * 3600
dry_run: bool = True
@@ -692,20 +694,24 @@ def iter_partition_ids(self) -> collections.abc.Iterator[str]:
for month in self.iter_last_n_months():
yield month.strftime("%Y%m")
- def iter_last_n_months(self) -> collections.abc.Iterator[datetime]:
- """Iterate over the last N months.
+ def iter_last_n_months(self) -> collections.abc.Iterator[date]:
+ """Iterate over beginning of the month dates of the last N months.
- Returns the first day of the last N months. The current month
- counts as the first month.
+ If `self.offset` is 0, then the first day of the current month will be the
+ first month yielded. Otherwise, `self.offset` will be subtracted from the
+ current month to land on the first month to yield.
"""
- current_month = datetime.now()
+ now = date.today()
+ # Normalize to the 1-12 range so offsets landing exactly on a year boundary don't produce month 0.
+ start_month = (now.month - self.offset - 1) % 12 + 1
+ start_year = now.year + (now.month - self.offset - 1) // 12
+ current_date = date(year=start_year, month=start_month, day=1)
- for _ in range(self.last_n_months):
- current_month = current_month.replace(day=1)
+ for _ in range(0, self.last_n_months):
+ current_date = current_date.replace(day=1)
- yield current_month
+ yield current_date
- current_month = current_month - timedelta(days=1)
+ current_date = current_date - timedelta(days=1)
@workflow.defn(name="squash-person-overrides")
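
A worked check of the month arithmetic above (with the 1-12 normalization), assuming today is 2023-03-14 as in the tests below:

import datetime as dt

now = dt.date(2023, 3, 14)
for offset, expected in [(0, "202303"), (1, "202302"), (6, "202209"), (12, "202203")]:
    m = now.month - offset - 1  # zero-based month index with the offset applied
    start = dt.date(now.year + m // 12, m % 12 + 1, 1)
    assert start.strftime("%Y%m") == expected
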
diff --git a/posthog/temporal/data_imports/__init__.py b/posthog/temporal/data_imports/__init__.py
index aad09fae8047c..35e20f0ffc50e 100644
--- a/posthog/temporal/data_imports/__init__.py
+++ b/posthog/temporal/data_imports/__init__.py
@@ -1,4 +1,10 @@
-from posthog.temporal.data_imports.external_data_job import *
+from posthog.temporal.data_imports.external_data_job import (
+ ExternalDataJobWorkflow,
+ create_external_data_job_model,
+ update_external_data_job_model,
+ run_external_data_job,
+ validate_schema_activity,
+)
WORKFLOWS = [ExternalDataJobWorkflow]
diff --git a/posthog/temporal/data_imports/external_data_job.py b/posthog/temporal/data_imports/external_data_job.py
index ae003af973787..3288d9a313c9d 100644
--- a/posthog/temporal/data_imports/external_data_job.py
+++ b/posthog/temporal/data_imports/external_data_job.py
@@ -4,6 +4,7 @@
import uuid
from asgiref.sync import sync_to_async
+from dlt.common.schema.typing import TSchemaTables
from temporalio import activity, exceptions, workflow
from temporalio.common import RetryPolicy
@@ -37,7 +38,7 @@ class CreateExternalDataJobInputs:
@activity.defn
-async def create_external_data_job_model(inputs: CreateExternalDataJobInputs) -> Tuple[str, list[str]]:
+async def create_external_data_job_model(inputs: CreateExternalDataJobInputs) -> Tuple[str, list[Tuple[str, str]]]:
run = await sync_to_async(create_external_data_job)(
team_id=inputs.team_id,
external_data_source_id=inputs.external_data_source_id,
@@ -105,7 +106,8 @@ async def update_external_data_job_model(inputs: UpdateExternalDataJobStatusInpu
class ValidateSchemaInputs:
run_id: str
team_id: int
- schemas: list[str]
+ schemas: list[Tuple[str, str]]
+ table_schema: TSchemaTables
@activity.defn
@@ -114,6 +116,7 @@ async def validate_schema_activity(inputs: ValidateSchemaInputs) -> None:
run_id=inputs.run_id,
team_id=inputs.team_id,
schemas=inputs.schemas,
+ table_schema=inputs.table_schema,
)
logger = await bind_temporal_worker_logger(team_id=inputs.team_id)
@@ -133,11 +136,11 @@ class ExternalDataJobInputs:
team_id: int
source_id: uuid.UUID
run_id: str
- schemas: list[str]
+ schemas: list[Tuple[str, str]]
@activity.defn
-async def run_external_data_job(inputs: ExternalDataJobInputs) -> None:
+async def run_external_data_job(inputs: ExternalDataJobInputs) -> TSchemaTables:
model: ExternalDataJob = await get_external_data_job(
job_id=inputs.run_id,
)
@@ -153,6 +156,8 @@ async def run_external_data_job(inputs: ExternalDataJobInputs) -> None:
dataset_name=model.folder_path,
)
+ endpoints = [schema[1] for schema in inputs.schemas]
+
source = None
if model.pipeline.source_type == ExternalDataSource.Type.STRIPE:
from posthog.temporal.data_imports.pipelines.stripe.helpers import stripe_source
@@ -162,7 +167,7 @@ async def run_external_data_job(inputs: ExternalDataJobInputs) -> None:
raise ValueError(f"Stripe secret key not found for job {model.id}")
source = stripe_source(
api_key=stripe_secret_key,
- endpoints=tuple(inputs.schemas),
+ endpoints=tuple(endpoints),
team_id=inputs.team_id,
job_id=inputs.run_id,
)
@@ -181,7 +186,7 @@ async def run_external_data_job(inputs: ExternalDataJobInputs) -> None:
source = hubspot(
api_key=hubspot_access_code,
refresh_token=refresh_token,
- endpoints=tuple(inputs.schemas),
+ endpoints=tuple(endpoints),
)
elif model.pipeline.source_type == ExternalDataSource.Type.POSTGRES:
from posthog.temporal.data_imports.pipelines.postgres import postgres_source
@@ -201,7 +206,7 @@ async def run_external_data_job(inputs: ExternalDataJobInputs) -> None:
database=database,
sslmode="prefer" if settings.TEST or settings.DEBUG else "require",
schema=schema,
- table_names=inputs.schemas,
+ table_names=endpoints,
)
else:
@@ -221,6 +226,8 @@ async def heartbeat() -> None:
heartbeat_task.cancel()
await asyncio.wait([heartbeat_task])
+ return source.schema.tables
+
# TODO: update retry policies
@workflow.defn(name="external-data-job")
@@ -264,7 +271,7 @@ async def run(self, inputs: ExternalDataWorkflowInputs):
schemas=schemas,
)
- await workflow.execute_activity(
+ table_schemas = await workflow.execute_activity(
run_external_data_job,
job_inputs,
start_to_close_timeout=dt.timedelta(hours=4),
@@ -273,7 +280,9 @@ async def run(self, inputs: ExternalDataWorkflowInputs):
)
# check schema first
- validate_inputs = ValidateSchemaInputs(run_id=run_id, team_id=inputs.team_id, schemas=schemas)
+ validate_inputs = ValidateSchemaInputs(
+ run_id=run_id, team_id=inputs.team_id, schemas=schemas, table_schema=table_schemas
+ )
await workflow.execute_activity(
validate_schema_activity,
diff --git a/posthog/temporal/data_imports/pipelines/pipeline.py b/posthog/temporal/data_imports/pipelines/pipeline.py
index ad6d53aa3a9e6..5297f2e39ac29 100644
--- a/posthog/temporal/data_imports/pipelines/pipeline.py
+++ b/posthog/temporal/data_imports/pipelines/pipeline.py
@@ -16,7 +16,7 @@
class PipelineInputs:
source_id: UUID
run_id: str
- schemas: list[str]
+ schemas: list[tuple[str, str]]
dataset_name: str
job_type: str
team_id: int
diff --git a/posthog/temporal/tests/external_data/test_external_data_job.py b/posthog/temporal/tests/external_data/test_external_data_job.py
index c92b81772a190..9f0ca2d9a0d32 100644
--- a/posthog/temporal/tests/external_data/test_external_data_job.py
+++ b/posthog/temporal/tests/external_data/test_external_data_job.py
@@ -1,6 +1,6 @@
import uuid
from unittest import mock
-
+from typing import Optional
import pytest
from asgiref.sync import sync_to_async
from django.test import override_settings
@@ -32,6 +32,7 @@
from posthog.temporal.data_imports.pipelines.schemas import (
PIPELINE_TYPE_SCHEMA_DEFAULT_MAPPING,
)
+from posthog.models import Team
from posthog.temporal.data_imports.pipelines.pipeline import DataImportPipeline
from temporalio.testing import WorkflowEnvironment
from temporalio.common import RetryPolicy
@@ -118,6 +119,15 @@ async def postgres_connection(postgres_config, setup_postgres_test_db):
await connection.close()
+async def _create_schema(schema_name: str, source: ExternalDataSource, team: Team, table_id: Optional[str] = None):
+ return await sync_to_async(ExternalDataSchema.objects.create)(
+ name=schema_name,
+ team_id=team.id,
+ source_id=source.pk,
+ table_id=table_id,
+ )
+
+
@pytest.mark.django_db(transaction=True)
@pytest.mark.asyncio
async def test_create_external_job_activity(activity_environment, team, **kwargs):
@@ -232,7 +242,9 @@ async def setup_job_1():
new_job = await sync_to_async(ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").get)()
- schemas = ["Customer"]
+ customer_schema = await _create_schema("Customer", new_source, team)
+ schemas = [(customer_schema.id, "Customer")]
+
inputs = ExternalDataJobInputs(
team_id=team.id,
run_id=new_job.pk,
@@ -262,7 +274,9 @@ async def setup_job_2():
new_job = await sync_to_async(ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").get)()
- schemas = ["Customer", "Invoice"]
+ customer_schema = await _create_schema("Customer", new_source, team)
+ invoice_schema = await _create_schema("Invoice", new_source, team)
+ schemas = [(customer_schema.id, "Customer"), (invoice_schema.id, "Invoice")]
inputs = ExternalDataJobInputs(
team_id=team.id,
run_id=new_job.pk,
@@ -350,7 +364,8 @@ async def setup_job_1():
new_job = await sync_to_async(ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").get)()
- schemas = ["Customer"]
+ customer_schema = await _create_schema("Customer", new_source, team)
+ schemas = [(customer_schema.id, "Customer")]
inputs = ExternalDataJobInputs(
team_id=team.id,
run_id=new_job.pk,
@@ -414,7 +429,8 @@ async def setup_job_1():
new_job = await sync_to_async(ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").get)()
- schemas = ["Customer"]
+ customer_schema = await _create_schema("Customer", new_source, team)
+ schemas = [(customer_schema.id, "Customer")]
inputs = ExternalDataJobInputs(
team_id=team.id,
run_id=new_job.pk,
@@ -476,6 +492,19 @@ async def test_validate_schema_and_update_table_activity(activity_environment, t
rows_synced=0,
)
+ test_1_schema = await _create_schema("test-1", new_source, team)
+ test_2_schema = await _create_schema("test-2", new_source, team)
+ test_3_schema = await _create_schema("test-3", new_source, team)
+ test_4_schema = await _create_schema("test-4", new_source, team)
+ test_5_schema = await _create_schema("test-5", new_source, team)
+ schemas = [
+ (test_1_schema.id, "test-1"),
+ (test_2_schema.id, "test-2"),
+ (test_3_schema.id, "test-3"),
+ (test_4_schema.id, "test-4"),
+ (test_5_schema.id, "test-5"),
+ ]
+
with mock.patch(
"posthog.warehouse.models.table.DataWarehouseTable.get_columns"
) as mock_get_columns, override_settings(**AWS_BUCKET_MOCK_SETTINGS):
@@ -483,7 +512,16 @@ async def test_validate_schema_and_update_table_activity(activity_environment, t
await activity_environment.run(
validate_schema_activity,
ValidateSchemaInputs(
- run_id=new_job.pk, team_id=team.id, schemas=["test-1", "test-2", "test-3", "test-4", "test-5"]
+ run_id=new_job.pk,
+ team_id=team.id,
+ schemas=schemas,
+ table_schema={
+ "test-1": {"name": "test-1", "resource": "test-1", "columns": {"id": {"data_type": "text"}}},
+ "test-2": {"name": "test-2", "resource": "test-2", "columns": {"id": {"data_type": "text"}}},
+ "test-3": {"name": "test-3", "resource": "test-3", "columns": {"id": {"data_type": "text"}}},
+ "test-4": {"name": "test-4", "resource": "test-4", "columns": {"id": {"data_type": "text"}}},
+ "test-5": {"name": "test-5", "resource": "test-5", "columns": {"id": {"data_type": "text"}}},
+ },
),
)
@@ -504,6 +542,7 @@ async def test_validate_schema_and_update_table_activity_with_existing(activity_
status="running",
source_type="Stripe",
job_inputs={"stripe_secret_key": "test-key"},
+ prefix="stripe_",
)
old_job: ExternalDataJob = await sync_to_async(ExternalDataJob.objects.create)(
@@ -521,7 +560,7 @@ async def test_validate_schema_and_update_table_activity_with_existing(activity_
url_pattern = await sync_to_async(old_job.url_pattern_by_schema)("test-1")
- await sync_to_async(DataWarehouseTable.objects.create)(
+ existing_table = await sync_to_async(DataWarehouseTable.objects.create)(
credential=old_credential,
name="stripe_test-1",
format="Parquet",
@@ -537,6 +576,19 @@ async def test_validate_schema_and_update_table_activity_with_existing(activity_
rows_synced=0,
)
+ test_1_schema = await _create_schema("test-1", new_source, team, table_id=existing_table.id)
+ test_2_schema = await _create_schema("test-2", new_source, team)
+ test_3_schema = await _create_schema("test-3", new_source, team)
+ test_4_schema = await _create_schema("test-4", new_source, team)
+ test_5_schema = await _create_schema("test-5", new_source, team)
+ schemas = [
+ (test_1_schema.id, "test-1"),
+ (test_2_schema.id, "test-2"),
+ (test_3_schema.id, "test-3"),
+ (test_4_schema.id, "test-4"),
+ (test_5_schema.id, "test-5"),
+ ]
+
with mock.patch(
"posthog.warehouse.models.table.DataWarehouseTable.get_columns"
) as mock_get_columns, override_settings(**AWS_BUCKET_MOCK_SETTINGS):
@@ -544,7 +596,16 @@ async def test_validate_schema_and_update_table_activity_with_existing(activity_
await activity_environment.run(
validate_schema_activity,
ValidateSchemaInputs(
- run_id=new_job.pk, team_id=team.id, schemas=["test-1", "test-2", "test-3", "test-4", "test-5"]
+ run_id=new_job.pk,
+ team_id=team.id,
+ schemas=schemas,
+ table_schema={
+ "test-1": {"name": "test-1", "resource": "test-1", "columns": {"id": {"data_type": "text"}}},
+ "test-2": {"name": "test-2", "resource": "test-2", "columns": {"id": {"data_type": "text"}}},
+ "test-3": {"name": "test-3", "resource": "test-3", "columns": {"id": {"data_type": "text"}}},
+ "test-4": {"name": "test-4", "resource": "test-4", "columns": {"id": {"data_type": "text"}}},
+ "test-5": {"name": "test-5", "resource": "test-5", "columns": {"id": {"data_type": "text"}}},
+ },
),
)
@@ -595,9 +656,29 @@ async def test_validate_schema_and_update_table_activity_half_run(activity_envir
},
]
+ broken_schema = await _create_schema("broken_schema", new_source, team)
+ test_schema = await _create_schema("test_schema", new_source, team)
+ schemas = [(broken_schema.id, "broken_schema"), (test_schema.id, "test_schema")]
+
await activity_environment.run(
validate_schema_activity,
- ValidateSchemaInputs(run_id=new_job.pk, team_id=team.id, schemas=["broken_schema", "test_schema"]),
+ ValidateSchemaInputs(
+ run_id=new_job.pk,
+ team_id=team.id,
+ schemas=schemas,
+ table_schema={
+ "broken_schema": {
+ "name": "broken_schema",
+ "resource": "broken_schema",
+ "columns": {"id": {"data_type": "text"}},
+ },
+ "test_schema": {
+ "name": "test_schema",
+ "resource": "test_schema",
+ "columns": {"id": {"data_type": "text"}},
+ },
+ },
+ ),
)
assert mock_get_columns.call_count == 1
@@ -626,6 +707,19 @@ async def test_create_schema_activity(activity_environment, team, **kwargs):
rows_synced=0,
)
+ test_1_schema = await _create_schema("test-1", new_source, team)
+ test_2_schema = await _create_schema("test-2", new_source, team)
+ test_3_schema = await _create_schema("test-3", new_source, team)
+ test_4_schema = await _create_schema("test-4", new_source, team)
+ test_5_schema = await _create_schema("test-5", new_source, team)
+ schemas = [
+ (test_1_schema.id, "test-1"),
+ (test_2_schema.id, "test-2"),
+ (test_3_schema.id, "test-3"),
+ (test_4_schema.id, "test-4"),
+ (test_5_schema.id, "test-5"),
+ ]
+
with mock.patch(
"posthog.warehouse.models.table.DataWarehouseTable.get_columns"
) as mock_get_columns, override_settings(**AWS_BUCKET_MOCK_SETTINGS):
@@ -633,7 +727,16 @@ async def test_create_schema_activity(activity_environment, team, **kwargs):
await activity_environment.run(
validate_schema_activity,
ValidateSchemaInputs(
- run_id=new_job.pk, team_id=team.id, schemas=["test-1", "test-2", "test-3", "test-4", "test-5"]
+ run_id=new_job.pk,
+ team_id=team.id,
+ schemas=schemas,
+ table_schema={
+ "test-1": {"name": "test-1", "resource": "test-1", "columns": {"id": {"data_type": "text"}}},
+ "test-2": {"name": "test-2", "resource": "test-2", "columns": {"id": {"data_type": "text"}}},
+ "test-3": {"name": "test-3", "resource": "test-3", "columns": {"id": {"data_type": "text"}}},
+ "test-4": {"name": "test-4", "resource": "test-4", "columns": {"id": {"data_type": "text"}}},
+ "test-5": {"name": "test-5", "resource": "test-5", "columns": {"id": {"data_type": "text"}}},
+ },
),
)
@@ -802,7 +905,8 @@ async def setup_job_1():
new_job = await sync_to_async(ExternalDataJob.objects.filter(id=new_job.id).prefetch_related("pipeline").get)()
- schemas = ["posthog_test"]
+ posthog_test_schema = await _create_schema("posthog_test", new_source, team)
+ schemas = [(posthog_test_schema.id, "posthog_test")]
inputs = ExternalDataJobInputs(
team_id=team.id,
run_id=new_job.pk,
diff --git a/posthog/temporal/tests/persons_on_events_squash/test_squash_person_overrides_workflow.py b/posthog/temporal/tests/persons_on_events_squash/test_squash_person_overrides_workflow.py
index dc5a98c6df143..2010b13345aee 100644
--- a/posthog/temporal/tests/persons_on_events_squash/test_squash_person_overrides_workflow.py
+++ b/posthog/temporal/tests/persons_on_events_squash/test_squash_person_overrides_workflow.py
@@ -38,6 +38,10 @@
["202303", "202302", "202301", "202212", "202211"],
),
({"last_n_months": 1}, ["202303"]),
+ (
+ {"partition_ids": None, "last_n_months": 3},
+ ["202303", "202302", "202301"],
+ ),
(
{"partition_ids": ["202303", "202302"], "last_n_months": 3},
["202303", "202302"],
@@ -54,6 +58,47 @@ def test_workflow_inputs_yields_partition_ids(inputs, expected):
assert list(workflow_inputs.iter_partition_ids()) == expected
+@freeze_time("2023-03-14")
+@pytest.mark.parametrize(
+ "inputs,expected",
+ [
+ (
+ {"partition_ids": None, "last_n_months": 5, "offset": 12},
+ ["202203", "202202", "202201", "202112", "202111"],
+ ),
+ ({"last_n_months": 1, "offset": 1}, ["202302"]),
+ (
+ {"partition_ids": None, "last_n_months": 3, "offset": 6},
+ ["202209", "202208", "202207"],
+ ),
+ (
+ {"partition_ids": None, "last_n_months": 1, "offset": 12},
+ ["202203"],
+ ),
+ (
+ {"partition_ids": None, "last_n_months": 1, "offset": 24},
+ ["202103"],
+ ),
+ (
+ {"partition_ids": None, "last_n_months": 1, "offset": 638},
+ ["197001"],
+ ),
+ (
+ {"partition_ids": ["202303", "202302"], "last_n_months": 3, "offset": 6},
+ ["202303", "202302"],
+ ),
+ (
+ {"partition_ids": ["202303", "202302"], "last_n_months": None, "offset": 12},
+ ["202303", "202302"],
+ ),
+ ],
+)
+def test_workflow_inputs_yields_partition_ids_with_offset(inputs, expected):
+ """Assert partition keys generated by iter_partition_ids."""
+ workflow_inputs = SquashPersonOverridesInputs(**inputs)
+ assert list(workflow_inputs.iter_partition_ids()) == expected
+
+
@pytest.fixture
def activity_environment():
"""Return a testing temporal ActivityEnvironment."""
diff --git a/posthog/test/__snapshots__/test_feature_flag.ambr b/posthog/test/__snapshots__/test_feature_flag.ambr
index 7e048cf5536d2..2c2169fe4c5f2 100644
--- a/posthog/test/__snapshots__/test_feature_flag.ambr
+++ b/posthog/test/__snapshots__/test_feature_flag.ambr
@@ -95,6 +95,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -260,6 +261,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
@@ -589,6 +591,7 @@
SELECT "posthog_team"."id",
"posthog_team"."uuid",
"posthog_team"."organization_id",
+ "posthog_team"."project_id",
"posthog_team"."api_token",
"posthog_team"."app_urls",
"posthog_team"."name",
diff --git a/posthog/test/base.py b/posthog/test/base.py
index 20ad2848c607d..8e914dd2b59d5 100644
--- a/posthog/test/base.py
+++ b/posthog/test/base.py
@@ -59,6 +59,7 @@
TRUNCATE_PERSON_STATIC_COHORT_TABLE_SQL,
)
from posthog.models.person.util import bulk_create_persons, create_person
+from posthog.models.project import Project
from posthog.models.sessions.sql import (
DROP_SESSION_TABLE_SQL,
DROP_SESSION_MATERIALIZED_VIEW_SQL,
@@ -92,18 +93,20 @@
def _setup_test_data(klass):
klass.organization = Organization.objects.create(name=klass.CONFIG_ORGANIZATION_NAME)
- klass.team = Team.objects.create(
+ klass.project, klass.team = Project.objects.create_with_team(
organization=klass.organization,
- api_token=klass.CONFIG_API_TOKEN,
- test_account_filters=[
- {
- "key": "email",
- "value": "@posthog.com",
- "operator": "not_icontains",
- "type": "person",
- }
- ],
- has_completed_onboarding_for={"product_analytics": True},
+ team_fields=dict(
+ api_token=klass.CONFIG_API_TOKEN,
+ test_account_filters=[
+ {
+ "key": "email",
+ "value": "@posthog.com",
+ "operator": "not_icontains",
+ "type": "person",
+ }
+ ],
+ has_completed_onboarding_for={"product_analytics": True},
+ ),
)
if klass.CONFIG_EMAIL:
klass.user = User.objects.create_and_join(klass.organization, klass.CONFIG_EMAIL, klass.CONFIG_PASSWORD)
@@ -206,6 +209,7 @@ class PostHogTestCase(SimpleTestCase):
# Test data definition stubs
organization: Organization = None # type: ignore
+ project: Project = None # type: ignore
team: Team = None # type: ignore
user: User = None # type: ignore
organization_membership: OrganizationMembership = None # type: ignore
diff --git a/posthog/test/test_team.py b/posthog/test/test_team.py
index ac95e5c8cc7e7..25f73dcfa87a9 100644
--- a/posthog/test/test_team.py
+++ b/posthog/test/test_team.py
@@ -12,6 +12,7 @@
User,
)
from posthog.models.instance_setting import override_instance_config
+from posthog.models.project import Project
from posthog.models.team import get_team_in_cache, util
from posthog.plugins.test.mock import mocked_plugin_requests_get
from posthog.utils import PersonOnEventsMode
@@ -45,7 +46,7 @@ def test_save_updates_cache(self):
self.assertEqual(cached_team.api_token, api_token)
self.assertEqual(cached_team.uuid, str(team.uuid))
self.assertEqual(cached_team.id, team.id)
- self.assertEqual(cached_team.name, "Default Project")
+ self.assertEqual(cached_team.name, "Default project")
team.name = "New name"
team.session_recording_opt_in = True
@@ -165,3 +166,40 @@ def test_team_on_self_hosted_uses_instance_setting_to_determine_person_on_events
team = Team.objects.create_with_data(organization=self.organization)
self.assertEqual(team.person_on_events_mode, PersonOnEventsMode.DISABLED)
mock_feature_enabled.assert_not_called()
+
+ def test_each_team_gets_project_with_default_name_and_same_id(self):
+ # Can be removed once environments are fully rolled out
+ team = Team.objects.create_with_data(organization=self.organization)
+
+ project = Project.objects.filter(id=team.id).first()
+
+ assert project is not None
+ self.assertEqual(project.name, "Default project")
+
+ def test_each_team_gets_project_with_custom_name_and_same_id(self):
+ # Can be removed once environments are fully rolled out
+ team = Team.objects.create_with_data(organization=self.organization, name="Hogflix")
+
+ project = Project.objects.filter(id=team.id).first()
+
+ assert project is not None
+ self.assertEqual(project.organization, team.organization)
+ self.assertEqual(project.name, "Hogflix")
+
+ @mock.patch("posthog.models.project.Project.objects.create", side_effect=Exception)
+ def test_team_not_created_if_project_creation_fails(self, mock_create):
+ # Can be removed once environments are fully rolled out
+ initial_team_count = Team.objects.count()
+ initial_project_count = Project.objects.count()
+
+ with self.assertRaises(Exception):
+ Team.objects.create_with_data(organization=self.organization, name="Hogflix")
+
+ self.assertEqual(Team.objects.count(), initial_team_count)
+ self.assertEqual(Project.objects.count(), initial_project_count)
+
+ def test_increment_id_sequence(self):
+ initial = Team.objects.increment_id_sequence()
+ subsequent = Team.objects.increment_id_sequence()
+
+ self.assertEqual(subsequent, initial + 1)
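
test_increment_id_sequence only asserts that consecutive calls return consecutive values. A plausible sketch of such a manager helper, assuming the Team table's primary-key sequence follows Django's default naming (posthog_team_id_seq is an assumption here); advancing it would let a Project be created with the same id its paired Team is about to receive:

    from django.db import connection

    def increment_id_sequence() -> int:
        # Hypothetical: advance the Team PK sequence and return the new value.
        with connection.cursor() as cursor:
            cursor.execute("SELECT nextval('posthog_team_id_seq')")
            return cursor.fetchone()[0]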
diff --git a/posthog/types.py b/posthog/types.py
index c3fa98e0f3ea1..3b434f16417ac 100644
--- a/posthog/types.py
+++ b/posthog/types.py
@@ -14,6 +14,7 @@
EventsNode,
DataWarehousePropertyFilter,
FeaturePropertyFilter,
+ FunnelCorrelationActorsQuery,
FunnelExclusionActionsNode,
FunnelExclusionEventsNode,
FunnelsActorsQuery,
@@ -43,7 +44,7 @@
LifecycleQuery,
]
-InsightActorsQueryNode: TypeAlias = Union[InsightActorsQuery, FunnelsActorsQuery]
+InsightActorsQueryNode: TypeAlias = Union[InsightActorsQuery, FunnelsActorsQuery, FunnelCorrelationActorsQuery]
AnyPropertyFilter: TypeAlias = Union[
EventPropertyFilter,
diff --git a/posthog/utils.py b/posthog/utils.py
index 769d289c93854..111ed0237cbcd 100644
--- a/posthog/utils.py
+++ b/posthog/utils.py
@@ -1329,3 +1329,7 @@ def label_for_team_id_to_track(team_id: int) -> str:
pass
return "unknown"
+
+
+def camel_to_snake_case(name: str) -> str:
+ return re.sub(r"(? str:
+ hogql_type: Type[DatabaseField] = DatabaseField
+
+ if dlt_type is None:
+ hogql_type = StringDatabaseField
+ elif dlt_type == "text":
+ hogql_type = StringDatabaseField
+ elif dlt_type == "double":
+ hogql_type = IntegerDatabaseField
+ elif dlt_type == "bool":
+ hogql_type = BooleanDatabaseField
+ elif dlt_type == "timestamp":
+ hogql_type = DateTimeDatabaseField
+ elif dlt_type == "bigint":
+ hogql_type = IntegerDatabaseField
+ elif dlt_type == "binary":
+ raise Exception("DLT type 'binary' is not a supported column type")
+ elif dlt_type == "complex":
+ hogql_type = StringJSONDatabaseField
+ elif dlt_type == "decimal":
+ hogql_type = IntegerDatabaseField
+ elif dlt_type == "wei":
+ raise Exception("DLT type 'wei' is not a supported column type")
+ elif dlt_type == "date":
+ hogql_type = DateTimeDatabaseField
+ elif dlt_type == "time":
+ hogql_type = DateTimeDatabaseField
+ else:
+ raise Exception(f"DLT type '{dlt_type}' is not a supported column type")
+
+ return hogql_type.__name__
async def validate_schema(
@@ -42,7 +86,9 @@ async def validate_schema(
}
-async def validate_schema_and_update_table(run_id: str, team_id: int, schemas: list[str]) -> None:
+async def validate_schema_and_update_table(
+ run_id: str, team_id: int, schemas: list[Tuple[str, str]], table_schema: TSchemaTables
+) -> None:
"""
Validates the schemas of the data synced by an external data job.
@@ -65,9 +111,12 @@ async def validate_schema_and_update_table(run_id: str, team_id: int, schemas: l
access_secret=settings.AIRBYTE_BUCKET_SECRET,
)
- for _schema_name in schemas:
+ for _schema_id, _schema_name in schemas:
table_name = f"{job.pipeline.prefix or ''}{job.pipeline.source_type}_{_schema_name}".lower()
- new_url_pattern = job.url_pattern_by_schema(_schema_name)
+ new_url_pattern = job.url_pattern_by_schema(camel_to_snake_case(_schema_name))
# Check
try:
@@ -92,11 +141,10 @@ async def validate_schema_and_update_table(run_id: str, team_id: int, schemas: l
# create or update
table_created = None
if last_successful_job:
- old_url_pattern = last_successful_job.url_pattern_by_schema(_schema_name)
try:
- table_created = await get_table_by_url_pattern_and_source(
- team_id=job.team_id, source_id=job.pipeline.id, url_pattern=old_url_pattern
- )
+ table_created = await get_table_by_schema_id(_schema_id, team_id)
+ if not table_created:
+ raise DataWarehouseTable.DoesNotExist
except Exception:
table_created = None
else:
@@ -106,14 +154,31 @@ async def validate_schema_and_update_table(run_id: str, team_id: int, schemas: l
if not table_created:
table_created = await acreate_datawarehousetable(external_data_source_id=job.pipeline.id, **data)
- # TODO: this should be async too
- table_created.columns = await sync_to_async(table_created.get_columns)()
+ for schema in table_schema.values():
+ if schema.get("resource") == _schema_name:
+ schema_columns = schema.get("columns") or {}
+ db_columns: Dict[str, str] = await sync_to_async(table_created.get_columns)()
+
+ columns = {}
+ for column_name, db_column_type in db_columns.items():
+ dlt_column = schema_columns.get(column_name)
+ if dlt_column is not None:
+ dlt_data_type = dlt_column.get("data_type")
+ hogql_type = dlt_to_hogql_type(dlt_data_type)
+ else:
+ hogql_type = dlt_to_hogql_type(None)
+
+ columns[column_name] = {
+ "clickhouse": db_column_type,
+ "hogql": hogql_type,
+ }
+ table_created.columns = columns
+ break
+
await asave_datawarehousetable(table_created)
# schema could have been deleted by this point
- schema_model = await aget_schema_if_exists(
- schema_name=_schema_name, team_id=job.team_id, source_id=job.pipeline.id
- )
+ schema_model = await aget_schema_by_id(schema_id=_schema_id, team_id=job.team_id)
if schema_model:
schema_model.table = table_created
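
Taken together, the loop above stores per-column metadata pairing the ClickHouse type reported by DESCRIBE TABLE with a HogQL field class name derived from the dlt schema. A sketch of the resulting shape, relying on the dlt_to_hogql_type defined above and using illustrative values:

    db_columns = {"id": "String", "paid_at": "Nullable(DateTime64(6))"}
    dlt_columns = {"id": {"data_type": "text"}, "paid_at": {"data_type": "timestamp"}}

    columns = {
        name: {
            "clickhouse": ch_type,
            "hogql": dlt_to_hogql_type((dlt_columns.get(name) or {}).get("data_type")),
        }
        for name, ch_type in db_columns.items()
    }
    # {"id": {"clickhouse": "String", "hogql": "StringDatabaseField"},
    #  "paid_at": {"clickhouse": "Nullable(DateTime64(6))", "hogql": "DateTimeDatabaseField"}}

The camel_to_snake_case call on the URL-pattern side is presumably needed because dlt's default naming convention snake_cases resource names when writing files, so a schema named "BalanceTransactions" lands under "balance_transactions".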
diff --git a/posthog/warehouse/models/external_data_schema.py b/posthog/warehouse/models/external_data_schema.py
index 3d4423b24778e..ad668abc8589a 100644
--- a/posthog/warehouse/models/external_data_schema.py
+++ b/posthog/warehouse/models/external_data_schema.py
@@ -41,9 +41,14 @@ def aget_schema_if_exists(schema_name: str, team_id: int, source_id: uuid.UUID)
return get_schema_if_exists(schema_name=schema_name, team_id=team_id, source_id=source_id)
+@database_sync_to_async
+def aget_schema_by_id(schema_id: str, team_id: int) -> ExternalDataSchema | None:
+ return ExternalDataSchema.objects.filter(id=schema_id, team_id=team_id).first()
+
+
def get_active_schemas_for_source_id(source_id: uuid.UUID, team_id: int):
schemas = ExternalDataSchema.objects.filter(team_id=team_id, source_id=source_id, should_sync=True).values().all()
- return [val["name"] for val in schemas]
+ return [(val["id"], val["name"]) for val in schemas]
def get_all_schemas_for_source_id(source_id: uuid.UUID, team_id: int):
diff --git a/posthog/warehouse/models/table.py b/posthog/warehouse/models/table.py
index f8cd3cf73e127..5fbe84b3f34d9 100644
--- a/posthog/warehouse/models/table.py
+++ b/posthog/warehouse/models/table.py
@@ -1,3 +1,4 @@
+from typing import Dict
from django.db import models
from posthog.client import sync_execute
@@ -20,6 +21,7 @@
sane_repr,
)
from posthog.warehouse.models.util import remove_named_tuples
+from posthog.warehouse.models.external_data_schema import ExternalDataSchema
from django.db.models import Q
from .credential import DataWarehouseCredential
from uuid import UUID
@@ -53,6 +55,16 @@
"Decimal": IntegerDatabaseField,
}
+STR_TO_HOGQL_MAPPING = {
+ "BooleanDatabaseField": BooleanDatabaseField,
+ "DateDatabaseField": DateDatabaseField,
+ "DateTimeDatabaseField": DateTimeDatabaseField,
+ "IntegerDatabaseField": IntegerDatabaseField,
+ "StringArrayDatabaseField": StringArrayDatabaseField,
+ "StringDatabaseField": StringDatabaseField,
+ "StringJSONDatabaseField": StringJSONDatabaseField,
+}
+
ExtractErrors = {
"The AWS Access Key Id you provided does not exist": "The Access Key you provided does not exist",
}
@@ -86,7 +98,7 @@ class TableFormat(models.TextChoices):
__repr__ = sane_repr("name")
- def get_columns(self, safe_expose_ch_error=True):
+ def get_columns(self, safe_expose_ch_error=True) -> Dict[str, str]:
try:
result = sync_execute(
"""DESCRIBE TABLE (
@@ -117,17 +129,29 @@ def hogql_definition(self) -> S3Table:
fields = {}
structure = []
for column, type in self.columns.items():
- if type.startswith("Nullable("):
- type = type.replace("Nullable(", "")[:-1]
+ # Support for 'old' style columns
+ if isinstance(type, str):
+ clickhouse_type = type
+ else:
+ clickhouse_type = type["clickhouse"]
+
+ if clickhouse_type.startswith("Nullable("):
+ clickhouse_type = clickhouse_type.replace("Nullable(", "")[:-1]
# TODO: remove when addressed https://github.com/ClickHouse/ClickHouse/issues/37594
- if type.startswith("Array("):
- type = remove_named_tuples(type)
+ if clickhouse_type.startswith("Array("):
+ clickhouse_type = remove_named_tuples(clickhouse_type)
+
+ structure.append(f"{column} {clickhouse_type}")
- structure.append(f"{column} {type}")
- type = type.partition("(")[0]
- type = CLICKHOUSE_HOGQL_MAPPING[type]
- fields[column] = type(name=column)
+ # Support for 'old' style columns
+ if isinstance(type, str):
+ hogql_type_str = clickhouse_type.partition("(")[0]
+ hogql_type = CLICKHOUSE_HOGQL_MAPPING[hogql_type_str]
+ else:
+ hogql_type = STR_TO_HOGQL_MAPPING[type["hogql"]]
+
+ fields[column] = hogql_type(name=column)
return S3Table(
name=self.name,
@@ -154,6 +178,11 @@ def get_table_by_url_pattern_and_source(url_pattern: str, source_id: UUID, team_
)
+@database_sync_to_async
+def get_table_by_schema_id(schema_id: str, team_id: int):
+ return ExternalDataSchema.objects.get(id=schema_id, team_id=team_id).table
+
+
@database_sync_to_async
def acreate_datawarehousetable(**kwargs):
return DataWarehouseTable.objects.create(**kwargs)
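
hogql_definition now accepts both column formats, and the string stored under "hogql" is resolved back to a field class through STR_TO_HOGQL_MAPPING, which keeps the columns JSONField serializable. A small sketch of the round trip, using the mappings defined above with illustrative values:

    # Old style: bare ClickHouse type strings, resolved via CLICKHOUSE_HOGQL_MAPPING.
    old_columns = {"mrr": "Nullable(Int64)"}
    # New style: JSON-friendly dicts storing the HogQL field class by name.
    new_columns = {"mrr": {"clickhouse": "Nullable(Int64)", "hogql": "IntegerDatabaseField"}}

    field_cls = STR_TO_HOGQL_MAPPING[new_columns["mrr"]["hogql"]]
    field = field_cls(name="mrr")  # IntegerDatabaseField(name="mrr")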
diff --git a/posthog/warehouse/models/test/test_table.py b/posthog/warehouse/models/test/test_table.py
index b456c43fdfb61..7ccf9a14ad090 100644
--- a/posthog/warehouse/models/test/test_table.py
+++ b/posthog/warehouse/models/test/test_table.py
@@ -1,3 +1,4 @@
+from posthog.hogql.database.models import DateTimeDatabaseField, IntegerDatabaseField, StringDatabaseField
from posthog.test.base import BaseTest
from posthog.warehouse.models import DataWarehouseCredential, DataWarehouseTable
@@ -19,7 +20,7 @@ class TestTable(BaseTest):
# )
# table.get_columns()
- def test_hogql_definition(self):
+ def test_hogql_definition_old_style(self):
credential = DataWarehouseCredential.objects.create(access_key="test", access_secret="test", team=self.team)
table = DataWarehouseTable.objects.create(
name="bla",
@@ -39,6 +40,46 @@ def test_hogql_definition(self):
["id", "timestamp", "mrr", "offset"],
)
+ self.assertEqual(
+ list(table.hogql_definition().fields.values()),
+ [
+ StringDatabaseField(name="id"),
+ DateTimeDatabaseField(name="timestamp"),
+ IntegerDatabaseField(name="mrr"),
+ IntegerDatabaseField(name="offset"),
+ ],
+ )
+
+ def test_hogql_definition_new_style(self):
+ credential = DataWarehouseCredential.objects.create(access_key="test", access_secret="test", team=self.team)
+ table = DataWarehouseTable.objects.create(
+ name="bla",
+ url_pattern="https://databeach-hackathon.s3.amazonaws.com/tim_test/test_events6.pqt",
+ format=DataWarehouseTable.TableFormat.Parquet,
+ team=self.team,
+ columns={
+ "id": {"clickhouse": "String", "hogql": "StringDatabaseField"},
+ "timestamp": {"clickhouse": "DateTime64(3, 'UTC')", "hogql": "DateTimeDatabaseField"},
+ "mrr": {"clickhouse": "Nullable(Int64)", "hogql": "IntegerDatabaseField"},
+ "offset": {"clickhouse": "UInt32", "hogql": "IntegerDatabaseField"},
+ },
+ credential=credential,
+ )
+ self.assertEqual(
+ list(table.hogql_definition().fields.keys()),
+ ["id", "timestamp", "mrr", "offset"],
+ )
+
+ self.assertEqual(
+ list(table.hogql_definition().fields.values()),
+ [
+ StringDatabaseField(name="id"),
+ DateTimeDatabaseField(name="timestamp"),
+ IntegerDatabaseField(name="mrr"),
+ IntegerDatabaseField(name="offset"),
+ ],
+ )
+
def test_hogql_definition_tuple_patch(self):
credential = DataWarehouseCredential.objects.create(access_key="test", access_secret="test", team=self.team)
table = DataWarehouseTable.objects.create(
diff --git a/production.Dockerfile b/production.Dockerfile
index 9b71e97f34b69..6d43498a5350b 100644
--- a/production.Dockerfile
+++ b/production.Dockerfile
@@ -21,9 +21,9 @@
#
# ---------------------------------------------------------
#
-FROM node:18.12.1-bullseye-slim AS frontend-build
+FROM node:18.19.1-bullseye-slim AS frontend-build
WORKDIR /code
-SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+SHELL ["/bin/bash", "-e", "-o", "pipefail", "-c"]
COPY package.json pnpm-lock.yaml ./
COPY patches/ patches/
@@ -42,9 +42,9 @@ RUN pnpm build
#
# ---------------------------------------------------------
#
-FROM node:18.12.1-bullseye-slim AS plugin-server-build
+FROM node:18.19.1-bullseye-slim AS plugin-server-build
WORKDIR /code/plugin-server
-SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+SHELL ["/bin/bash", "-e", "-o", "pipefail", "-c"]
# Compile and install Node.js dependencies.
COPY ./plugin-server/package.json ./plugin-server/pnpm-lock.yaml ./plugin-server/tsconfig.json ./
@@ -85,7 +85,7 @@ RUN corepack enable && \
#
FROM python:3.10.10-slim-bullseye AS posthog-build
WORKDIR /code
-SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+SHELL ["/bin/bash", "-e", "-o", "pipefail", "-c"]
# Compile and install Python dependencies.
# We install those dependencies on a custom folder that we will
@@ -120,7 +120,7 @@ RUN SKIP_SERVICE_VERSION_REQUIREMENTS=1 SECRET_KEY='unsafe secret key for collec
#
FROM debian:bullseye-slim AS fetch-geoip-db
WORKDIR /code
-SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+SHELL ["/bin/bash", "-e", "-o", "pipefail", "-c"]
# Fetch the GeoLite2-City database that will be used for IP geolocation within Django.
RUN apt-get update && \
@@ -166,19 +166,19 @@ RUN set -ex \
&& CC_OPT="$(DEB_BUILD_MAINT_OPTIONS="hardening=+all,-pie" DEB_CFLAGS_MAINT_APPEND="-Wp,-D_FORTIFY_SOURCE=2 -fPIC" dpkg-buildflags --get CFLAGS)" \
&& LD_OPT="$(DEB_BUILD_MAINT_OPTIONS="hardening=+all,-pie" DEB_LDFLAGS_MAINT_APPEND="-Wl,--as-needed -pie" dpkg-buildflags --get LDFLAGS)" \
&& CONFIGURE_ARGS_MODULES="--prefix=/usr \
- --statedir=/var/lib/unit \
- --control=unix:/var/run/control.unit.sock \
- --runstatedir=/var/run \
- --pid=/var/run/unit.pid \
- --logdir=/var/log \
- --log=/var/log/unit.log \
- --tmpdir=/var/tmp \
- --user=unit \
- --group=unit \
- --openssl \
- --libdir=/usr/lib/$DEB_HOST_MULTIARCH" \
+ --statedir=/var/lib/unit \
+ --control=unix:/var/run/control.unit.sock \
+ --runstatedir=/var/run \
+ --pid=/var/run/unit.pid \
+ --logdir=/var/log \
+ --log=/var/log/unit.log \
+ --tmpdir=/var/tmp \
+ --user=unit \
+ --group=unit \
+ --openssl \
+ --libdir=/usr/lib/$DEB_HOST_MULTIARCH" \
&& CONFIGURE_ARGS="$CONFIGURE_ARGS_MODULES \
- --njs" \
+ --njs" \
&& make -j $NCPU -C pkg/contrib .njs \
&& export PKG_CONFIG_PATH=$(pwd)/pkg/contrib/njs/build \
&& ./configure $CONFIGURE_ARGS --cc-opt="$CC_OPT" --ld-opt="$LD_OPT" --modulesdir=/usr/lib/unit/debug-modules --debug \
@@ -200,8 +200,8 @@ RUN set -ex \
&& cd \
&& rm -rf /usr/src/unit \
&& for f in /usr/sbin/unitd /usr/lib/unit/modules/*.unit.so; do \
- ldd $f | awk '/=>/{print $(NF-1)}' | while read n; do dpkg-query -S $n; done | sed 's/^\([^:]\+\):.*$/\1/' | sort | uniq >> /requirements.apt; \
- done \
+ ldd $f | awk '/=>/{print $(NF-1)}' | while read n; do dpkg-query -S $n; done | sed 's/^\([^:]\+\):.*$/\1/' | sort | uniq >> /requirements.apt; \
+ done \
&& apt-mark showmanual | xargs apt-mark auto > /dev/null \
&& { [ -z "$savedAptMark" ] || apt-mark manual $savedAptMark; } \
&& /bin/true \
@@ -209,13 +209,13 @@ RUN set -ex \
&& mkdir -p /docker-entrypoint.d/ \
&& groupadd --gid 998 unit \
&& useradd \
- --uid 998 \
- --gid unit \
- --no-create-home \
- --home /nonexistent \
- --comment "unit user" \
- --shell /bin/false \
- unit \
+ --uid 998 \
+ --gid unit \
+ --no-create-home \
+ --home /nonexistent \
+ --comment "unit user" \
+ --shell /bin/false \
+ unit \
&& apt-get update \
&& apt-get --no-install-recommends --no-install-suggests -y install curl $(cat /requirements.apt) \
&& apt-get purge -y --auto-remove build-essential \
@@ -237,7 +237,7 @@ CMD ["unitd", "--no-daemon", "--control", "unix:/var/run/control.unit.sock"]
#
FROM unit-131-python-310
WORKDIR /code
-SHELL ["/bin/bash", "-o", "pipefail", "-c"]
+SHELL ["/bin/bash", "-e", "-o", "pipefail", "-c"]
ENV PYTHONUNBUFFERED 1
# Install OS runtime dependencies.
diff --git a/pyproject.toml b/pyproject.toml
index 689a4c32bf2a2..a4fffb70b1853 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -33,7 +33,6 @@ ignore = [
"E722",
"E731",
"F403",
- "F405",
"F541",
"F601",
]
diff --git a/requirements-dev.txt b/requirements-dev.txt
index 6b4db6f518eb6..7fed5d4cc5008 100644
--- a/requirements-dev.txt
+++ b/requirements-dev.txt
@@ -59,7 +59,9 @@ coreapi==2.3.3
coreschema==0.0.4
# via coreapi
coverage[toml]==5.5
- # via pytest-cov
+ # via
+ # coverage
+ # pytest-cov
datamodel-code-generator==0.25.2
# via -r requirements-dev.in
django==4.1.13
@@ -90,7 +92,9 @@ exceptiongroup==1.2.0
faker==17.5.0
# via -r requirements-dev.in
fakeredis[lua]==2.11.0
- # via -r requirements-dev.in
+ # via
+ # -r requirements-dev.in
+ # fakeredis
flaky==3.7.0
# via -r requirements-dev.in
freezegun==1.2.2
@@ -156,8 +160,10 @@ pathspec==0.9.0
# via black
pip-tools==7.3.0
# via -r requirements-dev.in
-platformdirs==2.5.2
- # via black
+platformdirs==3.11.0
+ # via
+ # -c requirements.txt
+ # black
pluggy==0.13.1
# via pytest
pprintpp==0.4.0
@@ -166,6 +172,7 @@ pydantic[email]==2.5.3
# via
# -c requirements.txt
# datamodel-code-generator
+ # pydantic
pydantic-core==2.14.6
# via
# -c requirements.txt
diff --git a/requirements.in b/requirements.in
index c4ba56022b103..800abfa76f036 100644
--- a/requirements.in
+++ b/requirements.in
@@ -83,7 +83,7 @@ sentry-sdk==1.14.0
semantic_version==2.8.5
scikit-learn==1.4.0
slack_sdk==3.17.1
-snowflake-connector-python==3.0.4
+snowflake-connector-python==3.6.0
social-auth-app-django==5.0.0
social-auth-core==4.3.0
statshog==1.0.6
diff --git a/requirements.txt b/requirements.txt
index eee1ff8f936a2..a08984a29ff22 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -38,9 +38,7 @@ anyio==4.2.0
asgiref==3.7.2
# via django
asn1crypto==1.5.1
- # via
- # oscrypto
- # snowflake-connector-python
+ # via snowflake-connector-python
astunparse==1.6.3
# via dlt
async-generator==1.10
@@ -400,8 +398,6 @@ orjson==3.9.15
# via
# -r requirements.in
# dlt
-oscrypto==1.3.0
- # via snowflake-connector-python
outcome==1.1.0
# via trio
packaging==23.1
@@ -430,6 +426,8 @@ pickleshare==0.7.5
# via -r requirements.in
pillow==10.2.0
# via -r requirements.in
+platformdirs==3.11.0
+ # via snowflake-connector-python
ply==3.11
# via jsonpath-ng
posthoganalytics==3.5.0
@@ -468,8 +466,6 @@ pyasn1-modules==0.3.0
# via google-auth
pycparser==2.20
# via cffi
-pycryptodomex==3.18.0
- # via snowflake-connector-python
pydantic==2.5.3
# via
# -r requirements.in
@@ -614,7 +610,7 @@ sniffio==1.2.0
# httpx
# openai
# trio
-snowflake-connector-python==3.0.4
+snowflake-connector-python==3.6.0
# via -r requirements.in
social-auth-app-django==5.0.0
# via -r requirements.in
@@ -653,7 +649,9 @@ tiktoken==0.6.0
token-bucket==0.3.0
# via -r requirements.in
tomlkit==0.12.3
- # via dlt
+ # via
+ # dlt
+ # snowflake-connector-python
toronado==0.1.0
# via -r requirements.in
tqdm==4.64.1
@@ -709,7 +707,6 @@ urllib3[secure,socks]==1.26.18
# requests
# selenium
# sentry-sdk
- # snowflake-connector-python
# urllib3
urllib3-secure-extra==0.1.0
# via urllib3
diff --git a/unit.json b/unit.json
index 0b8de8774edf1..3982169eec719 100644
--- a/unit.json
+++ b/unit.json
@@ -6,7 +6,7 @@
},
"listeners": {
"*:8000": {
- "pass": "applications/posthog"
+ "pass": "routes/posthog"
},
"*:8001": {
"pass": "routes/metrics"
@@ -16,6 +16,25 @@
}
},
"routes": {
+ "posthog": [
+ {
+ "match": {
+ "uri": [
+ "/_health",
+ "/_readyz",
+ "/_livez"
+ ]
+ },
+ "action": {
+ "pass": "applications/posthog-health"
+ }
+ },
+ {
+ "action": {
+ "pass": "applications/posthog"
+ }
+ }
+ ],
"metrics": [
{
"match": {
@@ -38,6 +57,17 @@
]
},
"applications": {
+ "posthog-health": {
+ "type": "python 3.10",
+ "processes": 1,
+ "working_directory": "/code",
+ "path": ".",
+ "module": "posthog.wsgi",
+ "user": "nobody",
+ "limits": {
+ "requests": 5000
+ }
+ },
"posthog": {
"type": "python 3.10",
"processes": 4,