diff --git a/cypress/e2e/auth.cy.ts b/cypress/e2e/auth.cy.ts index dd514f1121c23..5355f3011b12e 100644 --- a/cypress/e2e/auth.cy.ts +++ b/cypress/e2e/auth.cy.ts @@ -84,7 +84,7 @@ describe('Auth', () => { cy.visit('/signup') cy.location('pathname').should('eq', '/project/1') }) - + it('Logout in another tab results in logout in the current tab too', () => { cy.window().then(async (win) => { // Hit /logout *in the background* by using fetch() diff --git a/cypress/productAnalytics/index.ts b/cypress/productAnalytics/index.ts index cf94691bf657b..b523a4e970efb 100644 --- a/cypress/productAnalytics/index.ts +++ b/cypress/productAnalytics/index.ts @@ -209,14 +209,14 @@ export const dashboard = { cy.get('[data-attr="prop-val-0"]').click({ force: true }) cy.get('.PropertyFilterButton').should('have.length', 1) }, - addPropertyFilter(type: string = "Browser", value: string = "Chrome"): void { + addPropertyFilter(type: string = 'Browser', value: string = 'Chrome'): void { cy.get('.PropertyFilterButton').should('have.length', 0) cy.get('[data-attr="property-filter-0"]').click() - cy.get('[data-attr="taxonomic-filter-searchfield"]').click().type("Browser").wait(1000) + cy.get('[data-attr="taxonomic-filter-searchfield"]').click().type('Browser').wait(1000) cy.get('[data-attr="prop-filter-event_properties-0"]').click({ force: true }) cy.get('.ant-select-selector').type(value) cy.get('.ant-select-item-option-content').click({ force: true }) - } + }, } export function createInsight(insightName: string): void { diff --git a/frontend/__snapshots__/scenes-app-insights--user-paths-edit--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--user-paths-edit--light--webkit.png index 93d52b7379a03..b7e136c7058b1 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--user-paths-edit--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--user-paths-edit--light--webkit.png differ diff --git a/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx b/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx index b34b566b7411e..548c9c5695ef1 100644 --- a/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx +++ b/frontend/src/scenes/data-warehouse/ViewLinkModal.tsx @@ -1,11 +1,21 @@ import './ViewLinkModal.scss' import { IconTrash } from '@posthog/icons' -import { LemonButton, LemonDivider, LemonInput, LemonModal, LemonSelect, LemonTag } from '@posthog/lemon-ui' +import { + LemonButton, + LemonDivider, + LemonDropdown, + LemonInput, + LemonModal, + LemonSelect, + LemonTag, +} from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { Field, Form } from 'kea-forms' import { CodeSnippet, Language } from 'lib/components/CodeSnippet' +import { HogQLEditor } from 'lib/components/HogQLEditor/HogQLEditor' import { IconSwapHoriz } from 'lib/lemon-ui/icons' +import { useState } from 'react' import { viewLinkLogic } from 'scenes/data-warehouse/viewLinkLogic' import { DatabaseSchemaQueryResponseField } from '~/queries/schema' @@ -44,8 +54,19 @@ export function ViewLinkForm(): JSX.Element { error, fieldName, isNewJoin, + selectedSourceKey, + selectedJoiningKey, + sourceIsUsingHogQLExpression, + joiningIsUsingHogQLExpression, } = useValues(viewLinkLogic) - const { selectJoiningTable, toggleJoinTableModal, selectSourceTable, setFieldName } = useActions(viewLinkLogic) + const { + selectJoiningTable, + toggleJoinTableModal, + selectSourceTable, + setFieldName, + selectSourceKey, + selectJoiningKey, + } = useActions(viewLinkLogic) return (
@@ -82,12 +103,22 @@ export function ViewLinkForm(): JSX.Element {
Source Table Key - + <> + HogQL Expression }]} + placeholder="Select a key" + /> + {sourceIsUsingHogQLExpression && ( + + )} +
@@ -96,12 +127,22 @@ export function ViewLinkForm(): JSX.Element {
Joining Table Key - + <> + HogQL Expression }]} + placeholder="Select a key" + /> + {joiningIsUsingHogQLExpression && ( + + )} +
@@ -151,6 +192,47 @@ export function ViewLinkForm(): JSX.Element { ) } +const HogQLDropdown = ({ + hogQLValue, + onHogQLValueChange, +}: { + hogQLValue: string + onHogQLValueChange: (hogQLValue: string) => void +}): JSX.Element => { + const [isHogQLDropdownVisible, setIsHogQLDropdownVisible] = useState(false) + + return ( +
+ setIsHogQLDropdownVisible(false)} + overlay={ + // eslint-disable-next-line react/forbid-dom-props +
+ { + onHogQLValueChange(currentValue) + setIsHogQLDropdownVisible(false) + }} + /> +
+ } + > + setIsHogQLDropdownVisible(!isHogQLDropdownVisible)} + > + {hogQLValue} + +
+
+ ) +} + interface ViewLinkDeleteButtonProps { table: string column: string diff --git a/frontend/src/scenes/data-warehouse/viewLinkLogic.tsx b/frontend/src/scenes/data-warehouse/viewLinkLogic.tsx index ac23c43e7ff53..34e63deaf130d 100644 --- a/frontend/src/scenes/data-warehouse/viewLinkLogic.tsx +++ b/frontend/src/scenes/data-warehouse/viewLinkLogic.tsx @@ -15,9 +15,7 @@ import { ViewLinkKeyLabel } from './ViewLinkModal' const NEW_VIEW_LINK: DataWarehouseViewLink = { id: 'new', source_table_name: undefined, - source_table_key: undefined, joining_table_name: undefined, - joining_table_key: undefined, field_name: undefined, } @@ -37,9 +35,11 @@ export const viewLinkLogic = kea([ ], actions: [databaseTableListLogic, ['loadDatabase'], dataWarehouseJoinsLogic, ['loadJoins']], }), - actions({ + actions(({ values }) => ({ selectJoiningTable: (selectedTableName: string) => ({ selectedTableName }), selectSourceTable: (selectedTableName: string) => ({ selectedTableName }), + selectSourceKey: (selectedKey: string) => ({ selectedKey, sourceTable: values.selectedSourceTable }), + selectJoiningKey: (selectedKey: string) => ({ selectedKey, joiningTable: values.selectedJoiningTable }), toggleJoinTableModal: true, toggleEditJoinModal: (join: DataWarehouseViewLink) => ({ join }), toggleNewJoinModal: true, @@ -48,7 +48,7 @@ export const viewLinkLogic = kea([ setError: (error: string) => ({ error }), setFieldName: (fieldName: string) => ({ fieldName }), clearModalFields: true, - }), + })), reducers({ joinToEdit: [ null as DataWarehouseViewLink | null, @@ -84,6 +84,20 @@ export const viewLinkLogic = kea([ clearModalFields: () => null, }, ], + selectedSourceKey: [ + null as string | null, + { + selectSourceKey: (_, { selectedKey }) => selectedKey, + toggleEditJoinModal: (_, { join }) => join.source_table_key ?? null, + }, + ], + selectedJoiningKey: [ + null as string | null, + { + selectJoiningKey: (_, { selectedKey }) => selectedKey, + toggleEditJoinModal: (_, { join }) => join.joining_table_key ?? null, + }, + ], fieldName: [ '' as string, { @@ -112,44 +126,21 @@ export const viewLinkLogic = kea([ forms(({ actions, values }) => ({ viewLink: { defaults: NEW_VIEW_LINK, - errors: ({ source_table_name, joining_table_name, joining_table_key, source_table_key }) => { - let joining_table_key_err: string | undefined = undefined - let source_table_key_err: string | undefined = undefined - - if (!joining_table_key) { - joining_table_key_err = 'Must select a join key' - } - - if (!source_table_key) { - source_table_key_err = 'Must select a join key' - } - - if ( - joining_table_key && - source_table_key && - values.selectedJoiningTable?.columns?.find((n) => n.key == joining_table_key)?.type !== - values.selectedSourceTable?.columns?.find((n) => n.key == source_table_key)?.type - ) { - joining_table_key_err = 'Join key types must match' - source_table_key_err = 'Join key types must match' - } - + errors: ({ source_table_name, joining_table_name }) => { return { source_table_name: values.isNewJoin && !source_table_name ? 'Must select a table' : undefined, joining_table_name: !joining_table_name ? 'Must select a table' : undefined, - source_table_key: source_table_key_err, - joining_table_key: joining_table_key_err, } }, - submit: async ({ joining_table_name, source_table_name, source_table_key, joining_table_key }) => { + submit: async ({ joining_table_name, source_table_name }) => { if (values.joinToEdit?.id && values.selectedSourceTable) { // Edit join try { await api.dataWarehouseViewLinks.update(values.joinToEdit.id, { source_table_name: source_table_name ?? values.selectedSourceTable.name, - source_table_key, + source_table_key: values.selectedSourceKey ?? undefined, joining_table_name, - joining_table_key, + joining_table_key: values.selectedJoiningKey ?? undefined, field_name: values.fieldName, }) @@ -164,9 +155,9 @@ export const viewLinkLogic = kea([ try { await api.dataWarehouseViewLinks.create({ source_table_name: source_table_name ?? values.selectedSourceTable.name, - source_table_key, + source_table_key: values.selectedSourceKey ?? undefined, joining_table_name, - joining_table_key, + joining_table_key: values.selectedJoiningKey ?? undefined, field_name: values.fieldName, }) @@ -222,6 +213,26 @@ export const viewLinkLogic = kea([ (s) => [s.selectedJoiningTableName, s.tables], (selectedJoiningTableName, tables) => tables.find((row) => row.name === selectedJoiningTableName), ], + sourceIsUsingHogQLExpression: [ + (s) => [s.selectedSourceKey, s.selectedSourceTable], + (sourceKey, sourceTable) => { + if (sourceKey === null) { + return false + } + const column = sourceTable?.columns.find((n) => n.key == sourceKey) + return !column + }, + ], + joiningIsUsingHogQLExpression: [ + (s) => [s.selectedJoiningKey, s.selectedJoiningTable], + (joiningKey, joiningTable) => { + if (joiningKey === null) { + return false + } + const column = joiningTable?.columns.find((n) => n.key == joiningKey) + return !column + }, + ], tableOptions: [ (s) => [s.tables], (tables) => diff --git a/mypy-baseline.txt b/mypy-baseline.txt index d4beb524d4186..1c1de99e0bdaf 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -4,6 +4,7 @@ posthog/temporal/common/utils.py:0: error: Argument 2 to "__get__" of "classmeth posthog/hogql/database/argmax.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql/database/argmax.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/argmax.py:0: note: Consider using "Sequence" instead, which is covariant +posthog/hogql/database/argmax.py:0: error: Unsupported operand types for + ("list[str]" and "list[str | int]") [operator] posthog/hogql/database/schema/numbers.py:0: error: Incompatible types in assignment (expression has type "dict[str, IntegerDatabaseField]", variable has type "dict[str, FieldOrTable]") [assignment] posthog/hogql/database/schema/numbers.py:0: note: "Dict" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/numbers.py:0: note: Consider using "Mapping" instead, which is covariant in the value type @@ -55,9 +56,11 @@ posthog/hogql/visitor.py:0: error: Argument 1 to "visit" of "Visitor" has incomp posthog/hogql/database/schema/log_entries.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql/database/schema/log_entries.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/log_entries.py:0: note: Consider using "Sequence" instead, which is covariant +posthog/hogql/database/schema/log_entries.py:0: error: Unsupported operand types for + ("list[str]" and "list[str | int]") [operator] posthog/hogql/database/schema/log_entries.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql/database/schema/log_entries.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/log_entries.py:0: note: Consider using "Sequence" instead, which is covariant +posthog/hogql/database/schema/log_entries.py:0: error: Unsupported operand types for + ("list[str]" and "list[str | int]") [operator] posthog/hogql/database/schema/groups.py:0: error: Incompatible types in assignment (expression has type "dict[str, DatabaseField]", variable has type "dict[str, FieldOrTable]") [assignment] posthog/hogql/database/schema/groups.py:0: note: "Dict" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/groups.py:0: note: Consider using "Mapping" instead, which is covariant in the value type @@ -67,24 +70,25 @@ posthog/hogql/database/schema/groups.py:0: note: Consider using "Mapping" instea posthog/hogql/resolver_utils.py:0: error: Argument 1 to "lookup_field_by_name" has incompatible type "SelectQueryType | SelectUnionQueryType"; expected "SelectQueryType" [arg-type] posthog/hogql/database/schema/persons.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "settings" [union-attr] posthog/hogql/database/schema/persons.py:0: error: Item "SelectUnionQuery" of "SelectQuery | SelectUnionQuery" has no attribute "select" [union-attr] -posthog/hogql/database/schema/persons.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] -posthog/hogql/database/schema/persons.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance -posthog/hogql/database/schema/persons.py:0: note: Consider using "Sequence" instead, which is covariant posthog/hogql/parser.py:0: error: Key expression in dictionary comprehension has incompatible type "str"; expected type "Literal['expr', 'order_expr', 'select']" [misc] posthog/hogql/parser.py:0: error: Statement is unreachable [unreachable] posthog/hogql/parser.py:0: error: Item "None" of "list[Expr] | None" has no attribute "__iter__" (not iterable) [union-attr] posthog/hogql/parser.py:0: error: "None" has no attribute "text" [attr-defined] posthog/hogql/parser.py:0: error: "None" has no attribute "text" [attr-defined] posthog/hogql/parser.py:0: error: Statement is unreachable [unreachable] +posthog/hogql/database/schema/person_distinct_ids.py:0: error: Argument 1 to "select_from_person_distinct_ids_table" has incompatible type "dict[str, list[str]]"; expected "dict[str, list[str | int]]" [arg-type] posthog/hogql/database/schema/cohort_people.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql/database/schema/cohort_people.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/cohort_people.py:0: note: Consider using "Sequence" instead, which is covariant +posthog/hogql/database/schema/cohort_people.py:0: error: Unsupported operand types for + ("list[str]" and "list[str | int]") [operator] posthog/hogql/database/schema/session_replay_events.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql/database/schema/session_replay_events.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/session_replay_events.py:0: note: Consider using "Sequence" instead, which is covariant +posthog/hogql/database/schema/session_replay_events.py:0: error: Unsupported operand types for + ("list[str]" and "list[str | int]") [operator] posthog/hogql/database/schema/session_replay_events.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql/database/schema/session_replay_events.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/session_replay_events.py:0: note: Consider using "Sequence" instead, which is covariant +posthog/hogql/database/schema/session_replay_events.py:0: error: Unsupported operand types for + ("list[str]" and "list[str | int]") [operator] posthog/hogql/database/schema/event_sessions.py:0: error: Statement is unreachable [unreachable] posthog/plugins/utils.py:0: error: Subclass of "str" and "bytes" cannot exist: would have incompatible method signatures [unreachable] posthog/plugins/utils.py:0: error: Statement is unreachable [unreachable] @@ -243,27 +247,16 @@ posthog/temporal/data_imports/external_data_job.py:0: error: Argument 2 to "Data posthog/hogql/transforms/lazy_tables.py:0: error: Incompatible default for argument "context" (default has type "None", argument has type "HogQLContext") [assignment] posthog/hogql/transforms/lazy_tables.py:0: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True posthog/hogql/transforms/lazy_tables.py:0: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase +posthog/hogql/transforms/lazy_tables.py:0: error: Incompatible types in assignment (expression has type "dict[Never, Never]", variable has type "list[ConstraintOverride]") [assignment] posthog/hogql/transforms/lazy_tables.py:0: error: Incompatible default for argument "context" (default has type "None", argument has type "HogQLContext") [assignment] posthog/hogql/transforms/lazy_tables.py:0: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True posthog/hogql/transforms/lazy_tables.py:0: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase -posthog/hogql/transforms/lazy_tables.py:0: error: Argument 1 to "append" of "list" has incompatible type "list[FieldType]"; expected "list[FieldType | PropertyType]" [arg-type] -posthog/hogql/transforms/lazy_tables.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance -posthog/hogql/transforms/lazy_tables.py:0: note: Consider using "Sequence" instead, which is covariant -posthog/hogql/transforms/lazy_tables.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | None" has no attribute "type" [union-attr] -posthog/hogql/transforms/lazy_tables.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | None" has no attribute "type" [union-attr] -posthog/hogql/transforms/lazy_tables.py:0: error: Statement is unreachable [unreachable] -posthog/hogql/transforms/lazy_tables.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | None" has no attribute "type" [union-attr] -posthog/hogql/transforms/lazy_tables.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | None" has no attribute "type" [union-attr] -posthog/hogql/transforms/lazy_tables.py:0: error: Incompatible types in assignment (expression has type "PropertyType | FieldType", variable has type "FieldType") [assignment] -posthog/hogql/transforms/lazy_tables.py:0: error: Statement is unreachable [unreachable] -posthog/hogql/transforms/lazy_tables.py:0: error: Statement is unreachable [unreachable] -posthog/hogql/transforms/lazy_tables.py:0: error: Statement is unreachable [unreachable] -posthog/hogql/transforms/lazy_tables.py:0: error: List item 0 has incompatible type "SelectQueryType | None"; expected "SelectQueryType" [list-item] -posthog/hogql/transforms/lazy_tables.py:0: error: Item "None" of "SelectQuery | SelectUnionQuery | Field | None" has no attribute "type" [union-attr] -posthog/hogql/transforms/lazy_tables.py:0: error: List item 0 has incompatible type "SelectQueryType | None"; expected "SelectQueryType" [list-item] -posthog/hogql/transforms/lazy_tables.py:0: error: Incompatible types in assignment (expression has type "BaseTableType | SelectUnionQueryType | SelectQueryType | SelectQueryAliasType | None", target has type "BaseTableType | SelectUnionQueryType | SelectQueryType | SelectQueryAliasType") [assignment] -posthog/hogql/transforms/lazy_tables.py:0: error: Statement is unreachable [unreachable] +posthog/hogql/transforms/lazy_tables.py:0: error: Non-overlapping equality check (left operand type: "TableType", right operand type: "LazyTableType") [comparison-overlap] +posthog/hogql/transforms/lazy_tables.py:0: error: Non-overlapping equality check (left operand type: "TableType", right operand type: "LazyTableType") [comparison-overlap] +posthog/hogql/transforms/lazy_tables.py:0: error: Name "chain" already defined on line 0 [no-redef] +posthog/hogql/transforms/lazy_tables.py:0: error: Subclass of "TableType" and "LazyTableType" cannot exist: would have incompatible method signatures [unreachable] posthog/hogql/transforms/lazy_tables.py:0: error: Statement is unreachable [unreachable] +posthog/hogql/transforms/lazy_tables.py:0: error: Incompatible types in assignment (expression has type "BaseTableType | SelectUnionQueryType | SelectQueryType | SelectQueryAliasType", variable has type "SelectQueryAliasType | None") [assignment] posthog/hogql/transforms/in_cohort.py:0: error: Incompatible default for argument "context" (default has type "None", argument has type "HogQLContext") [assignment] posthog/hogql/transforms/in_cohort.py:0: note: PEP 484 prohibits implicit Optional. Accordingly, mypy has changed its default to no_implicit_optional=True posthog/hogql/transforms/in_cohort.py:0: note: Use https://github.com/hauntsaninja/no_implicit_optional to automatically upgrade your codebase diff --git a/posthog/api/test/__snapshots__/test_query.ambr b/posthog/api/test/__snapshots__/test_query.ambr index 34651d435fb9b..e52c9362b4398 100644 --- a/posthog/api/test/__snapshots__/test_query.ambr +++ b/posthog/api/test/__snapshots__/test_query.ambr @@ -375,7 +375,8 @@ concat(ifNull(toString(events.event), ''), ' ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'key'), ''), 'null'), '^"|"$', '')), '')) FROM events INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -390,7 +391,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), ifNull(equals(events__pdi__person.properties___email, 'tom@posthog.com'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) ORDER BY events.event ASC LIMIT 101 @@ -409,7 +410,8 @@ concat(ifNull(toString(events.event), ''), ' ', ifNull(toString(nullIf(nullIf(events.mat_key, ''), 'null')), '')) FROM events INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -424,7 +426,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), ifNull(equals(events__pdi__person.properties___email, 'tom@posthog.com'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) ORDER BY events.event ASC LIMIT 101 diff --git a/posthog/hogql/database/argmax.py b/posthog/hogql/database/argmax.py index 0302ac14ddb26..c6e479db07951 100644 --- a/posthog/hogql/database/argmax.py +++ b/posthog/hogql/database/argmax.py @@ -3,7 +3,7 @@ def argmax_select( table_name: str, - select_fields: Dict[str, List[str]], + select_fields: Dict[str, List[str | int]], group_fields: List[str], argmax_field: str, deleted_field: Optional[str] = None, diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index 3ed88e35555f8..aec1800c71eb1 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -1,8 +1,8 @@ from typing import TYPE_CHECKING, Any, ClassVar, Dict, List, Literal, Optional, TypedDict from zoneinfo import ZoneInfo, ZoneInfoNotFoundError from pydantic import ConfigDict, BaseModel +from posthog.hogql import ast from posthog.hogql.context import HogQLContext - from posthog.hogql.database.models import ( FieldTraverser, StringDatabaseField, @@ -167,7 +167,7 @@ def create_hogql_database( elif modifiers.personsOnEventsMode == PersonsOnEventsMode.v2_enabled: database.events.fields["event_person_id"] = StringDatabaseField(name="person_id") database.events.fields["override"] = LazyJoin( - from_field="event_person_id", + from_field=["event_person_id"], join_table=PersonOverridesTable(), join_function=join_with_person_overrides_table, ) @@ -203,8 +203,19 @@ def create_hogql_database( source_table = database.get_table(join.source_table_name) joining_table = database.get_table(join.joining_table_name) - source_table.fields[join.joining_table_name] = LazyJoin( - from_field=join.joining_table_key, + field = parse_expr(join.source_table_key) + if not isinstance(field, ast.Field): + raise HogQLException("Data Warehouse Join HogQL expression should be a Field node") + from_field = field.chain + + field = parse_expr(join.joining_table_key) + if not isinstance(field, ast.Field): + raise HogQLException("Data Warehouse Join HogQL expression should be a Field node") + to_field = field.chain + + source_table.fields[join.field_name] = LazyJoin( + from_field=from_field, + to_field=to_field, join_table=joining_table, join_function=join.join_function, ) diff --git a/posthog/hogql/database/models.py b/posthog/hogql/database/models.py index 1aa7beeede0da..e95a26614bed8 100644 --- a/posthog/hogql/database/models.py +++ b/posthog/hogql/database/models.py @@ -114,7 +114,8 @@ class LazyJoin(FieldOrTable): join_function: Callable[[str, str, Dict[str, Any], "HogQLContext", "SelectQuery"], Any] join_table: Table | str - from_field: str + from_field: List[str | int] + to_field: Optional[List[str | int]] = None def resolve_table(self, context: "HogQLContext") -> Table: if isinstance(self.join_table, Table): @@ -133,7 +134,7 @@ class LazyTable(Table): model_config = ConfigDict(extra="forbid") - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers) -> Any: + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers) -> Any: raise NotImplementedException("LazyTable.lazy_select not overridden") diff --git a/posthog/hogql/database/schema/cohort_people.py b/posthog/hogql/database/schema/cohort_people.py index da11fc2fcafea..72080419b7355 100644 --- a/posthog/hogql/database/schema/cohort_people.py +++ b/posthog/hogql/database/schema/cohort_people.py @@ -1,4 +1,4 @@ -from typing import Dict, Any, List +from typing import Dict, List from posthog.hogql.database.models import ( StringDatabaseField, @@ -16,14 +16,14 @@ "cohort_id": IntegerDatabaseField(name="cohort_id"), "team_id": IntegerDatabaseField(name="team_id"), "person": LazyJoin( - from_field="person_id", + from_field=["person_id"], join_table="persons", join_function=join_with_persons_table, ), } -def select_from_cohort_people_table(requested_fields: Dict[str, List[str]]): +def select_from_cohort_people_table(requested_fields: Dict[str, List[str | int]]): from posthog.hogql import ast table_name = "raw_cohort_people" @@ -34,12 +34,14 @@ def select_from_cohort_people_table(requested_fields: Dict[str, List[str]]): "cohort_id": ["cohort_id"], **requested_fields, } - fields: List[ast.Expr] = [ast.Field(chain=[table_name] + chain) for name, chain in requested_fields.items()] + fields: List[ast.Expr] = [ + ast.Alias(alias=name, expr=ast.Field(chain=[table_name] + chain)) for name, chain in requested_fields.items() + ] return ast.SelectQuery( select=fields, select_from=ast.JoinExpr(table=ast.Field(chain=[table_name])), - group_by=fields, + group_by=[ast.Field(chain=[name]) for name, chain in requested_fields.items()], having=ast.CompareOperation( op=ast.CompareOperationOp.Gt, left=ast.Call(name="sum", args=[ast.Field(chain=[table_name, "sign"])]), @@ -65,7 +67,7 @@ def to_printed_hogql(self): class CohortPeople(LazyTable): fields: Dict[str, FieldOrTable] = COHORT_PEOPLE_FIELDS - def lazy_select(self, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): return select_from_cohort_people_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/events.py b/posthog/hogql/database/schema/events.py index 5a4a7f132af80..825393654f127 100644 --- a/posthog/hogql/database/schema/events.py +++ b/posthog/hogql/database/schema/events.py @@ -70,7 +70,7 @@ class EventsTable(Table): "$window_id": StringDatabaseField(name="$window_id"), # Lazy table that adds a join to the persons table "pdi": LazyJoin( - from_field="distinct_id", + from_field=["distinct_id"], join_table=PersonDistinctIdsTable(), join_function=join_with_person_distinct_ids_table, ), @@ -86,36 +86,36 @@ class EventsTable(Table): "person_id": FieldTraverser(chain=["pdi", "person_id"]), "$group_0": StringDatabaseField(name="$group_0"), "group_0": LazyJoin( - from_field="$group_0", + from_field=["$group_0"], join_table=GroupsTable(), join_function=join_with_group_n_table(0), ), "$group_1": StringDatabaseField(name="$group_1"), "group_1": LazyJoin( - from_field="$group_1", + from_field=["$group_1"], join_table=GroupsTable(), join_function=join_with_group_n_table(1), ), "$group_2": StringDatabaseField(name="$group_2"), "group_2": LazyJoin( - from_field="$group_2", + from_field=["$group_2"], join_table=GroupsTable(), join_function=join_with_group_n_table(2), ), "$group_3": StringDatabaseField(name="$group_3"), "group_3": LazyJoin( - from_field="$group_3", + from_field=["$group_3"], join_table=GroupsTable(), join_function=join_with_group_n_table(3), ), "$group_4": StringDatabaseField(name="$group_4"), "group_4": LazyJoin( - from_field="$group_4", + from_field=["$group_4"], join_table=GroupsTable(), join_function=join_with_group_n_table(4), ), "session": LazyJoin( - from_field="$session_id", + from_field=["$session_id"], join_table=EventsSessionSubTable(), join_function=join_with_events_table_session_duration, ), diff --git a/posthog/hogql/database/schema/groups.py b/posthog/hogql/database/schema/groups.py index 39382b246349b..bb237d68e8070 100644 --- a/posthog/hogql/database/schema/groups.py +++ b/posthog/hogql/database/schema/groups.py @@ -25,7 +25,7 @@ } -def select_from_groups_table(requested_fields: Dict[str, List[str]]): +def select_from_groups_table(requested_fields: Dict[str, List[str | int]]): return argmax_select( table_name="raw_groups", select_fields=requested_fields, @@ -83,7 +83,7 @@ def to_printed_hogql(self): class GroupsTable(LazyTable): fields: Dict[str, FieldOrTable] = GROUPS_TABLE_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): return select_from_groups_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/log_entries.py b/posthog/hogql/database/schema/log_entries.py index a7ac459aab4ab..c14e90e26da50 100644 --- a/posthog/hogql/database/schema/log_entries.py +++ b/posthog/hogql/database/schema/log_entries.py @@ -35,7 +35,7 @@ def to_printed_hogql(self): class ReplayConsoleLogsLogEntriesTable(LazyTable): fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()] return ast.SelectQuery( @@ -58,7 +58,7 @@ def to_printed_hogql(self): class BatchExportLogEntriesTable(LazyTable): fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()] return ast.SelectQuery( diff --git a/posthog/hogql/database/schema/person_distinct_ids.py b/posthog/hogql/database/schema/person_distinct_ids.py index 65c0fb22a6722..02144b35fc3d8 100644 --- a/posthog/hogql/database/schema/person_distinct_ids.py +++ b/posthog/hogql/database/schema/person_distinct_ids.py @@ -21,14 +21,14 @@ "distinct_id": StringDatabaseField(name="distinct_id"), "person_id": StringDatabaseField(name="person_id"), "person": LazyJoin( - from_field="person_id", + from_field=["person_id"], join_table="persons", join_function=join_with_persons_table, ), } -def select_from_person_distinct_ids_table(requested_fields: Dict[str, List[str]]): +def select_from_person_distinct_ids_table(requested_fields: Dict[str, List[str | int]]): # Always include "person_id", as it's the key we use to make further joins, and it'd be great if it's available if "person_id" not in requested_fields: requested_fields = {**requested_fields, "person_id": ["person_id"]} @@ -82,7 +82,7 @@ def to_printed_hogql(self): class PersonDistinctIdsTable(LazyTable): fields: Dict[str, FieldOrTable] = PERSON_DISTINCT_IDS_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): return select_from_person_distinct_ids_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/person_overrides.py b/posthog/hogql/database/schema/person_overrides.py index 5be6dd1e7d5ae..a33a7439b4982 100644 --- a/posthog/hogql/database/schema/person_overrides.py +++ b/posthog/hogql/database/schema/person_overrides.py @@ -24,7 +24,7 @@ } -def select_from_person_overrides_table(requested_fields: Dict[str, List[str]]): +def select_from_person_overrides_table(requested_fields: Dict[str, List[str | int]]): return argmax_select( table_name="raw_person_overrides", select_fields=requested_fields, @@ -74,7 +74,7 @@ def to_printed_hogql(self): class PersonOverridesTable(Table): fields: Dict[str, FieldOrTable] = PERSON_OVERRIDES_FIELDS - def lazy_select(self, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): return select_from_person_overrides_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/persons.py b/posthog/hogql/database/schema/persons.py index f823f1ce3c9f4..a248da56b7307 100644 --- a/posthog/hogql/database/schema/persons.py +++ b/posthog/hogql/database/schema/persons.py @@ -26,14 +26,14 @@ "properties": StringJSONDatabaseField(name="properties"), "is_identified": BooleanDatabaseField(name="is_identified"), "pdi": LazyJoin( - from_field="id", + from_field=["id"], join_table=PersonsPDITable(), join_function=persons_pdi_join, ), } -def select_from_persons_table(requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): +def select_from_persons_table(requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): version = modifiers.personsArgMaxVersion if version == PersonsArgMaxVersion.auto: version = PersonsArgMaxVersion.v1 @@ -85,7 +85,7 @@ def select_from_persons_table(requested_fields: Dict[str, List[str]], modifiers: def join_with_persons_table( from_table: str, to_table: str, - requested_fields: Dict[str, List[str]], + requested_fields: Dict[str, List[str | int]], context: HogQLContext, node: SelectQuery, ): @@ -123,7 +123,7 @@ def to_printed_hogql(self): class PersonsTable(LazyTable): fields: Dict[str, FieldOrTable] = PERSONS_FIELDS - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): return select_from_persons_table(requested_fields, modifiers) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/persons_pdi.py b/posthog/hogql/database/schema/persons_pdi.py index 9c7fcf9e03e43..9f476f407b4d2 100644 --- a/posthog/hogql/database/schema/persons_pdi.py +++ b/posthog/hogql/database/schema/persons_pdi.py @@ -15,7 +15,7 @@ # :NOTE: We already have person_distinct_ids.py, which most tables link to. This persons_pdi.py is a hack to # make "select persons.pdi.distinct_id from persons" work while avoiding circular imports. Don't use directly. -def persons_pdi_select(requested_fields: Dict[str, List[str]]): +def persons_pdi_select(requested_fields: Dict[str, List[str | int]]): # Always include "person_id", as it's the key we use to make further joins, and it'd be great if it's available if "person_id" not in requested_fields: requested_fields = {**requested_fields, "person_id": ["person_id"]} @@ -33,7 +33,7 @@ def persons_pdi_select(requested_fields: Dict[str, List[str]]): def persons_pdi_join( from_table: str, to_table: str, - requested_fields: Dict[str, List[str]], + requested_fields: Dict[str, List[str | int]], context: HogQLContext, node: SelectQuery, ): @@ -63,7 +63,7 @@ class PersonsPDITable(LazyTable): "person_id": StringDatabaseField(name="person_id"), } - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): return persons_pdi_select(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/session_replay_events.py b/posthog/hogql/database/schema/session_replay_events.py index 7a2097c2b8b5f..c9d564c7d4588 100644 --- a/posthog/hogql/database/schema/session_replay_events.py +++ b/posthog/hogql/database/schema/session_replay_events.py @@ -37,7 +37,7 @@ "event_count": IntegerDatabaseField(name="event_count"), "message_count": IntegerDatabaseField(name="message_count"), "pdi": LazyJoin( - from_field="distinct_id", + from_field=["distinct_id"], join_table=PersonDistinctIdsTable(), join_function=join_with_person_distinct_ids_table, ), @@ -64,7 +64,7 @@ def to_printed_hogql(self): return "raw_session_replay_events" -def select_from_session_replay_events_table(requested_fields: Dict[str, List[str]]): +def select_from_session_replay_events_table(requested_fields: Dict[str, List[str | int]]): from posthog.hogql import ast table_name = "raw_session_replay_events" @@ -115,7 +115,7 @@ class SessionReplayEventsTable(LazyTable): "first_url": StringDatabaseField(name="first_url"), } - def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers): + def lazy_select(self, requested_fields: Dict[str, List[str | int]], modifiers: HogQLQueryModifiers): return select_from_session_replay_events_table(requested_fields) def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/schema/static_cohort_people.py b/posthog/hogql/database/schema/static_cohort_people.py index f209b1186e55e..97d90cbd6dcac 100644 --- a/posthog/hogql/database/schema/static_cohort_people.py +++ b/posthog/hogql/database/schema/static_cohort_people.py @@ -16,7 +16,7 @@ class StaticCohortPeople(Table): "cohort_id": IntegerDatabaseField(name="cohort_id"), "team_id": IntegerDatabaseField(name="team_id"), "person": LazyJoin( - from_field="person_id", + from_field=["person_id"], join_table="persons", join_function=join_with_persons_table, ), diff --git a/posthog/hogql/test/__snapshots__/test_query.ambr b/posthog/hogql/test/__snapshots__/test_query.ambr index 4a900f6fad538..6ee77080d738f 100644 --- a/posthog/hogql/test/__snapshots__/test_query.ambr +++ b/posthog/hogql/test/__snapshots__/test_query.ambr @@ -128,8 +128,12 @@ -- ClickHouse SELECT e.event AS event, s.session_id AS session_id - FROM events AS e LEFT JOIN session_replay_events AS s ON equals(s.session_id, nullIf(nullIf(e.`$session_id`, ''), 'null')) - WHERE and(equals(s.team_id, 420), equals(e.team_id, 420), isNotNull(nullIf(nullIf(e.`$session_id`, ''), 'null'))) + FROM events AS e LEFT JOIN ( + SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 420) + GROUP BY session_replay_events.session_id) AS s ON equals(s.session_id, nullIf(nullIf(e.`$session_id`, ''), 'null')) + WHERE and(equals(e.team_id, 420), isNotNull(nullIf(nullIf(e.`$session_id`, ''), 'null'))) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -155,8 +159,12 @@ -- ClickHouse SELECT e.event AS event, s.session_id AS session_id - FROM session_replay_events AS s LEFT JOIN events AS e ON equals(nullIf(nullIf(e.`$session_id`, ''), 'null'), s.session_id) - WHERE and(equals(e.team_id, 420), equals(s.team_id, 420), isNotNull(nullIf(nullIf(e.`$session_id`, ''), 'null'))) + FROM ( + SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 420) + GROUP BY session_replay_events.session_id) AS s LEFT JOIN events AS e ON equals(nullIf(nullIf(e.`$session_id`, ''), 'null'), s.session_id) + WHERE and(equals(e.team_id, 420), isNotNull(nullIf(nullIf(e.`$session_id`, ''), 'null'))) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -182,8 +190,12 @@ -- ClickHouse SELECT e.event AS event, s.session_id AS session_id - FROM events AS e LEFT JOIN session_replay_events AS s ON equals(s.session_id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '')) - WHERE and(equals(s.team_id, 420), equals(e.team_id, 420), isNotNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''))) + FROM events AS e LEFT JOIN ( + SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 420) + GROUP BY session_replay_events.session_id) AS s ON equals(s.session_id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '')) + WHERE and(equals(e.team_id, 420), isNotNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''))) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -209,8 +221,12 @@ -- ClickHouse SELECT e.event AS event, s.session_id AS session_id - FROM session_replay_events AS s LEFT JOIN events AS e ON equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), s.session_id) - WHERE and(equals(e.team_id, 420), equals(s.team_id, 420), isNotNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''))) + FROM ( + SELECT session_replay_events.session_id AS session_id + FROM session_replay_events + WHERE equals(session_replay_events.team_id, 420) + GROUP BY session_replay_events.session_id) AS s LEFT JOIN events AS e ON equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), s.session_id) + WHERE and(equals(e.team_id, 420), isNotNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', ''))) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -450,7 +466,7 @@ SELECT e.event AS event, toTimeZone(e.timestamp, %(hogql_val_1)s) AS timestamp, e__pdi.distinct_id AS distinct_id, e__pdi__person.properties___sneaky_mail AS sneaky_mail FROM events AS e INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -463,7 +479,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) - SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE equals(e.team_id, 420) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -481,7 +497,7 @@ SELECT events.event AS event, toTimeZone(events.timestamp, %(hogql_val_0)s) AS timestamp, events__pdi.distinct_id AS distinct_id, events__pdi__person.id AS id FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -491,7 +507,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -509,7 +525,7 @@ SELECT events.event AS event, toTimeZone(events.timestamp, %(hogql_val_1)s) AS timestamp, events__pdi.distinct_id AS distinct_id, events__pdi__person.properties___sneaky_mail AS sneaky_mail FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -522,7 +538,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -540,7 +556,7 @@ SELECT e.event AS event, toTimeZone(e.timestamp, %(hogql_val_1)s) AS timestamp, e__pdi__person.properties___sneaky_mail AS sneaky_mail FROM events AS e INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -553,7 +569,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) - SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE equals(e.team_id, 420) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -571,7 +587,7 @@ SELECT s__pdi__person.properties___sneaky_mail AS sneaky_mail, count() FROM events AS s INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS s__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -584,7 +600,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) - SETTINGS optimize_aggregation_in_order=1) AS s__pdi__person ON equals(s__pdi.person_id, s__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS s__pdi__person ON equals(s__pdi.s__pdi___person_id, s__pdi__person.id) WHERE equals(s.team_id, 420) GROUP BY s__pdi__person.properties___sneaky_mail LIMIT 10 @@ -629,7 +645,12 @@ -- ClickHouse SELECT pdi.distinct_id AS distinct_id, pdi__person.properties___sneaky_mail AS sneaky_mail - FROM person_distinct_id2 AS pdi INNER JOIN ( + FROM ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS pdi INNER JOIN ( SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail FROM person WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( @@ -638,8 +659,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) - SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.person_id, pdi__person.id) - WHERE equals(pdi.team_id, 420) + SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.pdi___person_id, pdi__person.id) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -655,14 +675,18 @@ -- ClickHouse SELECT pdi.distinct_id AS distinct_id, toTimeZone(pdi__person.created_at, %(hogql_val_0)s) AS created_at - FROM person_distinct_id2 AS pdi INNER JOIN ( + FROM ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS pdi INNER JOIN ( SELECT argMax(person.created_at, person.version) AS created_at, person.id AS id FROM person WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.person_id, pdi__person.id) - WHERE equals(pdi.team_id, 420) + SETTINGS optimize_aggregation_in_order=1) AS pdi__person ON equals(pdi.pdi___person_id, pdi__person.id) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -677,9 +701,23 @@ ''' -- ClickHouse - SELECT e.event AS event, toTimeZone(e.timestamp, %(hogql_val_0)s) AS timestamp, pdi.distinct_id AS distinct_id, p.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(p.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', '') AS sneaky_mail - FROM events AS e LEFT JOIN person_distinct_id2 AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN person AS p ON equals(p.id, pdi.person_id) - WHERE and(equals(p.team_id, 420), equals(pdi.team_id, 420), equals(e.team_id, 420)) + SELECT e.event AS event, toTimeZone(e.timestamp, %(hogql_val_1)s) AS timestamp, pdi.distinct_id AS distinct_id, p.id AS id, p.properties___sneaky_mail AS sneaky_mail + FROM events AS e LEFT JOIN ( + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + FROM person_distinct_id2 + WHERE equals(person_distinct_id2.team_id, 420) + GROUP BY person_distinct_id2.distinct_id + HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0)) AS pdi ON equals(pdi.distinct_id, e.distinct_id) LEFT JOIN ( + SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', '') AS properties___sneaky_mail + FROM person + WHERE and(equals(person.team_id, 420), ifNull(in(tuple(person.id, person.version), ( + SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) + SETTINGS optimize_aggregation_in_order=1) AS p ON equals(p.id, pdi.person_id) + WHERE equals(e.team_id, 420) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -717,7 +755,7 @@ SELECT events.event AS event, toTimeZone(events.timestamp, %(hogql_val_1)s) AS timestamp, events__pdi__person.id AS id, events__pdi__person.properties___sneaky_mail AS sneaky_mail FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -730,7 +768,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 diff --git a/posthog/hogql/test/__snapshots__/test_resolver.ambr b/posthog/hogql/test/__snapshots__/test_resolver.ambr index 78eba9a1eb515..1b086d067a621 100644 --- a/posthog/hogql/test/__snapshots__/test_resolver.ambr +++ b/posthog/hogql/test/__snapshots__/test_resolver.ambr @@ -3266,14 +3266,19 @@ table_type: { field: "person" lazy_join: { - from_field: "person_id", + from_field: [ + "person_id" + ], join_function: , - join_table: "persons" + join_table: "persons", + to_field: None } table_type: { field: "pdi" lazy_join: { - from_field: "distinct_id", + from_field: [ + "distinct_id" + ], join_function: , join_table: { fields: { @@ -3282,7 +3287,8 @@ person_id: {}, team_id: {} } - } + }, + to_field: None } table_type: } @@ -3393,14 +3399,19 @@ table_type: { field: "person" lazy_join: { - from_field: "person_id", + from_field: [ + "person_id" + ], join_function: , - join_table: "persons" + join_table: "persons", + to_field: None } table_type: { field: "pdi" lazy_join: { - from_field: "distinct_id", + from_field: [ + "distinct_id" + ], join_function: , join_table: { fields: { @@ -3409,7 +3420,8 @@ person_id: {}, team_id: {} } - } + }, + to_field: None } table_type: } @@ -3516,7 +3528,9 @@ table_type: { field: "pdi" lazy_join: { - from_field: "distinct_id", + from_field: [ + "distinct_id" + ], join_function: , join_table: { fields: { @@ -3525,7 +3539,8 @@ person_id: {}, team_id: {} } - } + }, + to_field: None } table_type: } @@ -3634,7 +3649,9 @@ table_type: { field: "pdi" lazy_join: { - from_field: "distinct_id", + from_field: [ + "distinct_id" + ], join_function: , join_table: { fields: { @@ -3643,7 +3660,8 @@ person_id: {}, team_id: {} } - } + }, + to_field: None } table_type: } @@ -3723,9 +3741,12 @@ table_type: { field: "person" lazy_join: { - from_field: "person_id", + from_field: [ + "person_id" + ], join_function: , - join_table: "persons" + join_table: "persons", + to_field: None } table_type: } diff --git a/posthog/hogql/transforms/lazy_tables.py b/posthog/hogql/transforms/lazy_tables.py index 4734fed012c91..bdbb322d54397 100644 --- a/posthog/hogql/transforms/lazy_tables.py +++ b/posthog/hogql/transforms/lazy_tables.py @@ -21,7 +21,7 @@ def resolve_lazy_tables( @dataclasses.dataclass class JoinToAdd: - fields_accessed: Dict[str, List[str]] + fields_accessed: Dict[str, List[str | int]] lazy_join: LazyJoin from_table: str to_table: str @@ -29,11 +29,43 @@ class JoinToAdd: @dataclasses.dataclass class TableToAdd: - fields_accessed: Dict[str, List[str]] + fields_accessed: Dict[str, List[str | int]] lazy_table: LazyTable +@dataclasses.dataclass +class ConstraintOverride: + alias: str + table_name: str + chain_to_replace: List[str | int] + + +class FieldChainReplacer(TraversingVisitor): + overrides: List[ConstraintOverride] = {} + + def __init__(self, overrides: List[ConstraintOverride]) -> None: + super().__init__() + self.overrides = overrides + + def visit_field(self, node: ast.Field): + for constraint in self.overrides: + if node.chain == constraint.chain_to_replace: + node.chain = [constraint.table_name, constraint.alias] + + +class LazyFinder(TraversingVisitor): + found_lazy: bool = False + + def visit_lazy_join_type(self, node: ast.LazyJoinType): + self.found_lazy = True + + def visit_lazy_table_type(self, node: ast.TableType): + self.found_lazy = True + + class LazyTableResolver(TraversingVisitor): + lazy_finder_counter = 0 + def __init__( self, dialect: Literal["hogql", "clickhouse"], @@ -43,15 +75,19 @@ def __init__( super().__init__() self.stack_of_fields: List[List[ast.FieldType | ast.PropertyType]] = [[]] if stack else [] self.context = context - self.dialect = dialect + self.dialect: Literal["hogql", "clickhouse"] = dialect def visit_property_type(self, node: ast.PropertyType): if node.joined_subquery is not None: # we have already visited this property return - if isinstance(node.field_type.table_type, ast.LazyJoinType) or isinstance( - node.field_type.table_type, ast.LazyTableType - ): + + if isinstance(node.field_type.table_type, ast.TableAliasType): + table_type: ast.TableOrSelectType | ast.TableAliasType = node.field_type.table_type.table_type + else: + table_type = node.field_type.table_type + + if isinstance(table_type, ast.LazyJoinType) or isinstance(table_type, ast.LazyTableType): if self.context and self.context.within_non_hogql_query: # If we're in a non-HogQL query, traverse deeper, just like we normally would have. self.visit(node.field_type) @@ -62,7 +98,12 @@ def visit_property_type(self, node: ast.PropertyType): self.stack_of_fields[-1].append(node) def visit_field_type(self, node: ast.FieldType): - if isinstance(node.table_type, ast.LazyJoinType) or isinstance(node.table_type, ast.LazyTableType): + if isinstance(node.table_type, ast.TableAliasType): + table_type: ast.TableOrSelectType | ast.TableAliasType = node.table_type.table_type + else: + table_type = node.table_type + + if isinstance(table_type, ast.LazyJoinType) or isinstance(table_type, ast.LazyTableType): # Each time we find a field, we place it in a list for processing in "visit_select_query" if len(self.stack_of_fields) == 0: raise HogQLException("Can't access a lazy field when not in a SelectQuery context") @@ -73,8 +114,11 @@ def visit_select_query(self, node: ast.SelectQuery): if not select_type: raise HogQLException("Select query must have a type") + assert node.type is not None + assert select_type is not None + # Collect each `ast.Field` with `ast.LazyJoinType` - field_collector: List[ast.FieldType] = [] + field_collector: List[ast.FieldType | ast.PropertyType] = [] self.stack_of_fields.append(field_collector) # Collect all visited fields on lazy tables into field_collector @@ -96,15 +140,23 @@ def visit_select_query(self, node: ast.SelectQuery): # Look for tables without requested fields to support cases like `select count() from table` join = node.select_from while join: - if isinstance(join.table.type, ast.LazyTableType): - fields = [] + if join.table is not None and isinstance(join.table.type, ast.LazyTableType): + fields: List[ast.FieldType | ast.PropertyType] = [] for field_or_property in field_collector: if isinstance(field_or_property, ast.FieldType): - if field_or_property.table_type == join.table.type: - fields.append(field_or_property) + if isinstance(field_or_property.table_type, ast.TableAliasType): + if field_or_property.table_type.table_type == join.table.type: + fields.append(field_or_property) + else: + if field_or_property.table_type == join.table.type: + fields.append(field_or_property) elif isinstance(field_or_property, ast.PropertyType): - if field_or_property.field_type.table_type == join.table.type: - fields.append(field_or_property) + if isinstance(field_or_property.field_type.table_type, ast.TableAliasType): + if field_or_property.field_type.table_type.table_type == join.table.type: + fields.append(field_or_property) + else: + if field_or_property.field_type.table_type == join.table.type: + fields.append(field_or_property) if len(fields) == 0: table_name = join.alias or get_long_table_name(select_type, join.table.type) tables_to_add[table_name] = TableToAdd(fields_accessed={}, lazy_table=join.table.type.table) @@ -123,8 +175,16 @@ def visit_select_query(self, node: ast.SelectQuery): # Traverse the lazy tables until we reach a real table, collecting them in a list. # Usually there's just one or two. - table_types: List[ast.LazyJoinType | ast.LazyTableType] = [] - while isinstance(table_type, ast.LazyJoinType) or isinstance(table_type, ast.LazyTableType): + table_types: List[ast.LazyJoinType | ast.LazyTableType | ast.TableAliasType] = [] + while ( + isinstance(table_type, ast.TableAliasType) + or isinstance(table_type, ast.LazyJoinType) + or isinstance(table_type, ast.LazyTableType) + ): + if isinstance(table_type, ast.TableAliasType): + table_types.append(table_type) + table_type = table_type.table_type + break if isinstance(table_type, ast.LazyJoinType): table_types.append(table_type) table_type = table_type.table_type @@ -146,11 +206,13 @@ def visit_select_query(self, node: ast.SelectQuery): ) new_join = joins_to_add[to_table] if table_type == field.table_type: - chain = [] + chain: List[str | int] = [] chain.append(field.name) if property is not None: chain.extend(property.chain) - property.joined_subquery_field_name = f"{field.name}___{'___'.join(property.chain)}" + property.joined_subquery_field_name = ( + f"{field.name}___{'___'.join(map(lambda x: str(x), property.chain))}" + ) new_join.fields_accessed[property.joined_subquery_field_name] = chain else: new_join.fields_accessed[field.name] = chain @@ -167,18 +229,83 @@ def visit_select_query(self, node: ast.SelectQuery): chain.append(field.name) if property is not None: chain.extend(property.chain) - property.joined_subquery_field_name = f"{field.name}___{'___'.join(property.chain)}" + property.joined_subquery_field_name = ( + f"{field.name}___{'___'.join(map(lambda x: str(x), property.chain))}" + ) new_table.fields_accessed[property.joined_subquery_field_name] = chain else: new_table.fields_accessed[field.name] = chain + elif isinstance(table_type, ast.TableAliasType): + if isinstance(table_type.table_type, ast.LazyJoinType): + from_table = get_long_table_name(select_type, table_type.table_type) + to_table = get_long_table_name(select_type, table_type) + if to_table not in joins_to_add: + joins_to_add[to_table] = JoinToAdd( + fields_accessed={}, # collect here all fields accessed on this table + lazy_join=table_type.table_type.lazy_join, + from_table=from_table, + to_table=to_table, + ) + new_join = joins_to_add[to_table] + if table_type == field.table_type: + chain: List[str | int] = [] + chain.append(field.name) + if property is not None: + chain.extend(property.chain) + property.joined_subquery_field_name = ( + f"{field.name}___{'___'.join(map(lambda x: str(x), property.chain))}" + ) + new_join.fields_accessed[property.joined_subquery_field_name] = chain + else: + new_join.fields_accessed[field.name] = chain + elif isinstance(table_type.table_type, ast.LazyTableType): + table_name = get_long_table_name(select_type, table_type) + if table_name not in tables_to_add: + tables_to_add[table_name] = TableToAdd( + fields_accessed={}, # collect here all fields accessed on this table + lazy_table=cast(ast.LazyTable, table_type.table_type.table), + ) + new_table = tables_to_add[table_name] + if table_type == field.table_type: + chain = [] + chain.append(field.name) + if property is not None: + chain.extend(property.chain) + property.joined_subquery_field_name = ( + f"{field.name}___{'___'.join(map(lambda x: str(x), property.chain))}" + ) + new_table.fields_accessed[property.joined_subquery_field_name] = chain + else: + new_table.fields_accessed[field.name] = chain # Make sure we also add fields we will use for the join's "ON" condition into the list of fields accessed. # Without this "pdi.person.id" won't work if you did not ALSO select "pdi.person_id" explicitly for the join. + join_constraint_overrides: Dict[str, List[ConstraintOverride]] = {} + + def create_override(table_name: str, field_chain: List[str | int]) -> None: + alias = f"{table_name}___{'___'.join(map(lambda x: str(x), field_chain))}" + + if table_name in tables_to_add: + tables_to_add[table_name].fields_accessed[alias] = field_chain + else: + joins_to_add[table_name].fields_accessed[alias] = field_chain + + join_constraint_overrides[table_name] = [ + *join_constraint_overrides.get(table_name, []), + ConstraintOverride( + alias=alias, + table_name=table_name, + chain_to_replace=[table_name, *field_chain], + ), + ] + for new_join in joins_to_add.values(): - if new_join.from_table in joins_to_add: - joins_to_add[new_join.from_table].fields_accessed[new_join.lazy_join.from_field] = [ - new_join.lazy_join.from_field - ] + if new_join.from_table in joins_to_add or new_join.from_table in tables_to_add: + create_override(new_join.from_table, new_join.lazy_join.from_field) + if new_join.lazy_join.to_field is not None and ( + new_join.to_table in joins_to_add or new_join.to_table in tables_to_add + ): + create_override(new_join.to_table, new_join.lazy_join.to_field) # For all the collected tables, create the subqueries, and add them to the table. for table_name, table_to_add in tables_to_add.items(): @@ -190,7 +317,13 @@ def visit_select_query(self, node: ast.SelectQuery): join_ptr = node.select_from while join_ptr: - if join_ptr.table.type == old_table_type: + if join_ptr.table is not None and ( + join_ptr.table.type == old_table_type + or ( + isinstance(old_table_type, ast.TableAliasType) + and join_ptr.table.type == old_table_type.table_type + ) + ): join_ptr.table = subquery join_ptr.type = select_type.tables[table_name] join_ptr.alias = table_name @@ -206,10 +339,19 @@ def visit_select_query(self, node: ast.SelectQuery): self.context, node, ) - join_to_add = cast(ast.JoinExpr, clone_expr(join_to_add, clear_locations=True)) + + overrides = [ + *join_constraint_overrides.get(join_scope.to_table, []), + *join_constraint_overrides.get(join_scope.from_table, []), + ] + if len(overrides) != 0: + FieldChainReplacer(overrides).visit(join_to_add) + + join_to_add = cast(ast.JoinExpr, clone_expr(join_to_add, clear_locations=True, clear_types=True)) join_to_add = cast(ast.JoinExpr, resolve_types(join_to_add, self.context, self.dialect, [node.type])) - select_type.tables[to_table] = join_to_add.type + if join_to_add.type is not None: + select_type.tables[to_table] = join_to_add.type join_ptr = node.select_from added = False @@ -252,3 +394,12 @@ def visit_select_query(self, node: ast.SelectQuery): field_or_property.joined_subquery = table_type self.stack_of_fields.pop() + + # When joining a lazy table to another lazy table, the joined table doesn't get resolved + # Doing another pass solves this for us + if self.lazy_finder_counter < 20: + lazy_finder = LazyFinder() + lazy_finder.visit(node) + if lazy_finder.found_lazy: + self.lazy_finder_counter = self.lazy_finder_counter + 1 + self.visit_select_query(node) diff --git a/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr b/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr index 5c75c0ab02cca..0f20b0df0bd44 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_lazy_tables.ambr @@ -1,4 +1,63 @@ # serializer version: 1 +# name: TestLazyJoins.test_lazy_join_on_lazy_table + ''' + + SELECT cohort_people__new_person.id AS id + FROM ( + SELECT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id, cohortpeople.person_id AS cohort_people___person_id + FROM cohortpeople + WHERE equals(cohortpeople.team_id, 420) + GROUP BY person_id, cohort_id, cohort_people___person_id + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS cohort_people LEFT JOIN ( + SELECT persons.id AS id, id AS cohort_people__new_person___id + FROM ( + SELECT person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons) AS cohort_people__new_person ON equals(cohort_people.cohort_people___person_id, cohort_people__new_person.cohort_people__new_person___id) + LIMIT 10000 + ''' +# --- +# name: TestLazyJoins.test_lazy_join_on_lazy_table_with_person_properties + ''' + + SELECT persons__events.event AS event + FROM ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS persons___properties___email, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons LEFT JOIN ( + SELECT events.event AS event, event AS persons__events___event + FROM events + WHERE equals(events.team_id, 420)) AS persons__events ON equals(persons.persons___properties___email, persons__events.persons__events___event) + LIMIT 10000 + ''' +# --- +# name: TestLazyJoins.test_lazy_join_on_lazy_table_with_properties + ''' + + SELECT cohort_people__new_person.id AS id + FROM ( + SELECT cohortpeople.person_id AS person_id, cohortpeople.cohort_id AS cohort_id, cohortpeople.person_id AS cohort_people___person_id + FROM cohortpeople + WHERE equals(cohortpeople.team_id, 420) + GROUP BY person_id, cohort_id, cohort_people___person_id + HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS cohort_people LEFT JOIN ( + SELECT persons.id AS id, persons.properties___email AS cohort_people__new_person___properties___email + FROM ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS properties___email, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons) AS cohort_people__new_person ON equals(cohort_people.cohort_people___person_id, cohort_people__new_person.cohort_people__new_person___properties___email) + LIMIT 10000 + ''' +# --- # name: TestLazyJoins.test_resolve_lazy_table_as_select_table ''' @@ -52,7 +111,7 @@ SELECT person_distinct_ids__person.`properties___$browser` AS `$browser` FROM ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_distinct_ids___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -62,7 +121,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person ON equals(person_distinct_ids.person_id, person_distinct_ids__person.id) + SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person ON equals(person_distinct_ids.person_distinct_ids___person_id, person_distinct_ids__person.id) LIMIT 10000 ''' # --- @@ -71,7 +130,7 @@ SELECT person_distinct_ids__person.`properties___$browser___in___json` AS `$browser__in__json` FROM ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_distinct_ids___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -81,7 +140,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person ON equals(person_distinct_ids.person_id, person_distinct_ids__person.id) + SETTINGS optimize_aggregation_in_order=1) AS person_distinct_ids__person ON equals(person_distinct_ids.person_distinct_ids___person_id, person_distinct_ids__person.id) LIMIT 10000 ''' # --- @@ -104,7 +163,7 @@ SELECT events.event AS event, events__pdi__person.id AS id FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -114,7 +173,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10000 ''' @@ -124,7 +183,7 @@ SELECT events.event AS event, events__pdi__person.`properties___$browser` AS `$browser` FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -134,7 +193,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10000 ''' @@ -144,7 +203,7 @@ SELECT events.event AS event, events__pdi__person.properties AS properties, events__pdi__person.properties___name AS name FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -154,7 +213,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10000 ''' @@ -164,7 +223,7 @@ SELECT events.event AS event, events__pdi__person.id AS id FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -174,7 +233,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10000 ''' diff --git a/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr b/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr index 794edd853024c..259ae9de0f210 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr @@ -17,7 +17,7 @@ SELECT multiply(toFloat64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, %(hogql_val_1)s), ''), 'null'), '^"|"$', '')), toFloat64OrNull(events__pdi__person.properties___tickets)) FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -27,7 +27,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10000 ''' @@ -46,7 +46,7 @@ SELECT parseDateTime64BestEffortOrNull(events__pdi__person.properties___provided_timestamp, 6, %(hogql_val_1)s) AS provided_timestamp FROM events INNER JOIN ( - SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id + SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 420) GROUP BY person_distinct_id2.distinct_id @@ -56,7 +56,7 @@ WHERE equals(person.team_id, 420) GROUP BY person.id HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) - SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE equals(events.team_id, 420) LIMIT 10000 ''' diff --git a/posthog/hogql/transforms/test/test_lazy_tables.py b/posthog/hogql/transforms/test/test_lazy_tables.py index 131fcb227fbbc..8e93e16df1470 100644 --- a/posthog/hogql/transforms/test/test_lazy_tables.py +++ b/posthog/hogql/transforms/test/test_lazy_tables.py @@ -8,6 +8,7 @@ from posthog.hogql.printer import print_ast from posthog.hogql.test.utils import pretty_print_in_tests from posthog.test.base import BaseTest +from posthog.warehouse.models.join import DataWarehouseJoin class TestLazyJoins(BaseTest): @@ -86,3 +87,45 @@ def _print_select(self, select: str): "clickhouse", ) return pretty_print_in_tests(query, self.team.pk) + + @pytest.mark.usefixtures("unittest_snapshot") + def test_lazy_join_on_lazy_table(self): + DataWarehouseJoin( + team=self.team, + source_table_name="cohort_people", + source_table_key="person_id", + joining_table_name="persons", + joining_table_key="id", + field_name="new_person", + ).save() + + printed = self._print_select("select new_person.id from cohort_people") + assert printed == self.snapshot + + @pytest.mark.usefixtures("unittest_snapshot") + def test_lazy_join_on_lazy_table_with_properties(self): + DataWarehouseJoin( + team=self.team, + source_table_name="cohort_people", + source_table_key="person_id", + joining_table_name="persons", + joining_table_key="properties.email", + field_name="new_person", + ).save() + + printed = self._print_select("select new_person.id from cohort_people") + assert printed == self.snapshot + + @pytest.mark.usefixtures("unittest_snapshot") + def test_lazy_join_on_lazy_table_with_person_properties(self): + DataWarehouseJoin( + team=self.team, + source_table_name="persons", + source_table_key="properties.email", + joining_table_name="events", + joining_table_key="event", + field_name="events", + ).save() + + printed = self._print_select("select events.event from persons") + assert printed == self.snapshot diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index bec07968fb0f8..29a1483cafc1c 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -458,6 +458,7 @@ FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -473,7 +474,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, @@ -567,6 +568,7 @@ FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -582,7 +584,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, @@ -680,6 +682,7 @@ FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -695,7 +698,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, @@ -793,6 +796,7 @@ FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -808,7 +812,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'UTC')), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), toDateTime64('2020-07-01 23:59:59.999999', 6, 'UTC'))), in(e.event, tuple('$pageview', 'user signed up')), or(and(ifNull(ilike(e__pdi__person.properties___email, '%.com%'), 0), ifNull(equals(e__pdi__person.properties___age, '20'), 0)), or(ifNull(ilike(e__pdi__person.properties___email, '%.org%'), 0), ifNull(equals(e__pdi__person.properties___age, '28'), 0)))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0), ifNull(equals(step_2, 1), 0))))))) WHERE ifNull(equals(step_0, 1), 0))) GROUP BY aggregation_target, diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index d9d4a543c4c00..879021d448e4d 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -64,6 +64,7 @@ FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -75,7 +76,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), ifNull(in(person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -140,6 +141,7 @@ FROM events SAMPLE 0.1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -151,7 +153,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), equals(events.event, '$pageview')) GROUP BY person_id) GROUP BY start_of_period, @@ -211,6 +213,7 @@ FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -222,7 +225,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), equals(events.event, '$pageview')) GROUP BY person_id) GROUP BY start_of_period, @@ -282,6 +285,7 @@ FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -293,7 +297,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'US/Pacific'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'US/Pacific'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'US/Pacific'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), toIntervalDay(1))), equals(events.event, '$pageview')) GROUP BY person_id) GROUP BY start_of_period, diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_retention_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_retention_query_runner.ambr index 3e815818de18b..4e2714d2e7212 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_retention_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_retention_query_runner.ambr @@ -539,6 +539,7 @@ FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -553,7 +554,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), and(equals(events.event, '$pageview'), ifNull(equals(events__pdi__person.properties___email, 'person1@test.com'), 0)), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(toDateTime64('2020-06-10 00:00:00.000000', 6, 'UTC'))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-06-17 00:00:00.000000', 6, 'UTC'))))) AS target_event JOIN (SELECT toStartOfDay(toTimeZone(events.timestamp, 'UTC')) AS event_date, @@ -580,6 +581,7 @@ FROM events INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -594,7 +596,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.person_id, events__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) WHERE and(equals(events.team_id, 2), and(equals(events.event, '$pageview'), ifNull(equals(events__pdi__person.properties___email, 'person1@test.com'), 0)), and(greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfDay(toDateTime64('2020-06-10 00:00:00.000000', 6, 'UTC'))), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-06-17 00:00:00.000000', 6, 'UTC'))))) AS target_event) WHERE and(or(1, isNull(breakdown_values)), or(1, isNull(intervals_from_base)))) AS actor_activity GROUP BY breakdown_values, diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index fabfda5c56bf3..2f1c4c1de0917 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -67,6 +67,7 @@ FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -81,7 +82,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -622,6 +623,7 @@ FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -636,7 +638,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC')))), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -691,6 +693,7 @@ FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -705,7 +708,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople @@ -1076,7 +1079,8 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -1091,7 +1095,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-26 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-02 23:59:59', 6, 'UTC'))), equals(e.event, 'event_name'), ifNull(equals(e__pdi__person.properties___name, 'Jane'), 0)) GROUP BY day_start) GROUP BY day_start @@ -1292,6 +1296,7 @@ FROM events AS e INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -1307,7 +1312,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0))) GROUP BY value ORDER BY count DESC, value DESC @@ -1357,6 +1362,7 @@ FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -1372,7 +1378,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.properties___filter_prop, 'filter_val'), 0), or(ifNull(equals(transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val2', 'some_val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val2'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), toIntervalDay(30))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, breakdown_value) AS e @@ -1716,7 +1722,8 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -1731,7 +1738,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) GROUP BY day_start) GROUP BY day_start @@ -1759,7 +1766,8 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -1774,7 +1782,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) GROUP BY day_start) GROUP BY day_start @@ -1828,7 +1836,8 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -1843,7 +1852,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) GROUP BY day_start) GROUP BY day_start @@ -1897,7 +1906,8 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -1912,7 +1922,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'watched movie'), ifNull(equals(e__pdi__person.properties___name, 'person1'), 0)) GROUP BY day_start) GROUP BY day_start @@ -3056,7 +3066,8 @@ count(e.uuid) AS count FROM events AS e INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -3073,7 +3084,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))))) GROUP BY value ORDER BY count DESC, value DESC @@ -3111,7 +3122,8 @@ transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -3128,7 +3140,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(or(ifNull(notILike(e__pdi__person.properties___email, '%@posthog.com%'), 1), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0)), or(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'safari'), 0))), or(ifNull(equals(transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com', 'test@gmail.com', 'test5@posthog.com', 'test4@posthog.com', 'test3@posthog.com'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test@gmail.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test5@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test4@posthog.com'), 0), ifNull(equals(e__pdi__person.properties___email, 'test3@posthog.com'), 0))) GROUP BY day_start, breakdown_value) @@ -3150,7 +3162,8 @@ count(e.uuid) AS count FROM events AS e INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -3167,7 +3180,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)))) GROUP BY value ORDER BY count DESC, value DESC @@ -3205,7 +3218,8 @@ transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -3222,7 +3236,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-07-01 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), and(ifNull(equals(e__pdi__person.`properties___$os`, 'android'), 0), ifNull(equals(e__pdi__person.`properties___$browser`, 'chrome'), 0)), and(ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0), ifNull(ilike(e__pdi__person.properties___email, '%@posthog.com%'), 0)), or(ifNull(equals(transform(ifNull(e__pdi__person.properties___email, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], ['$$_posthog_breakdown_other_$$', 'test2@posthog.com'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.properties___email, 'test2@posthog.com'), 0))) GROUP BY day_start, breakdown_value) @@ -3279,7 +3293,8 @@ toStartOfDay(toTimeZone(e.timestamp, 'UTC')) AS day_start FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -3294,7 +3309,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0)) GROUP BY day_start) GROUP BY day_start @@ -3313,7 +3328,8 @@ count(e.uuid) AS count FROM events AS e INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -3328,7 +3344,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) GROUP BY value ORDER BY count DESC, value DESC @@ -3366,7 +3382,8 @@ transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM events AS e SAMPLE 1 INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -3381,7 +3398,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-24 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-31 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val'], ['$$_posthog_breakdown_other_$$', 'some_val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0))) GROUP BY day_start, breakdown_value) @@ -4188,7 +4205,8 @@ WHERE and(equals(events.team_id, 2), ifNull(notEquals(id, ''), 1)) GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -4203,7 +4221,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC')))), and(greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'))) GROUP BY value ORDER BY count DESC, value DESC @@ -4228,7 +4246,8 @@ WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(events.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), ifNull(notEquals(id, ''), 1)) GROUP BY id) AS e__session ON equals(e.`$session_id`, e__session.id) INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -4243,7 +4262,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfWeek(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')), 0)), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), equals(e.event, 'sign up'), or(ifNull(equals(transform(ifNull(e__pdi__person.`properties___$some_prop`, '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'some_val', 'another_val'], ['$$_posthog_breakdown_other_$$', 'some_val', 'another_val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'some_val'), 0), ifNull(equals(e__pdi__person.`properties___$some_prop`, 'another_val'), 0))) GROUP BY e__session.id, breakdown_value) @@ -4806,6 +4825,7 @@ FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -4820,7 +4840,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(e__pdi__person.properties___name, 'person-1'), 0), ifNull(equals(e__pdi__person.properties___name, 'person-2'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) @@ -4862,6 +4882,7 @@ FROM events AS e SAMPLE 1 INNER JOIN (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, + argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id FROM person_distinct_id2 WHERE equals(person_distinct_id2.team_id, 2) @@ -4876,7 +4897,7 @@ FROM person WHERE equals(person.team_id, 2) GROUP BY person.id - HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.person_id, e__pdi__person.id) + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), or(ifNull(equals(e__pdi__person.properties___name, 'person-1'), 0), ifNull(equals(e__pdi__person.properties___name, 'person-2'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id) AS e WHERE and(ifNull(lessOrEquals(e.timestamp, plus(d.timestamp, toIntervalDay(1))), 0), ifNull(greater(e.timestamp, minus(d.timestamp, toIntervalDay(6))), 0)) diff --git a/posthog/warehouse/api/view_link.py b/posthog/warehouse/api/view_link.py index 27315e5a7c61d..38c35159939f4 100644 --- a/posthog/warehouse/api/view_link.py +++ b/posthog/warehouse/api/view_link.py @@ -65,15 +65,10 @@ def _validate_join_key(self, join_key: Optional[str], table: Optional[str], team database = create_hogql_database(team_id) try: - table_instance = database.get_table(table) + database.get_table(table) except Exception: raise serializers.ValidationError(f"Invalid table: {table}") - try: - table_instance.fields[join_key] - except Exception: - raise serializers.ValidationError(f"Invalid join key: {join_key}") - return diff --git a/posthog/warehouse/models/join.py b/posthog/warehouse/models/join.py index 966e7841a7d06..c06e4179d6939 100644 --- a/posthog/warehouse/models/join.py +++ b/posthog/warehouse/models/join.py @@ -6,6 +6,7 @@ from posthog.hogql.ast import SelectQuery from posthog.hogql.context import HogQLContext from posthog.hogql.errors import HogQLException +from posthog.hogql.parser import parse_expr from posthog.models.team import Team from posthog.models.utils import CreatedMetaFields, DeletedMetaFields, UUIDModel from posthog.warehouse.models.datawarehouse_saved_query import DataWarehouseSavedQuery @@ -53,15 +54,30 @@ def _join_function( if not requested_fields: raise HogQLException(f"No fields requested from {to_table}") + left = parse_expr(self.source_table_key) + if not isinstance(left, ast.Field): + raise HogQLException("Data Warehouse Join HogQL expression should be a Field node") + left.chain = [from_table, *left.chain] + + right = parse_expr(self.joining_table_key) + if not isinstance(right, ast.Field): + raise HogQLException("Data Warehouse Join HogQL expression should be a Field node") + right.chain = [to_table, *right.chain] + join_expr = ast.JoinExpr( - table=ast.Field(chain=[self.joining_table_name]), + table=ast.SelectQuery( + select=[ + ast.Alias(alias=alias, expr=ast.Field(chain=chain)) for alias, chain in requested_fields.items() + ], + select_from=ast.JoinExpr(table=ast.Field(chain=[self.joining_table_name])), + ), join_type="LEFT JOIN", alias=to_table, constraint=ast.JoinConstraint( expr=ast.CompareOperation( op=ast.CompareOperationOp.Eq, - left=ast.Field(chain=[from_table, self.source_table_key]), - right=ast.Field(chain=[to_table, self.joining_table_key]), + left=left, + right=right, ) ), )