Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hogql): better handling of known types for function calls #24237

Closed
wants to merge 40 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
40 commits
Select commit Hold shift + click to select a range
ad70016
handle non-null calls for toString in concat
thmsobrmlr Aug 7, 2024
14c84f5
add to _is_nullable
thmsobrmlr Aug 7, 2024
8d40d2a
WIP
Gilbert09 Aug 8, 2024
f942c03
WIP
Gilbert09 Aug 8, 2024
7725cc6
Update query snapshots
github-actions[bot] Aug 8, 2024
8ca52db
Update query snapshots
github-actions[bot] Aug 8, 2024
f3d2027
More types
Gilbert09 Aug 8, 2024
065b7b6
Merge branch 'master' into hogql-null-handling
Gilbert09 Aug 8, 2024
7127859
Update query snapshots
github-actions[bot] Aug 8, 2024
4f43f2b
Update query snapshots
github-actions[bot] Aug 8, 2024
1c756e4
WIP command
Gilbert09 Aug 8, 2024
d600082
More typings
Gilbert09 Aug 9, 2024
cff06ca
Dont use ifNull for some funcs
Gilbert09 Aug 9, 2024
df29697
Update query snapshots
github-actions[bot] Aug 9, 2024
1d1cf05
Update query snapshots
github-actions[bot] Aug 9, 2024
c09741b
Update query snapshots
github-actions[bot] Aug 9, 2024
4848bb8
cleanup hogql_function_types
thmsobrmlr Aug 12, 2024
4ceafe1
update snapshots
thmsobrmlr Aug 12, 2024
70580cd
adaptations
thmsobrmlr Aug 12, 2024
3e4ca24
Update query snapshots
github-actions[bot] Aug 12, 2024
8458e7b
Update query snapshots
github-actions[bot] Aug 12, 2024
aa312c3
Update query snapshots
github-actions[bot] Aug 12, 2024
33c8991
more type checks
thmsobrmlr Aug 12, 2024
d035037
mypy
thmsobrmlr Aug 12, 2024
b7798a6
tests
thmsobrmlr Aug 12, 2024
73d5946
Update query snapshots
github-actions[bot] Aug 12, 2024
14089cb
cleanup
thmsobrmlr Aug 12, 2024
2e26114
annotate now()
thmsobrmlr Aug 12, 2024
e953333
add support for tuples and arrays
thmsobrmlr Aug 12, 2024
368d893
Update query snapshots
github-actions[bot] Aug 12, 2024
0736a0c
Update query snapshots
github-actions[bot] Aug 12, 2024
137cbd5
Update query snapshots
github-actions[bot] Aug 12, 2024
e79d336
Update query snapshots
github-actions[bot] Aug 12, 2024
c60f717
Update query snapshots
github-actions[bot] Aug 12, 2024
405b8af
non-nullable events fields
thmsobrmlr Aug 13, 2024
8f23c75
select query
thmsobrmlr Aug 13, 2024
3a8bf9e
Update query snapshots
github-actions[bot] Aug 13, 2024
b595e43
Update query snapshots
github-actions[bot] Aug 13, 2024
c4560cc
Update query snapshots
github-actions[bot] Aug 13, 2024
c1c8495
tests
thmsobrmlr Aug 13, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 2 additions & 2 deletions ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -353,7 +353,7 @@
AND event = '$pageview'
AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day
AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day
AND (and(ifNull(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), '%example%'), 0), 1))
AND (and(like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$current_url'), ''), 'null'), '^"|"$', ''), '%example%'), 1))
GROUP BY pdi.person_id)
GROUP BY start_of_period,
status)
Expand Down Expand Up @@ -427,7 +427,7 @@
AND event = '$pageview'
AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day
AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day
AND (and(ifNull(like(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), '%example%'), 0), 1))
AND (and(like(nullIf(nullIf(events.`mat_$current_url`, ''), 'null'), '%example%'), 1))
GROUP BY pdi.person_id)
GROUP BY start_of_period,
status)
Expand Down

Large diffs are not rendered by default.

18 changes: 0 additions & 18 deletions posthog/api/test/__snapshots__/test_insight.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -1631,24 +1631,6 @@
LIMIT 21
'''
# ---
# name: TestInsight.test_listing_insights_does_not_nplus1.30
'''
SELECT "posthog_taggeditem"."id",
"posthog_taggeditem"."tag_id",
"posthog_taggeditem"."dashboard_id",
"posthog_taggeditem"."insight_id",
"posthog_taggeditem"."event_definition_id",
"posthog_taggeditem"."property_definition_id",
"posthog_taggeditem"."action_id",
"posthog_taggeditem"."feature_flag_id"
FROM "posthog_taggeditem"
WHERE "posthog_taggeditem"."insight_id" IN (1,
2,
3,
4,
5 /* ... */)
'''
# ---
# name: TestInsight.test_listing_insights_does_not_nplus1.4
'''
SELECT "posthog_team"."id",
Expand Down
24 changes: 12 additions & 12 deletions posthog/api/test/__snapshots__/test_query.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -445,12 +445,12 @@
(SELECT person.id AS id,
replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email
FROM person
WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
(SELECT person.id AS id, max(person.version) AS version
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id)
WHERE and(equals(person.team_id, 2), in(tuple(person.id, person.version),
(SELECT person.id AS id, max(person.version) AS version
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))))) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id)
WHERE and(equals(events.team_id, 2), ifNull(equals(events__pdi__person.properties___email, '[email protected]'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
ORDER BY events.event ASC
LIMIT 101
Expand Down Expand Up @@ -484,12 +484,12 @@
(SELECT person.id AS id,
nullIf(nullIf(person.pmat_email, ''), 'null') AS properties___email
FROM person
WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version),
(SELECT person.id AS id, max(person.version) AS version
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id)
WHERE and(equals(person.team_id, 2), in(tuple(person.id, person.version),
(SELECT person.id AS id, max(person.version) AS version
FROM person
WHERE equals(person.team_id, 2)
GROUP BY person.id
HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0))))) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id)
WHERE and(equals(events.team_id, 2), ifNull(equals(events__pdi__person.properties___email, '[email protected]'), 0), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC')))
ORDER BY events.event ASC
LIMIT 101
Expand Down
22 changes: 20 additions & 2 deletions posthog/hogql/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,8 +271,12 @@ def resolve_column_constant_type(self, name: str, context: HogQLContext) -> "Con
return field.resolve_constant_type(context)

def resolve_constant_type(self, context: HogQLContext) -> "ConstantType":
# Used only for resolving the constant type of a `ast.Lambda` node or `SELECT 1` query
return UnknownType()
columns = list(self.columns.values())
if len(columns) == 1:
return columns[0].resolve_constant_type(context)
return TupleType(
item_types=[column.resolve_constant_type(context) for column in self.columns.values()],
)


@dataclass(kw_only=True)
Expand Down Expand Up @@ -435,6 +439,20 @@ def print_type(self) -> str:
return "Tuple"


AnyConstantType = (
StringType
| BooleanType
| DateType
| DateTimeType
| UUIDType
| ArrayType
| TupleType
| UnknownType
| IntegerType
| FloatType
)


@dataclass(kw_only=True)
class CallType(Type):
name: str
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ class CTE(Expr):
class ConstantType(Type):
data_type: ConstantDataType
nullable: bool = field(default=True)
is_timezone_type: bool = field(default=False)

def resolve_constant_type(self, context: "HogQLContext") -> "ConstantType":
return self
Expand Down
38 changes: 19 additions & 19 deletions posthog/hogql/database/schema/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,16 +54,16 @@ def to_printed_hogql(self):

class EventsTable(Table):
fields: dict[str, FieldOrTable] = {
"uuid": StringDatabaseField(name="uuid"),
"event": StringDatabaseField(name="event"),
"properties": StringJSONDatabaseField(name="properties"),
"timestamp": DateTimeDatabaseField(name="timestamp"),
"team_id": IntegerDatabaseField(name="team_id"),
"distinct_id": StringDatabaseField(name="distinct_id"),
"elements_chain": StringDatabaseField(name="elements_chain"),
"created_at": DateTimeDatabaseField(name="created_at"),
"$session_id": StringDatabaseField(name="$session_id"),
"$window_id": StringDatabaseField(name="$window_id"),
"uuid": StringDatabaseField(name="uuid", nullable=False),
"event": StringDatabaseField(name="event", nullable=False),
"properties": StringJSONDatabaseField(name="properties", nullable=False),
"timestamp": DateTimeDatabaseField(name="timestamp", nullable=False),
"team_id": IntegerDatabaseField(name="team_id", nullable=False),
"distinct_id": StringDatabaseField(name="distinct_id", nullable=False),
"elements_chain": StringDatabaseField(name="elements_chain", nullable=False),
"created_at": DateTimeDatabaseField(name="created_at", nullable=False),
"$session_id": StringDatabaseField(name="$session_id", nullable=False),
"$window_id": StringDatabaseField(name="$window_id", nullable=False),
# Lazy table that adds a join to the persons table
"pdi": LazyJoin(
from_field=["distinct_id"],
Expand All @@ -80,31 +80,31 @@ class EventsTable(Table):
# These are swapped out if the user has PoE enabled
"person": FieldTraverser(chain=["pdi", "person"]),
"person_id": FieldTraverser(chain=["pdi", "person_id"]),
"$group_0": StringDatabaseField(name="$group_0"),
"$group_0": StringDatabaseField(name="$group_0", nullable=False),
"group_0": LazyJoin(
from_field=["$group_0"],
join_table=GroupsTable(),
join_function=join_with_group_n_table(0),
),
"$group_1": StringDatabaseField(name="$group_1"),
"$group_1": StringDatabaseField(name="$group_1", nullable=False),
"group_1": LazyJoin(
from_field=["$group_1"],
join_table=GroupsTable(),
join_function=join_with_group_n_table(1),
),
"$group_2": StringDatabaseField(name="$group_2"),
"$group_2": StringDatabaseField(name="$group_2", nullable=False),
"group_2": LazyJoin(
from_field=["$group_2"],
join_table=GroupsTable(),
join_function=join_with_group_n_table(2),
),
"$group_3": StringDatabaseField(name="$group_3"),
"$group_3": StringDatabaseField(name="$group_3", nullable=False),
"group_3": LazyJoin(
from_field=["$group_3"],
join_table=GroupsTable(),
join_function=join_with_group_n_table(3),
),
"$group_4": StringDatabaseField(name="$group_4"),
"$group_4": StringDatabaseField(name="$group_4", nullable=False),
"group_4": LazyJoin(
from_field=["$group_4"],
join_table=GroupsTable(),
Expand All @@ -115,10 +115,10 @@ class EventsTable(Table):
join_table=SessionsTableV1(),
join_function=join_events_table_to_sessions_table,
),
"elements_chain_href": StringDatabaseField(name="elements_chain_href"),
"elements_chain_texts": StringArrayDatabaseField(name="elements_chain_texts"),
"elements_chain_ids": StringArrayDatabaseField(name="elements_chain_ids"),
"elements_chain_elements": StringArrayDatabaseField(name="elements_chain_elements"),
"elements_chain_href": StringDatabaseField(name="elements_chain_href", nullable=False),
"elements_chain_texts": StringArrayDatabaseField(name="elements_chain_texts", nullable=False),
"elements_chain_ids": StringArrayDatabaseField(name="elements_chain_ids", nullable=False),
"elements_chain_elements": StringArrayDatabaseField(name="elements_chain_elements", nullable=False),
}

def to_printed_clickhouse(self, context):
Expand Down
Loading
Loading