Skip to content

Commit

Permalink
fix: replay events table had duplicate columns (#19390)
Browse files Browse the repository at this point in the history
  • Loading branch information
pauldambra authored Dec 19, 2023
1 parent faf3dd6 commit a246a8c
Show file tree
Hide file tree
Showing 3 changed files with 19 additions and 29 deletions.
8 changes: 7 additions & 1 deletion posthog/hogql/database/schema/session_replay_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,8 @@
)
from posthog.schema import HogQLQueryModifiers

RAW_ONLY_FIELDS = ["min_first_timestamp", "max_last_timestamp"]

SESSION_REPLAY_EVENTS_COMMON_FIELDS: Dict[str, FieldOrTable] = {
"session_id": StringDatabaseField(name="session_id"),
"team_id": IntegerDatabaseField(name="team_id"),
Expand Down Expand Up @@ -88,6 +90,10 @@ def select_from_session_replay_events_table(requested_fields: Dict[str, List[str
group_by_fields: List[ast.Expr] = []

for name, chain in requested_fields.items():
if name in RAW_ONLY_FIELDS:
# these fields are accounted for by start_time and end_time, so can be skipped in the "not raw" table
continue

if name in aggregate_fields:
select_fields.append(ast.Alias(alias=name, expr=aggregate_fields[name]))
else:
Expand All @@ -103,7 +109,7 @@ def select_from_session_replay_events_table(requested_fields: Dict[str, List[str

class SessionReplayEventsTable(LazyTable):
fields: Dict[str, FieldOrTable] = {
**SESSION_REPLAY_EVENTS_COMMON_FIELDS,
**{k: v for k, v in SESSION_REPLAY_EVENTS_COMMON_FIELDS.items() if k not in RAW_ONLY_FIELDS},
"start_time": DateTimeDatabaseField(name="start_time"),
"end_time": DateTimeDatabaseField(name="end_time"),
"first_url": StringDatabaseField(name="first_url"),
Expand Down
16 changes: 0 additions & 16 deletions posthog/hogql/database/test/__snapshots__/test_database.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -320,14 +320,6 @@
"key": "distinct_id",
"type": "string"
},
{
"key": "min_first_timestamp",
"type": "datetime"
},
{
"key": "max_last_timestamp",
"type": "datetime"
},
{
"key": "first_url",
"type": "string"
Expand Down Expand Up @@ -1089,14 +1081,6 @@
"key": "distinct_id",
"type": "string"
},
{
"key": "min_first_timestamp",
"type": "datetime"
},
{
"key": "max_last_timestamp",
"type": "datetime"
},
{
"key": "first_url",
"type": "string"
Expand Down
24 changes: 12 additions & 12 deletions posthog/hogql/test/test_printer.py
Original file line number Diff line number Diff line change
Expand Up @@ -966,7 +966,7 @@ def test_functions_expecting_datetime_arg(self):
def test_field_nullable_equals(self):
generated_sql_statements1 = self._select(
"SELECT "
"min_first_timestamp = toStartOfMonth(now()), "
"start_time = toStartOfMonth(now()), "
"now() = now(), "
"1 = now(), "
"now() = 1, "
Expand All @@ -980,7 +980,7 @@ def test_field_nullable_equals(self):
)
generated_sql_statements2 = self._select(
"SELECT "
"equals(min_first_timestamp, toStartOfMonth(now())), "
"equals(start_time, toStartOfMonth(now())), "
"equals(now(), now()), "
"equals(1, now()), "
"equals(now(), 1), "
Expand All @@ -995,10 +995,10 @@ def test_field_nullable_equals(self):
assert generated_sql_statements1 == generated_sql_statements2
assert generated_sql_statements1 == (
f"SELECT "
# min_first_timestamp = toStartOfMonth(now())
# start_time = toStartOfMonth(now())
# (the return of toStartOfMonth() is treated as "potentially nullable" since we yet have full typing support)
f"ifNull(equals(toTimeZone(session_replay_events.min_first_timestamp, %(hogql_val_0)s), toStartOfMonth(now64(6, %(hogql_val_1)s))), "
f"isNull(toTimeZone(session_replay_events.min_first_timestamp, %(hogql_val_0)s)) and isNull(toStartOfMonth(now64(6, %(hogql_val_1)s)))), "
f"ifNull(equals(toTimeZone(session_replay_events.start_time, %(hogql_val_0)s), toStartOfMonth(now64(6, %(hogql_val_1)s))), "
f"isNull(toTimeZone(session_replay_events.start_time, %(hogql_val_0)s)) and isNull(toStartOfMonth(now64(6, %(hogql_val_1)s)))), "
# now() = now() (also two nullable fields)
f"ifNull(equals(now64(6, %(hogql_val_2)s), now64(6, %(hogql_val_3)s)), isNull(now64(6, %(hogql_val_2)s)) and isNull(now64(6, %(hogql_val_3)s))), "
# 1 = now()
Expand All @@ -1018,27 +1018,27 @@ def test_field_nullable_equals(self):
# null = click_count
f"isNull(session_replay_events.click_count) "
# ...
f"FROM (SELECT session_replay_events.min_first_timestamp AS min_first_timestamp, sum(session_replay_events.click_count) AS click_count, sum(session_replay_events.keypress_count) AS keypress_count FROM session_replay_events WHERE equals(session_replay_events.team_id, {self.team.pk}) GROUP BY session_replay_events.min_first_timestamp) AS session_replay_events LIMIT 10000"
f"FROM (SELECT min(session_replay_events.min_first_timestamp) AS start_time, sum(session_replay_events.click_count) AS click_count, sum(session_replay_events.keypress_count) AS keypress_count FROM session_replay_events WHERE equals(session_replay_events.team_id, {self.team.pk})) AS session_replay_events LIMIT 10000"
)

def test_field_nullable_not_equals(self):
generated_sql1 = self._select(
"SELECT min_first_timestamp != toStartOfMonth(now()), now() != now(), 1 != now(), now() != 1, 1 != 1, "
"SELECT start_time != toStartOfMonth(now()), now() != now(), 1 != now(), now() != 1, 1 != 1, "
"click_count != 1, 1 != click_count, click_count != keypress_count, click_count != null, null != click_count "
"FROM session_replay_events"
)
generated_sql2 = self._select(
"SELECT notEquals(min_first_timestamp, toStartOfMonth(now())), notEquals(now(), now()), notEquals(1, now()), notEquals(now(), 1), notEquals(1, 1), "
"SELECT notEquals(start_time, toStartOfMonth(now())), notEquals(now(), now()), notEquals(1, now()), notEquals(now(), 1), notEquals(1, 1), "
"notEquals(click_count, 1), notEquals(1, click_count), notEquals(click_count, keypress_count), notEquals(click_count, null), notEquals(null, click_count) "
"FROM session_replay_events"
)
assert generated_sql1 == generated_sql2
assert generated_sql1 == (
f"SELECT "
# min_first_timestamp = toStartOfMonth(now())
# start_time = toStartOfMonth(now())
# (the return of toStartOfMonth() is treated as "potentially nullable" since we yet have full typing support)
f"ifNull(notEquals(toTimeZone(session_replay_events.min_first_timestamp, %(hogql_val_0)s), toStartOfMonth(now64(6, %(hogql_val_1)s))), "
f"isNotNull(toTimeZone(session_replay_events.min_first_timestamp, %(hogql_val_0)s)) or isNotNull(toStartOfMonth(now64(6, %(hogql_val_1)s)))), "
f"ifNull(notEquals(toTimeZone(session_replay_events.start_time, %(hogql_val_0)s), toStartOfMonth(now64(6, %(hogql_val_1)s))), "
f"isNotNull(toTimeZone(session_replay_events.start_time, %(hogql_val_0)s)) or isNotNull(toStartOfMonth(now64(6, %(hogql_val_1)s)))), "
# now() = now() (also two nullable fields)
f"ifNull(notEquals(now64(6, %(hogql_val_2)s), now64(6, %(hogql_val_3)s)), isNotNull(now64(6, %(hogql_val_2)s)) or isNotNull(now64(6, %(hogql_val_3)s))), "
# 1 = now()
Expand All @@ -1058,7 +1058,7 @@ def test_field_nullable_not_equals(self):
# null = click_count
f"isNotNull(session_replay_events.click_count) "
# ...
f"FROM (SELECT session_replay_events.min_first_timestamp AS min_first_timestamp, sum(session_replay_events.click_count) AS click_count, sum(session_replay_events.keypress_count) AS keypress_count FROM session_replay_events WHERE equals(session_replay_events.team_id, {self.team.pk}) GROUP BY session_replay_events.min_first_timestamp) AS session_replay_events LIMIT 10000"
f"FROM (SELECT min(session_replay_events.min_first_timestamp) AS start_time, sum(session_replay_events.click_count) AS click_count, sum(session_replay_events.keypress_count) AS keypress_count FROM session_replay_events WHERE equals(session_replay_events.team_id, {self.team.pk})) AS session_replay_events LIMIT 10000"
)

def test_field_nullable_like(self):
Expand Down

0 comments on commit a246a8c

Please sign in to comment.