Skip to content

Commit

Permalink
fix: Don't use mat cols for session recordings events query (#19491)
Browse files Browse the repository at this point in the history
* fix: Don't use mat cols for session recordings events query

* test fix

* Update query snapshots

* Update query snapshots

* add comment for the future traveller

* fix

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
Co-authored-by: Paul D'Ambra <[email protected]>
  • Loading branch information
3 people authored Jan 4, 2024
1 parent e3f5be2 commit d16aab4
Show file tree
Hide file tree
Showing 4 changed files with 13 additions and 5 deletions.
3 changes: 2 additions & 1 deletion posthog/queries/event_query/event_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -259,6 +259,7 @@ def _get_prop_groups(
person_properties_mode=PersonPropertiesMode.USING_PERSON_PROPERTIES_COLUMN,
person_id_joined_alias="person_id",
prepend="global",
allow_denormalized_props=True,
) -> Tuple[str, Dict]:
if not prop_group:
return "", {}
Expand All @@ -276,7 +277,7 @@ def _get_prop_groups(
property_group=props_to_filter,
prepend=prepend,
table_name=self.EVENT_TABLE_ALIAS,
allow_denormalized_props=True,
allow_denormalized_props=allow_denormalized_props,
person_properties_mode=person_properties_mode,
person_id_joined_alias=person_id_joined_alias,
hogql_context=self._filter.hogql_context,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -118,7 +118,7 @@ def _get_events_timestamp_clause(self) -> Tuple[str, Dict[str, Any]]:

@staticmethod
def _get_console_log_clause(
console_logs_filter: List[Literal["error", "warn", "log"]]
console_logs_filter: List[Literal["error", "warn", "log"]],
) -> Tuple[str, Dict[str, Any]]:
return (
(
Expand Down Expand Up @@ -425,6 +425,11 @@ def get_query(self, select_event_ids: bool = False) -> Tuple[str, Dict[str, Any]
],
),
person_id_joined_alias=f"{self.DISTINCT_ID_TABLE_ALIAS}.person_id",
# TRICKY: we saw unusual memory usage behavior in EU clickhouse cluster
# when allowing use of denormalized properties in this query
# it is likely this can be returned to the default of True in future
# but would need careful monitoring
allow_denormalized_props=False,
)

(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2574,7 +2574,7 @@
AND timestamp <= '2021-01-22 08:00:00'
WHERE notEmpty(`$session_id`)
AND (event = '$pageview')
AND (has(['false'], "mat_is_internal_user")
AND (has(['false'], replaceRegexpAll(JSONExtractRaw(e.properties, 'is_internal_user'), '^"|"$', ''))
AND ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$browser'), ''), 'null'), '^"|"$', ''), 'Chrome'), 0))
AND e.distinct_id in
(select distinct_id
Expand Down Expand Up @@ -3842,7 +3842,7 @@
AND timestamp >= '2021-01-13 12:00:00'
AND timestamp <= '2021-01-22 08:00:00'
WHERE notEmpty(`$session_id`)
AND (has(['false'], "mat_is_internal_user"))
AND (has(['false'], replaceRegexpAll(JSONExtractRaw(e.properties, 'is_internal_user'), '^"|"$', '')))
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2698,7 +2698,9 @@ def test_event_filter_with_hogql_event_properties_test_accounts_excluded(self):
(session_recordings, _) = session_recording_list_instance.run()
self.assertEqual(len(session_recordings), 1)

@also_test_with_materialized_columns(event_properties=["is_internal_user"])
# TRICKY: we had to disable use of materialized columns for part of the query generation
# due to RAM usage issues on the EU cluster
@also_test_with_materialized_columns(event_properties=["is_internal_user"], verify_no_jsonextract=False)
@freeze_time("2021-01-21T20:00:00.000Z")
@snapshot_clickhouse_queries
def test_top_level_event_property_test_account_filter(self):
Expand Down

0 comments on commit d16aab4

Please sign in to comment.