Skip to content

Commit

Permalink
fix: stop filtering out replays with no URL (#23032)
Browse files (browse the repository at this point in the history)
Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
pauldambra and github-actions[bot] authored Jun 18, 2024
1 parent d645459 commit f5dcb19
Show file tree
Hide file tree
Showing 6 changed files with 265 additions and 270 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -50,7 +50,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT %(limit)s
OFFSET %(offset)s
Expand Down Expand Up @@ -107,7 +107,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT %(limit)s
OFFSET %(offset)s
Expand Down Expand Up @@ -163,7 +163,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT %(limit)s
OFFSET %(offset)s
Expand Down Expand Up @@ -219,7 +219,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT %(limit)s
OFFSET %(offset)s
Expand Down Expand Up @@ -249,7 +249,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -292,7 +292,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -322,7 +322,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -365,7 +365,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -395,7 +395,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -438,7 +438,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -468,7 +468,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -511,7 +511,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -541,7 +541,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -592,7 +592,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -622,7 +622,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -673,7 +673,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -703,7 +703,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -754,7 +754,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -784,7 +784,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -835,7 +835,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -865,7 +865,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -916,7 +916,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -946,7 +946,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -997,7 +997,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1027,7 +1027,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1078,7 +1078,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1108,7 +1108,7 @@
AND s.min_first_timestamp >= '2020-12-25 00:00:00'
AND s.min_first_timestamp <= '2021-01-01 13:46:23'
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1159,7 +1159,7 @@
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1204,7 +1204,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1249,7 +1249,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1294,7 +1294,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1339,7 +1339,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1384,7 +1384,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1429,7 +1429,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1474,7 +1474,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1519,7 +1519,7 @@
HAVING argMax(is_deleted, version) = 0
AND current_person_id = '00000000-0000-0000-0000-000000000000') as session_persons_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1562,7 +1562,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1605,7 +1605,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1648,7 +1648,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down Expand Up @@ -1691,7 +1691,7 @@
GROUP BY `$session_id`
HAVING 1=1) as session_events_sub_query)
GROUP BY session_id
HAVING s.first_url is not null
HAVING 1=1
ORDER BY start_time DESC
LIMIT 51
OFFSET 0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -257,12 +257,7 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]:
return ast.And(exprs=exprs)

def _having_predicates(self) -> ast.And | Constant:
exprs: list[ast.Expr] = [
# a missing first url indicates delayed or incomplete ingestion and we can ignore those
ast.CompareOperation(
op=ast.CompareOperationOp.NotEq, left=ast.Field(chain=["first_url"]), right=ast.Constant(value=None)
)
]
exprs: list[ast.Expr] = []

if self._filter.recording_duration_filter:
op = (
Expand All @@ -278,7 +273,7 @@ def _having_predicates(self) -> ast.And | Constant:
),
)

return ast.And(exprs=exprs)
return ast.And(exprs=exprs) if exprs else ast.Constant(value=True)

def _strip_person_and_event_properties(self, property_group: PropertyGroup) -> PropertyGroup | None:
property_groups_to_keep = [
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -642,7 +642,7 @@ def ttl_days(self):
{provided_session_ids_clause}
{log_matching_session_ids_clause}
GROUP BY session_id
HAVING s.first_url is not null {duration_clause} {console_log_clause}
HAVING 1=1 {duration_clause} {console_log_clause}
{order_by_clause}
LIMIT %(limit)s OFFSET %(offset)s
"""
Expand Down
Loading

0 comments on commit f5dcb19

Please sign in to comment.