Skip to content

Commit

Permalink
Web analytics queries and their tests working
Browse files (browse the repository at this point in the history)
  • Loading branch information
robbie-c committed Jun 20, 2024
1 parent 92b8898 commit c823171
Show file tree
Hide file tree
Showing 4 changed files with 166 additions and 117 deletions.
Original file line number | Diff line number | Diff line change
Expand Up @@ -329,14 +329,14 @@ def test_join_with_events(self):
FROM
events
JOIN (SELECT
sessions.session_id AS session_id
raw_sessions.session_id AS session_id
FROM
sessions
raw_sessions
WHERE
and(equals(sessions.team_id, {self.team.id}), ifNull(greaterOrEquals(plus(toTimeZone(sessions.min_timestamp, %(hogql_val_0)s), toIntervalDay(3)), %(hogql_val_1)s), 0))
GROUP BY
sessions.session_id,
sessions.session_id) AS sessions ON equals(events.`$session_id`, sessions.session_id)
raw_sessions.session_id,
raw_sessions.session_id) AS sessions ON equals(events.`$session_id`, sessions.session_id)
WHERE
and(equals(events.team_id, {self.team.id}), greater(toTimeZone(events.timestamp, %(hogql_val_2)s), %(hogql_val_3)s))
GROUP BY
Expand All @@ -363,17 +363,17 @@ def test_union(self):
FROM
events
LEFT JOIN (SELECT
dateDiff(%(hogql_val_0)s, min(sessions.min_timestamp), max(sessions.max_timestamp)) AS `$session_duration`,
sessions.session_id AS session_id
dateDiff(%(hogql_val_0)s, min(raw_sessions.min_timestamp), max(raw_sessions.max_timestamp)) AS `$session_duration`,
raw_sessions.session_id_v7 AS session_id_v7
FROM
sessions
raw_sessions
WHERE
and(equals(sessions.team_id, {self.team.id}), ifNull(lessOrEquals(minus(toTimeZone(sessions.min_timestamp, %(hogql_val_1)s), toIntervalDay(3)), today()), 0))
and(equals(raw_sessions.team_id, {self.team.id}), ifNull(lessOrEquals(minus(toTimeZone(fromUnixTimestamp(intDiv(toUInt64(bitShiftRight(raw_sessions.session_id_v7, 80)), 1000)), %(hogql_val_1)s), toIntervalDay(3)), today()), 0))
GROUP BY
sessions.session_id,
sessions.session_id) AS events__session ON equals(events.`$session_id`, events__session.session_id)
raw_sessions.session_id_v7,
raw_sessions.session_id_v7) AS events__session ON equals(toUInt128(accurateCastOrNull(events.`$session_id`, %(hogql_val_2)s)), events__session.session_id_v7)
WHERE
and(equals(events.team_id, {self.team.id}), less(toTimeZone(events.timestamp, %(hogql_val_2)s), today()))
and(equals(events.team_id, 1785), less(toTimeZone(events.timestamp, %(hogql_val_3)s), today()))
LIMIT {MAX_SELECT_RETURNED_ROWS}"""
assert expected == actual

Expand Down
71 changes: 32 additions & 39 deletions posthog/hogql_queries/web_analytics/stats_table.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -101,10 +101,9 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery:
SELECT
any(person_id) AS person_id,
count() AS filtered_pageview_count,
{breakdown_value} AS breakdown_value
{breakdown_value} AS breakdown_value,
session.session_id AS session_id
FROM events
JOIN sessions
ON events.`$session_id` = sessions.session_id
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
Expand All @@ -113,7 +112,7 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery:
{session_properties},
{where_breakdown}
)
GROUP BY events.`$session_id`, breakdown_value
GROUP BY session_id, breakdown_value
)
GROUP BY "context.columns.breakdown_value"
ORDER BY "context.columns.visitors" DESC,
Expand Down Expand Up @@ -146,10 +145,9 @@ def to_entry_bounce_query(self) -> ast.SelectQuery:
any(person_id) AS person_id,
count() AS filtered_pageview_count,
{bounce_breakdown} AS breakdown_value,
any(sessions.$is_bounce) AS is_bounce
any(session.$is_bounce) AS is_bounce,
session.session_id AS session_id
FROM events
JOIN sessions
ON events.`$session_id` = sessions.session_id
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
Expand All @@ -158,7 +156,7 @@ def to_entry_bounce_query(self) -> ast.SelectQuery:
{session_properties},
{where_breakdown}
)
GROUP BY events.`$session_id`, breakdown_value
GROUP BY session_id, breakdown_value
)
GROUP BY "context.columns.breakdown_value"
ORDER BY "context.columns.visitors" DESC,
Expand Down Expand Up @@ -200,10 +198,9 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
SELECT
any(person_id) AS person_id,
count() AS filtered_pageview_count,
{breakdown_value} AS breakdown_value
{breakdown_value} AS breakdown_value,
session.session_id AS session_id
FROM events
JOIN sessions
ON events.`$session_id` = sessions.session_id
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
Expand All @@ -212,7 +209,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
{session_properties},
breakdown_value IS NOT NULL
)
GROUP BY events.`$session_id`, breakdown_value
GROUP BY session_id, breakdown_value
)
GROUP BY breakdown_value
) AS counts
Expand All @@ -223,10 +220,9 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
FROM (
SELECT
{bounce_breakdown_value} AS breakdown_value, -- use $entry_pathname to find the bounce rate for sessions that started on this pathname
any(session.`$is_bounce`) AS is_bounce
any(session.`$is_bounce`) AS is_bounce,
session.session_id AS session_id
FROM events
JOIN sessions
ON events.`$session_id` = sessions.session_id
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
Expand All @@ -235,7 +231,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
{session_properties},
breakdown_value IS NOT NULL
)
GROUP BY events.`$session_id`, breakdown_value
GROUP BY session_id, breakdown_value
)
GROUP BY breakdown_value
) AS bounce
Expand All @@ -254,10 +250,9 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
ELSE 0
END
) AS scroll_gt80_percentage_state,
avgState(toFloat(events.properties.`$prev_pageview_max_scroll_percentage`)) as average_scroll_percentage_state
avgState(toFloat(events.properties.`$prev_pageview_max_scroll_percentage`)) as average_scroll_percentage_state,
session.session_id AS session_id
FROM events
JOIN sessions
ON events.`$session_id` = sessions.session_id
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
Expand All @@ -266,7 +261,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery:
{session_properties},
breakdown_value IS NOT NULL
)
GROUP BY events.`$session_id`, breakdown_value
GROUP BY session_id, breakdown_value
)
GROUP BY breakdown_value
) AS scroll
Expand Down Expand Up @@ -310,10 +305,9 @@ def to_path_bounce_query(self) -> ast.SelectQuery:
SELECT
any(person_id) AS person_id,
count() AS filtered_pageview_count,
{breakdown_value} AS breakdown_value
{breakdown_value} AS breakdown_value,
session.session_id AS session_id
FROM events
JOIN sessions
ON events.`$session_id` = sessions.session_id
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
Expand All @@ -322,7 +316,7 @@ def to_path_bounce_query(self) -> ast.SelectQuery:
{session_properties},
{where_breakdown}
)
GROUP BY events.`$session_id`, breakdown_value
GROUP BY session_id, breakdown_value
)
GROUP BY breakdown_value
) as counts
Expand All @@ -333,10 +327,9 @@ def to_path_bounce_query(self) -> ast.SelectQuery:
FROM (
SELECT
{bounce_breakdown_value} AS breakdown_value, -- use $entry_pathname to find the bounce rate for sessions that started on this pathname
any(session.`$is_bounce`) AS is_bounce
any(session.`$is_bounce`) AS is_bounce,
session.session_id AS session_id
FROM events
JOIN sessions
ON events.`$session_id` = sessions.session_id
WHERE and(
timestamp >= {date_from},
timestamp < {date_to},
Expand All @@ -345,7 +338,7 @@ def to_path_bounce_query(self) -> ast.SelectQuery:
{session_properties},
breakdown_value IS NOT NULL
)
GROUP BY events.`$session_id`, breakdown_value
GROUP BY session_id, breakdown_value
)
GROUP BY breakdown_value
) as bounce
Expand Down Expand Up @@ -409,7 +402,7 @@ def _session_properties(self) -> ast.Expr:
properties = [
p for p in self.query.properties + self._test_account_filters if get_property_type(p) == "session"
]
return property_to_expr(properties, team=self.team, scope="session")
return property_to_expr(properties, team=self.team, scope="event")

def _all_properties(self) -> ast.Expr:
properties = self.query.properties + self._test_account_filters
Expand Down Expand Up @@ -456,23 +449,23 @@ def _counts_breakdown_value(self):
case WebStatsBreakdown.PAGE:
return self._apply_path_cleaning(ast.Field(chain=["events", "properties", "$pathname"]))
case WebStatsBreakdown.INITIAL_PAGE:
return self._apply_path_cleaning(ast.Field(chain=["sessions", "$entry_pathname"]))
return self._apply_path_cleaning(ast.Field(chain=["session", "$entry_pathname"]))
case WebStatsBreakdown.EXIT_PAGE:
return self._apply_path_cleaning(ast.Field(chain=["sessions", "$exit_pathname"]))
return self._apply_path_cleaning(ast.Field(chain=["session", "$exit_pathname"]))
case WebStatsBreakdown.INITIAL_REFERRING_DOMAIN:
return ast.Field(chain=["sessions", "$entry_referring_domain"])
return ast.Field(chain=["session", "$entry_referring_domain"])
case WebStatsBreakdown.INITIAL_UTM_SOURCE:
return ast.Field(chain=["sessions", "$entry_utm_source"])
return ast.Field(chain=["session", "$entry_utm_source"])
case WebStatsBreakdown.INITIAL_UTM_CAMPAIGN:
return ast.Field(chain=["sessions", "$entry_utm_campaign"])
return ast.Field(chain=["session", "$entry_utm_campaign"])
case WebStatsBreakdown.INITIAL_UTM_MEDIUM:
return ast.Field(chain=["sessions", "$entry_utm_medium"])
return ast.Field(chain=["session", "$entry_utm_medium"])
case WebStatsBreakdown.INITIAL_UTM_TERM:
return ast.Field(chain=["sessions", "$entry_utm_term"])
return ast.Field(chain=["session", "$entry_utm_term"])
case WebStatsBreakdown.INITIAL_UTM_CONTENT:
return ast.Field(chain=["sessions", "$entry_utm_content"])
return ast.Field(chain=["session", "$entry_utm_content"])
case WebStatsBreakdown.INITIAL_CHANNEL_TYPE:
return ast.Field(chain=["sessions", "$channel_type"])
return ast.Field(chain=["session", "$channel_type"])
case WebStatsBreakdown.BROWSER:
return ast.Field(chain=["properties", "$browser"])
case WebStatsBreakdown.OS:
Expand Down Expand Up @@ -515,7 +508,7 @@ def _scroll_prev_pathname_breakdown(self):
return self._apply_path_cleaning(ast.Field(chain=["events", "properties", "$prev_pageview_pathname"]))

def _bounce_entry_pathname_breakdown(self):
return self._apply_path_cleaning(ast.Field(chain=["sessions", "$entry_pathname"]))
return self._apply_path_cleaning(ast.Field(chain=["session", "$entry_pathname"]))

def _apply_path_cleaning(self, path_expr: ast.Expr) -> ast.Expr:
if not self.query.doPathCleaning or not self.team.path_cleaning_filters:
Expand Down
Loading

0 comments on commit c823171

Please sign in to comment.