Skip to content

Commit

Permalink
feat(web-analytics): Fix top pages SQL and add some extra insights (#17850)
Browse files Browse the repository at this point in the history

* Fix top pages SQL

* Use person id for now

* Add limits to web analytics queries

* Add some trends to web analytics

* Tweak query date ranges and formatting

* Consistently use person_id for now
  • Loading branch information
robbie-c authored Oct 9, 2023
1 parent 7bb1cef commit aade1e4
Show file tree
Hide file tree
Showing 6 changed files with 92 additions and 10 deletions.
59 changes: 59 additions & 0 deletions frontend/src/scenes/web-analytics/webAnalyticsLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ import { actions, connect, kea, listeners, path, reducers, selectors, sharedList

import type { webAnalyticsLogicType } from './webAnalyticsLogicType'
import { NodeKind, QuerySchema } from '~/queries/schema'
import { BaseMathType, ChartDisplayType } from '~/types'

interface Layout {
colSpan?: number
Expand Down Expand Up @@ -59,6 +60,64 @@ export const webAnalyticsLogic = kea<webAnalyticsLogicType>([
},
},
},
{
layout: {
colSpan: 6,
},
query: {
kind: NodeKind.InsightVizNode,
source: {
kind: NodeKind.TrendsQuery,
dateRange: {
date_from: '-7d',
date_to: '-1d',
},
interval: 'day',
series: [
{
event: '$pageview',
kind: NodeKind.EventsNode,
math: BaseMathType.UniqueUsers,
name: '$pageview',
},
],
trendsFilter: {
compare: true,
display: ChartDisplayType.ActionsLineGraph,
},
filterTestAccounts: true,
},
},
},
{
layout: {
colSpan: 6,
},
query: {
kind: NodeKind.InsightVizNode,
source: {
kind: NodeKind.TrendsQuery,
breakdown: {
breakdown: '$geoip_country_code',
breakdown_type: 'person',
},
dateRange: {
date_from: '-7d',
},
series: [
{
event: '$pageview',
kind: NodeKind.EventsNode,
math: BaseMathType.UniqueUsers,
},
],
trendsFilter: {
display: ChartDisplayType.WorldMap,
},
filterTestAccounts: true,
},
},
},
],
],
}),
Expand Down
15 changes: 13 additions & 2 deletions posthog/hogql_queries/web_analytics/ctes.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,10 +59,21 @@
"""

# Query text for per-pathname pageview statistics (parsed with `parse_select`
# by the top-pages query and joined there under the alias `pathname`).
#
# Produces one row per `$pathname` value, restricted to `$pageview` events
# whose timestamp falls within the 7 days before `now()`:
#   - pathname:        the event's `$pathname` property
#   - total_pageviews: number of matching events (`count()`)
#   - unique_visitors: `uniq()` over `events.person_id`
#
# NOTE(review): the trailing `--` SQL comment on the unique_visitors line asks
# whether person id should be used, but the expression already uses
# `events.person_id`. It looks like a leftover from an earlier distinct_id
# version -- confirm and drop it from the query text if so.
PATHNAME_CTE = """
SELECT
events.properties.`$pathname` AS pathname,
count() as total_pageviews,
uniq(events.person_id) as unique_visitors -- might want to use person id? have seen a small number of pages where unique > total
FROM
events
WHERE
(event = '$pageview')
AND events.timestamp >= now() - INTERVAL 7 DAY
GROUP BY pathname
"""

PATHNAME_SCROLL_CTE = """
SELECT
events.properties.`$prev_pageview_pathname` AS pathname,
countIf(events.event == '$pageview') as total_pageviews,
COUNT(DISTINCT events.properties.distinct_id) as unique_visitors, -- might want to use person id? have seen a small number of pages where unique > total
avg(CASE
WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) IS NULL THEN NULL
WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) > 0.8 THEN 100
Expand Down
4 changes: 2 additions & 2 deletions posthog/hogql_queries/web_analytics/overview_stats.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
overview_stats_query = parse_select(
"""
SELECT
uniq(if(timestamp >= {mid} AND timestamp < {end}, events.distinct_id, NULL)) AS current_week_unique_users,
uniq(if(timestamp >= {start} AND timestamp < {mid}, events.distinct_id, NULL)) AS previous_week_unique_users,
uniq(if(timestamp >= {mid} AND timestamp < {end}, events.person_id, NULL)) AS current_week_unique_users,
uniq(if(timestamp >= {start} AND timestamp < {mid}, events.person_id, NULL)) AS previous_week_unique_users,
uniq(if(timestamp >= {mid} AND timestamp < {end}, events.properties.$session_id, NULL)) AS current_week_unique_sessions,
uniq(if(timestamp >= {start} AND timestamp < {mid}, events.properties.$session_id, NULL)) AS previous_week_unique_sessions,
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/web_analytics/top_clicks.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
GROUP BY
el_text
ORDER BY total_clicks DESC
LIMIT 10
""",
timings=self.timings,
)
Expand Down
21 changes: 16 additions & 5 deletions posthog/hogql_queries/web_analytics/top_pages.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
from posthog.hogql import ast
from posthog.hogql.parser import parse_select
from posthog.hogql.query import execute_hogql_query
from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE, PATHNAME_CTE
from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE, PATHNAME_CTE, PATHNAME_SCROLL_CTE
from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
Expand All @@ -19,16 +19,18 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
session_query = parse_select(SESSION_CTE, timings=self.timings)
with self.timings.measure("pathname_query"):
pathname_query = parse_select(PATHNAME_CTE, timings=self.timings)
with self.timings.measure("pathname_scroll_query"):
pathname_scroll_query = parse_select(PATHNAME_SCROLL_CTE, timings=self.timings)
with self.timings.measure("top_pages_query"):
top_sources_query = parse_select(
"""
SELECT
pathname.pathname as pathname,
pathname.total_pageviews as total_pageviews,
pathname.unique_visitors as unique_visitors,
pathname.scroll_gt80_percentage as scroll_gt80_percentage,
pathname.average_scroll_percentage as average_scroll_percentage,
bounce_rate.bounce_rate as bounce_rate
bounce_rate.bounce_rate as bounce_rate,
scroll_data.scroll_gt80_percentage as scroll_gt80_percentage,
scroll_data.average_scroll_percentage as average_scroll_percentage
FROM
{pathname_query} AS pathname
LEFT OUTER JOIN
Expand All @@ -43,11 +45,20 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
) AS bounce_rate
ON
pathname.pathname = bounce_rate.earliest_pathname
LEFT OUTER JOIN
{pathname_scroll_query} AS scroll_data
ON
pathname.pathname = scroll_data.pathname
ORDER BY
total_pageviews DESC
LIMIT 10
""",
timings=self.timings,
placeholders={"pathname_query": pathname_query, "session_query": session_query},
placeholders={
"pathname_query": pathname_query,
"session_query": session_query,
"pathname_scroll_query": pathname_scroll_query,
},
)
return top_sources_query

Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql_queries/web_analytics/top_sources.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
GROUP BY blended_source
ORDER BY total_pageviews DESC
LIMIT 100
LIMIT 10
""",
timings=self.timings,
placeholders={"session_query": session_query},
Expand Down

0 comments on commit aade1e4

Please sign in to comment.