From 1b28e048e7a64183b9f00d62d078d0676c88bf82 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Wed, 4 Oct 2023 18:27:28 +0100 Subject: [PATCH 1/6] Fix top pages SQL --- posthog/hogql_queries/web_analytics/ctes.py | 15 ++++++++++++-- .../hogql_queries/web_analytics/top_pages.py | 20 ++++++++++++++----- 2 files changed, 28 insertions(+), 7 deletions(-) diff --git a/posthog/hogql_queries/web_analytics/ctes.py b/posthog/hogql_queries/web_analytics/ctes.py index 8fcd85b960a4f..ba89a9d392542 100644 --- a/posthog/hogql_queries/web_analytics/ctes.py +++ b/posthog/hogql_queries/web_analytics/ctes.py @@ -59,10 +59,21 @@ """ PATHNAME_CTE = """ +SELECT + events.properties.`$pathname` AS pathname, + count() as total_pageviews, + uniq(events.properties.distinct_id) as unique_visitors -- might want to use person id? have seen a small number of pages where unique > total +FROM + events +WHERE + (event = '$pageview') + AND events.timestamp >= now() - INTERVAL 7 DAY +GROUP BY pathname +""" + +PATHNAME_SCROLL_CTE = """ SELECT events.properties.`$prev_pageview_pathname` AS pathname, - countIf(events.event == '$pageview') as total_pageviews, - COUNT(DISTINCT events.properties.distinct_id) as unique_visitors, -- might want to use person id? have seen a small number of pages where unique > total avg(CASE WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) IS NULL THEN NULL WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) > 0.8 THEN 100 diff --git a/posthog/hogql_queries/web_analytics/top_pages.py b/posthog/hogql_queries/web_analytics/top_pages.py index a17febefdd31a..b8629bdbcf882 100644 --- a/posthog/hogql_queries/web_analytics/top_pages.py +++ b/posthog/hogql_queries/web_analytics/top_pages.py @@ -3,7 +3,7 @@ from posthog.hogql import ast from posthog.hogql.parser import parse_select from posthog.hogql.query import execute_hogql_query -from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE, PATHNAME_CTE +from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE, PATHNAME_CTE, PATHNAME_SCROLL_CTE from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.models.filters.mixins.utils import cached_property @@ -19,6 +19,8 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: session_query = parse_select(SESSION_CTE, timings=self.timings) with self.timings.measure("pathname_query"): pathname_query = parse_select(PATHNAME_CTE, timings=self.timings) + with self.timings.measure("pathname_scroll_query"): + pathname_scroll_query = parse_select(PATHNAME_SCROLL_CTE, timings=self.timings) with self.timings.measure("top_pages_query"): top_sources_query = parse_select( """ @@ -26,9 +28,9 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: pathname.pathname as pathname, pathname.total_pageviews as total_pageviews, pathname.unique_visitors as unique_visitors, - pathname.scroll_gt80_percentage as scroll_gt80_percentage, - pathname.average_scroll_percentage as average_scroll_percentage, - bounce_rate.bounce_rate as bounce_rate + bounce_rate.bounce_rate as bounce_rate, + scroll_data.scroll_gt80_percentage as scroll_gt80_percentage, + scroll_data.average_scroll_percentage as average_scroll_percentage FROM {pathname_query} AS pathname LEFT OUTER JOIN @@ -43,11 +45,19 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: ) AS bounce_rate ON pathname.pathname = bounce_rate.earliest_pathname +LEFT OUTER JOIN + {pathname_scroll_query} AS scroll_data +ON + pathname.pathname = scroll_data.pathname ORDER BY total_pageviews DESC """, timings=self.timings, - placeholders={"pathname_query": pathname_query, "session_query": session_query}, + placeholders={ + "pathname_query": pathname_query, + "session_query": session_query, + "pathname_scroll_query": pathname_scroll_query, + }, ) return top_sources_query From 67eeb5e70f066001d4a8d427a0e2b2c3aa7e4f17 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Wed, 4 Oct 2023 21:47:23 +0100 Subject: [PATCH 2/6] Use person id for now --- posthog/hogql_queries/web_analytics/ctes.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/hogql_queries/web_analytics/ctes.py b/posthog/hogql_queries/web_analytics/ctes.py index ba89a9d392542..1a7ad57eafcb1 100644 --- a/posthog/hogql_queries/web_analytics/ctes.py +++ b/posthog/hogql_queries/web_analytics/ctes.py @@ -62,7 +62,7 @@ SELECT events.properties.`$pathname` AS pathname, count() as total_pageviews, - uniq(events.properties.distinct_id) as unique_visitors -- might want to use person id? have seen a small number of pages where unique > total + uniq(events.properties.person_id) as unique_visitors -- might want to use person id? have seen a small number of pages where unique > total FROM events WHERE From cf958325cc2385431d59cd8369c62d8528780464 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Wed, 4 Oct 2023 22:57:52 +0100 Subject: [PATCH 3/6] Add limits to web analytics queries --- posthog/hogql_queries/web_analytics/top_clicks.py | 1 + posthog/hogql_queries/web_analytics/top_pages.py | 1 + posthog/hogql_queries/web_analytics/top_sources.py | 2 +- 3 files changed, 3 insertions(+), 1 deletion(-) diff --git a/posthog/hogql_queries/web_analytics/top_clicks.py b/posthog/hogql_queries/web_analytics/top_clicks.py index 8521e35f461bf..d5e8237715ac8 100644 --- a/posthog/hogql_queries/web_analytics/top_clicks.py +++ b/posthog/hogql_queries/web_analytics/top_clicks.py @@ -31,6 +31,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: GROUP BY el_text ORDER BY total_clicks DESC +LIMIT 10 """, timings=self.timings, ) diff --git a/posthog/hogql_queries/web_analytics/top_pages.py b/posthog/hogql_queries/web_analytics/top_pages.py index b8629bdbcf882..6e13196275331 100644 --- a/posthog/hogql_queries/web_analytics/top_pages.py +++ b/posthog/hogql_queries/web_analytics/top_pages.py @@ -51,6 +51,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: pathname.pathname = scroll_data.pathname ORDER BY total_pageviews DESC +LIMIT 10 """, timings=self.timings, placeholders={ diff --git a/posthog/hogql_queries/web_analytics/top_sources.py b/posthog/hogql_queries/web_analytics/top_sources.py index ba61c6ab82698..8de3b79b19574 100644 --- a/posthog/hogql_queries/web_analytics/top_sources.py +++ b/posthog/hogql_queries/web_analytics/top_sources.py @@ -32,7 +32,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: GROUP BY blended_source ORDER BY total_pageviews DESC -LIMIT 100 +LIMIT 10 """, timings=self.timings, placeholders={"session_query": session_query}, From 4156482c3eff809e2986d0005f1d37cc0b9a6c1e Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Wed, 4 Oct 2023 23:13:33 +0100 Subject: [PATCH 4/6] Add some trends to web analytics --- .../scenes/web-analytics/webAnalyticsLogic.ts | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) diff --git a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts index 42ec60f4642d5..0e8a1570d2e88 100644 --- a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts +++ b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts @@ -2,6 +2,7 @@ import { actions, connect, kea, listeners, path, reducers, selectors, sharedList import type { webAnalyticsLogicType } from './webAnalyticsLogicType' import { NodeKind, QuerySchema } from '~/queries/schema' +import { BaseMathType, ChartDisplayType } from '~/types' interface Layout { colSpan?: number @@ -59,6 +60,62 @@ export const webAnalyticsLogic = kea([ }, }, }, + { + layout: { + colSpan: 6, + }, + query: { + kind: NodeKind.InsightVizNode, + source: { + filterTestAccounts: false, + interval: 'day', + kind: NodeKind.TrendsQuery, + series: [ + { + event: '$pageview', + kind: NodeKind.EventsNode, + math: BaseMathType.UniqueUsers, + name: '$pageview', + }, + ], + trendsFilter: { + compare: true, + display: ChartDisplayType.ActionsLineGraph, + }, + }, + }, + }, + { + layout: { + colSpan: 6, + }, + query: { + kind: NodeKind.InsightVizNode, + source: { + breakdown: { + breakdown: '$geoip_country_code', + breakdown_type: 'person', + }, + dateRange: { + date_from: '-7d', + date_to: null, + }, + filterTestAccounts: true, + kind: NodeKind.TrendsQuery, + series: [ + { + event: '$pageview', + kind: NodeKind.EventsNode, + math: BaseMathType.UniqueUsers, + name: '$pageview', + }, + ], + trendsFilter: { + display: ChartDisplayType.WorldMap, + }, + }, + }, + }, ], ], }), From 7a795e8d1bd0897816b1ae4ed9634372c9540978 Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Thu, 5 Oct 2023 13:06:54 +0100 Subject: [PATCH 5/6] Tweak query date ranges and formatting --- .../src/scenes/web-analytics/webAnalyticsLogic.ts | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts index 0e8a1570d2e88..ff68b6cea6a50 100644 --- a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts +++ b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts @@ -67,9 +67,12 @@ export const webAnalyticsLogic = kea([ query: { kind: NodeKind.InsightVizNode, source: { - filterTestAccounts: false, - interval: 'day', kind: NodeKind.TrendsQuery, + dateRange: { + date_from: '-7d', + date_to: '-1d', + }, + interval: 'day', series: [ { event: '$pageview', @@ -82,6 +85,7 @@ export const webAnalyticsLogic = kea([ compare: true, display: ChartDisplayType.ActionsLineGraph, }, + filterTestAccounts: true, }, }, }, @@ -92,27 +96,25 @@ export const webAnalyticsLogic = kea([ query: { kind: NodeKind.InsightVizNode, source: { + kind: NodeKind.TrendsQuery, breakdown: { breakdown: '$geoip_country_code', breakdown_type: 'person', }, dateRange: { date_from: '-7d', - date_to: null, }, - filterTestAccounts: true, - kind: NodeKind.TrendsQuery, series: [ { event: '$pageview', kind: NodeKind.EventsNode, math: BaseMathType.UniqueUsers, - name: '$pageview', }, ], trendsFilter: { display: ChartDisplayType.WorldMap, }, + filterTestAccounts: true, }, }, }, From fdff82b8413ac0e19ebebf5a4fc5348b7a9d523e Mon Sep 17 00:00:00 2001 From: Robbie Coomber Date: Fri, 6 Oct 2023 15:48:41 +0100 Subject: [PATCH 6/6] Consistently use person_id for now --- posthog/hogql_queries/web_analytics/ctes.py | 2 +- posthog/hogql_queries/web_analytics/overview_stats.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/posthog/hogql_queries/web_analytics/ctes.py b/posthog/hogql_queries/web_analytics/ctes.py index 1a7ad57eafcb1..22a69c9193803 100644 --- a/posthog/hogql_queries/web_analytics/ctes.py +++ b/posthog/hogql_queries/web_analytics/ctes.py @@ -62,7 +62,7 @@ SELECT events.properties.`$pathname` AS pathname, count() as total_pageviews, - uniq(events.properties.person_id) as unique_visitors -- might want to use person id? have seen a small number of pages where unique > total + uniq(events.person_id) as unique_visitors -- might want to use person id? have seen a small number of pages where unique > total FROM events WHERE diff --git a/posthog/hogql_queries/web_analytics/overview_stats.py b/posthog/hogql_queries/web_analytics/overview_stats.py index 8632eaa781216..6ad7a30182444 100644 --- a/posthog/hogql_queries/web_analytics/overview_stats.py +++ b/posthog/hogql_queries/web_analytics/overview_stats.py @@ -22,8 +22,8 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: overview_stats_query = parse_select( """ SELECT - uniq(if(timestamp >= {mid} AND timestamp < {end}, events.distinct_id, NULL)) AS current_week_unique_users, - uniq(if(timestamp >= {start} AND timestamp < {mid}, events.distinct_id, NULL)) AS previous_week_unique_users, + uniq(if(timestamp >= {mid} AND timestamp < {end}, events.person_id, NULL)) AS current_week_unique_users, + uniq(if(timestamp >= {start} AND timestamp < {mid}, events.person_id, NULL)) AS previous_week_unique_users, uniq(if(timestamp >= {mid} AND timestamp < {end}, events.properties.$session_id, NULL)) AS current_week_unique_sessions, uniq(if(timestamp >= {start} AND timestamp < {mid}, events.properties.$session_id, NULL)) AS previous_week_unique_sessions,