From aade1e404d4517da6dc0b2341367ffc3749568f1 Mon Sep 17 00:00:00 2001 From: Robbie Date: Mon, 9 Oct 2023 12:09:44 +0100 Subject: [PATCH] feat(web-analytics): Fix top pages sql and add some extra insights (#17850) * Fix top pages SQL * Use person id for now * Add limits to web analytics queries * Add some trends to web analytics * Tweak query date ranges and formatting * Consistently use person_id for now --- .../scenes/web-analytics/webAnalyticsLogic.ts | 59 +++++++++++++++++++ posthog/hogql_queries/web_analytics/ctes.py | 15 ++++- .../web_analytics/overview_stats.py | 4 +- .../hogql_queries/web_analytics/top_clicks.py | 1 + .../hogql_queries/web_analytics/top_pages.py | 21 +++++-- .../web_analytics/top_sources.py | 2 +- 6 files changed, 92 insertions(+), 10 deletions(-) diff --git a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts index 42ec60f4642d5..ff68b6cea6a50 100644 --- a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts +++ b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts @@ -2,6 +2,7 @@ import { actions, connect, kea, listeners, path, reducers, selectors, sharedList import type { webAnalyticsLogicType } from './webAnalyticsLogicType' import { NodeKind, QuerySchema } from '~/queries/schema' +import { BaseMathType, ChartDisplayType } from '~/types' interface Layout { colSpan?: number @@ -59,6 +60,64 @@ export const webAnalyticsLogic = kea([ }, }, }, + { + layout: { + colSpan: 6, + }, + query: { + kind: NodeKind.InsightVizNode, + source: { + kind: NodeKind.TrendsQuery, + dateRange: { + date_from: '-7d', + date_to: '-1d', + }, + interval: 'day', + series: [ + { + event: '$pageview', + kind: NodeKind.EventsNode, + math: BaseMathType.UniqueUsers, + name: '$pageview', + }, + ], + trendsFilter: { + compare: true, + display: ChartDisplayType.ActionsLineGraph, + }, + filterTestAccounts: true, + }, + }, + }, + { + layout: { + colSpan: 6, + }, + query: { + kind: NodeKind.InsightVizNode, + source: { + kind: NodeKind.TrendsQuery, + breakdown: { + breakdown: '$geoip_country_code', + breakdown_type: 'person', + }, + dateRange: { + date_from: '-7d', + }, + series: [ + { + event: '$pageview', + kind: NodeKind.EventsNode, + math: BaseMathType.UniqueUsers, + }, + ], + trendsFilter: { + display: ChartDisplayType.WorldMap, + }, + filterTestAccounts: true, + }, + }, + }, ], ], }), diff --git a/posthog/hogql_queries/web_analytics/ctes.py b/posthog/hogql_queries/web_analytics/ctes.py index 8fcd85b960a4f..22a69c9193803 100644 --- a/posthog/hogql_queries/web_analytics/ctes.py +++ b/posthog/hogql_queries/web_analytics/ctes.py @@ -59,10 +59,21 @@ """ PATHNAME_CTE = """ +SELECT + events.properties.`$pathname` AS pathname, + count() as total_pageviews, + uniq(events.person_id) as unique_visitors -- might want to use person id? have seen a small number of pages where unique > total +FROM + events +WHERE + (event = '$pageview') + AND events.timestamp >= now() - INTERVAL 7 DAY +GROUP BY pathname +""" + +PATHNAME_SCROLL_CTE = """ SELECT events.properties.`$prev_pageview_pathname` AS pathname, - countIf(events.event == '$pageview') as total_pageviews, - COUNT(DISTINCT events.properties.distinct_id) as unique_visitors, -- might want to use person id? have seen a small number of pages where unique > total avg(CASE WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) IS NULL THEN NULL WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) > 0.8 THEN 100 diff --git a/posthog/hogql_queries/web_analytics/overview_stats.py b/posthog/hogql_queries/web_analytics/overview_stats.py index 8632eaa781216..6ad7a30182444 100644 --- a/posthog/hogql_queries/web_analytics/overview_stats.py +++ b/posthog/hogql_queries/web_analytics/overview_stats.py @@ -22,8 +22,8 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: overview_stats_query = parse_select( """ SELECT - uniq(if(timestamp >= {mid} AND timestamp < {end}, events.distinct_id, NULL)) AS current_week_unique_users, - uniq(if(timestamp >= {start} AND timestamp < {mid}, events.distinct_id, NULL)) AS previous_week_unique_users, + uniq(if(timestamp >= {mid} AND timestamp < {end}, events.person_id, NULL)) AS current_week_unique_users, + uniq(if(timestamp >= {start} AND timestamp < {mid}, events.person_id, NULL)) AS previous_week_unique_users, uniq(if(timestamp >= {mid} AND timestamp < {end}, events.properties.$session_id, NULL)) AS current_week_unique_sessions, uniq(if(timestamp >= {start} AND timestamp < {mid}, events.properties.$session_id, NULL)) AS previous_week_unique_sessions, diff --git a/posthog/hogql_queries/web_analytics/top_clicks.py b/posthog/hogql_queries/web_analytics/top_clicks.py index 8521e35f461bf..d5e8237715ac8 100644 --- a/posthog/hogql_queries/web_analytics/top_clicks.py +++ b/posthog/hogql_queries/web_analytics/top_clicks.py @@ -31,6 +31,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: GROUP BY el_text ORDER BY total_clicks DESC +LIMIT 10 """, timings=self.timings, ) diff --git a/posthog/hogql_queries/web_analytics/top_pages.py b/posthog/hogql_queries/web_analytics/top_pages.py index a17febefdd31a..6e13196275331 100644 --- a/posthog/hogql_queries/web_analytics/top_pages.py +++ b/posthog/hogql_queries/web_analytics/top_pages.py @@ -3,7 +3,7 @@ from posthog.hogql import ast from posthog.hogql.parser import parse_select from posthog.hogql.query import execute_hogql_query -from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE, PATHNAME_CTE +from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE, PATHNAME_CTE, PATHNAME_SCROLL_CTE from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.models.filters.mixins.utils import cached_property @@ -19,6 +19,8 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: session_query = parse_select(SESSION_CTE, timings=self.timings) with self.timings.measure("pathname_query"): pathname_query = parse_select(PATHNAME_CTE, timings=self.timings) + with self.timings.measure("pathname_scroll_query"): + pathname_scroll_query = parse_select(PATHNAME_SCROLL_CTE, timings=self.timings) with self.timings.measure("top_pages_query"): top_sources_query = parse_select( """ @@ -26,9 +28,9 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: pathname.pathname as pathname, pathname.total_pageviews as total_pageviews, pathname.unique_visitors as unique_visitors, - pathname.scroll_gt80_percentage as scroll_gt80_percentage, - pathname.average_scroll_percentage as average_scroll_percentage, - bounce_rate.bounce_rate as bounce_rate + bounce_rate.bounce_rate as bounce_rate, + scroll_data.scroll_gt80_percentage as scroll_gt80_percentage, + scroll_data.average_scroll_percentage as average_scroll_percentage FROM {pathname_query} AS pathname LEFT OUTER JOIN @@ -43,11 +45,20 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: ) AS bounce_rate ON pathname.pathname = bounce_rate.earliest_pathname +LEFT OUTER JOIN + {pathname_scroll_query} AS scroll_data +ON + pathname.pathname = scroll_data.pathname ORDER BY total_pageviews DESC +LIMIT 10 """, timings=self.timings, - placeholders={"pathname_query": pathname_query, "session_query": session_query}, + placeholders={ + "pathname_query": pathname_query, + "session_query": session_query, + "pathname_scroll_query": pathname_scroll_query, + }, ) return top_sources_query diff --git a/posthog/hogql_queries/web_analytics/top_sources.py b/posthog/hogql_queries/web_analytics/top_sources.py index ba61c6ab82698..8de3b79b19574 100644 --- a/posthog/hogql_queries/web_analytics/top_sources.py +++ b/posthog/hogql_queries/web_analytics/top_sources.py @@ -32,7 +32,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: GROUP BY blended_source ORDER BY total_pageviews DESC -LIMIT 100 +LIMIT 10 """, timings=self.timings, placeholders={"session_query": session_query},