diff --git a/frontend/src/queries/nodes/DataTable/queryFeatures.ts b/frontend/src/queries/nodes/DataTable/queryFeatures.ts index eacd6ed5e31d1..7416b323d0418 100644 --- a/frontend/src/queries/nodes/DataTable/queryFeatures.ts +++ b/frontend/src/queries/nodes/DataTable/queryFeatures.ts @@ -2,6 +2,7 @@ import { isEventsQuery, isHogQLQuery, isPersonsNode, + isWebOverviewStatsQuery, isWebTopClicksQuery, isWebTopPagesQuery, isWebTopSourcesQuery, @@ -47,7 +48,12 @@ export function getQueryFeatures(query: Node): Set { features.add(QueryFeature.personsSearch) } - if (isWebTopSourcesQuery(query) || isWebTopPagesQuery(query) || isWebTopClicksQuery(query)) { + if ( + isWebOverviewStatsQuery(query) || + isWebTopSourcesQuery(query) || + isWebTopPagesQuery(query) || + isWebTopClicksQuery(query) + ) { features.add(QueryFeature.columnsInResponse) features.add(QueryFeature.resultIsArrayOfArrays) } diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index cb13caa4be23e..6517179ed4c38 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -96,6 +96,9 @@ { "$ref": "#/definitions/TimeToSeeDataSessionsQuery" }, + { + "$ref": "#/definitions/WebOverviewStatsQuery" + }, { "$ref": "#/definitions/WebTopSourcesQuery" }, @@ -399,6 +402,9 @@ { "$ref": "#/definitions/TimeToSeeDataSessionsQuery" }, + { + "$ref": "#/definitions/WebOverviewStatsQuery" + }, { "$ref": "#/definitions/WebTopSourcesQuery" }, @@ -2293,6 +2299,60 @@ "type": "object" }, "WebAnalyticsFilters": {}, + "WebOverviewStatsQuery": { + "additionalProperties": false, + "properties": { + "dateRange": { + "$ref": "#/definitions/DateRange" + }, + "filters": { + "$ref": "#/definitions/WebAnalyticsFilters" + }, + "kind": { + "const": "WebOverviewStatsQuery", + "type": "string" + }, + "response": { + "$ref": "#/definitions/WebOverviewStatsQueryResponse" + } + }, + "required": ["kind", "filters"], + "type": "object" + }, + "WebOverviewStatsQueryResponse": { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "type": "string" + }, + "next_allowed_client_refresh": { + "type": "string" + }, + "result": { + "items": {}, + "type": "array" + }, + "timings": { + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": ["result"], + "type": "object" + }, "WebTopClicksQuery": { "additionalProperties": false, "properties": { diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 3ce97fb670b76..4d3d06b98321c 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -58,6 +58,7 @@ export enum NodeKind { LifecycleQuery = 'LifecycleQuery', // Web analytics queries + WebOverviewStatsQuery = 'WebOverviewStatsQuery', WebTopSourcesQuery = 'WebTopSourcesQuery', WebTopPagesQuery = 'WebTopPagesQuery', WebTopClicksQuery = 'WebTopClicksQuery', @@ -80,6 +81,7 @@ export type AnyDataNode = | HogQLQuery | HogQLMetadata | TimeToSeeDataSessionsQuery + | WebOverviewStatsQuery | WebTopSourcesQuery | WebTopClicksQuery | WebTopPagesQuery @@ -293,6 +295,7 @@ export interface DataTableNode extends Node, DataTableNodeViewProps { | PersonsNode | HogQLQuery | TimeToSeeDataSessionsQuery + | WebOverviewStatsQuery | WebTopSourcesQuery | WebTopClicksQuery | WebTopPagesQuery @@ -507,6 +510,17 @@ export interface WebAnalyticsQueryBase { dateRange?: DateRange } +export interface WebOverviewStatsQuery extends WebAnalyticsQueryBase { + kind: NodeKind.WebOverviewStatsQuery + filters: WebAnalyticsFilters + response?: WebOverviewStatsQueryResponse +} + +export interface WebOverviewStatsQueryResponse extends QueryResponse { + result: unknown[] + types?: unknown[] + columns?: unknown[] +} export interface WebTopSourcesQuery extends WebAnalyticsQueryBase { kind: NodeKind.WebTopSourcesQuery filters: WebAnalyticsFilters diff --git a/frontend/src/queries/utils.ts b/frontend/src/queries/utils.ts index cdbfcedfafb90..463f0f727dfd4 100644 --- a/frontend/src/queries/utils.ts +++ b/frontend/src/queries/utils.ts @@ -29,6 +29,7 @@ import { WebTopSourcesQuery, WebTopClicksQuery, WebTopPagesQuery, + WebOverviewStatsQuery, } from '~/queries/schema' import { TaxonomicFilterGroupType, TaxonomicFilterValue } from 'lib/components/TaxonomicFilter/types' import { dayjs } from 'lib/dayjs' @@ -92,7 +93,9 @@ export function isInsightVizNode(node?: Node | null): node is InsightVizNode { export function isHogQLQuery(node?: Node | null): node is HogQLQuery { return node?.kind === NodeKind.HogQLQuery } - +export function isWebOverviewStatsQuery(node?: Node | null): node is WebOverviewStatsQuery { + return node?.kind === NodeKind.WebOverviewStatsQuery +} export function isWebTopSourcesQuery(node?: Node | null): node is WebTopSourcesQuery { return node?.kind === NodeKind.WebTopSourcesQuery } diff --git a/frontend/src/scenes/saved-insights/SavedInsights.tsx b/frontend/src/scenes/saved-insights/SavedInsights.tsx index cf2265f308ae9..b38b2df5c0790 100644 --- a/frontend/src/scenes/saved-insights/SavedInsights.tsx +++ b/frontend/src/scenes/saved-insights/SavedInsights.tsx @@ -242,6 +242,12 @@ export const QUERY_TYPES_METADATA: Record = { icon: InsightSQLIcon, inMenu: true, }, + [NodeKind.WebOverviewStatsQuery]: { + name: 'Overview Stats', + description: 'View overview stats for a website', + icon: InsightsTrendsIcon, + inMenu: true, + }, [NodeKind.WebTopSourcesQuery]: { name: 'Top Sources', description: 'View top sources for a website', diff --git a/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx b/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx index 4fb41fe7261cc..a154bb7006ebe 100644 --- a/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx +++ b/frontend/src/scenes/web-analytics/WebAnalyticsScene.tsx @@ -1,49 +1,9 @@ import { SceneExport } from 'scenes/sceneTypes' import { webAnalyticsLogic } from 'scenes/web-analytics/webAnalyticsLogic' -import { Query } from '~/queries/Query/Query' -import { NodeKind } from '~/queries/schema' +import { WebAnalyticsDashboard } from 'scenes/web-analytics/WebDashboard' export function WebAnalyticsScene(): JSX.Element { - return ( -
- Top sources - - Top clicks - - Top pages - -
- ) + return } export const scene: SceneExport = { diff --git a/frontend/src/scenes/web-analytics/WebDashboard.tsx b/frontend/src/scenes/web-analytics/WebDashboard.tsx new file mode 100644 index 0000000000000..35485ad82b70e --- /dev/null +++ b/frontend/src/scenes/web-analytics/WebDashboard.tsx @@ -0,0 +1,21 @@ +import { Query } from '~/queries/Query/Query' +import { useValues } from 'kea' +import { webAnalyticsLogic } from 'scenes/web-analytics/webAnalyticsLogic' + +export const WebAnalyticsDashboard = (): JSX.Element => { + const { tiles } = useValues(webAnalyticsLogic) + return ( +
+ {tiles.map(({ query, layout }, i) => ( +
+ +
+ ))} +
+ ) +} diff --git a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts index ccb9f70f8857e..42ec60f4642d5 100644 --- a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts +++ b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts @@ -1,13 +1,67 @@ import { actions, connect, kea, listeners, path, reducers, selectors, sharedListeners } from 'kea' import type { webAnalyticsLogicType } from './webAnalyticsLogicType' +import { NodeKind, QuerySchema } from '~/queries/schema' +interface Layout { + colSpan?: number + rowSpan?: number +} +export interface WebDashboardTile { + query: QuerySchema + layout: Layout +} export const webAnalyticsLogic = kea([ path(['scenes', 'webAnalytics', 'webAnalyticsSceneLogic']), connect({}), actions({}), reducers({}), - selectors(() => ({})), + selectors({ + tiles: [ + () => [], + (): WebDashboardTile[] => [ + { + layout: { + colSpan: 12, + }, + query: { + full: true, + kind: NodeKind.DataTableNode, + source: { + kind: NodeKind.WebOverviewStatsQuery, + filters: {}, + }, + }, + }, + { + layout: { + colSpan: 6, + }, + query: { + full: true, + kind: NodeKind.DataTableNode, + source: { + kind: NodeKind.WebTopPagesQuery, + filters: {}, + }, + }, + }, + { + layout: { + colSpan: 6, + }, + query: { + full: true, + kind: NodeKind.DataTableNode, + source: { + kind: NodeKind.WebTopSourcesQuery, + filters: {}, + }, + }, + }, + ], + ], + }), sharedListeners(() => ({})), listeners(() => ({})), ]) diff --git a/frontend/src/styles/utilities.scss b/frontend/src/styles/utilities.scss index 745375f1c3f57..24664fb521a6b 100644 --- a/frontend/src/styles/utilities.scss +++ b/frontend/src/styles/utilities.scss @@ -506,6 +506,14 @@ $decorations: underline, overline, line-through, no-underline; display: inline; } +.grid { + display: grid; +} + +.inline-grid { + display: inline-grid; +} + .hidden { display: none; } @@ -619,6 +627,103 @@ $decorations: underline, overline, line-through, no-underline; align-self: baseline; } +// Grid Template Columns +@for $i from 1 through 12 { + .grid-cols-#{$i} { + grid-template-columns: repeat(#{$i}, minmax(0, 1fr)); + } +} +.grid-cols-none { + grid-template-columns: none; +} + +// Grid Column Start/End +.col-auto { + grid-column: auto; +} + +@for $i from 1 through 12 { + .col-span-#{$i} { + grid-column: span #{$i} / span #{$i}; + } +} +.col-span-full { + grid-column: 1 / -1; +} + +@for $i from 1 through 13 { + .col-start-#{$i} { + grid-column-start: #{$i}; + } +} +.col-start-auto { + grid-column-start: auto; +} + +@for $i from 1 through 13 { + .col-end-#{$i} { + grid-column-end: #{$i}; + } +} +.col-end-auto { + grid-column-end: auto; +} + +// Grid Row Start/End +.row-auto { + grid-row: auto; +} + +@for $i from 1 through 6 { + .row-span-#{$i} { + grid-row: span #{$i} / span #{$i}; + } +} +.row-span-full { + grid-row: 1 / -1; +} + +@for $i from 1 through 7 { + .row-start-#{$i} { + grid-row-start: #{$i}; + } +} +.row-start-auto { + grid-row-start: auto; +} + +@for $i from 1 through 7 { + .row-end-#{$i} { + grid-row-end: #{$i}; + } +} +.row-end-auto { + grid-row-end: auto; +} + +// Gap +@each $space in $all_spaces { + .gap-#{escape-number($space)} { + gap: #{$space * 0.25}rem; + } + .gap-x-#{escape-number($space)} { + column-gap: #{$space * 0.25}rem; + } + .gap-y-#{escape-number($space)} { + row-gap: #{$space * 0.25}rem; + } +} +.gap-px { + gap: 1px; +} +.gap-x-px { + column-gap: 1px; +} +.gap-y-px { + row-gap: 1px; +} + +// Typography .font-thin { font-weight: 100; } diff --git a/posthog/api/query.py b/posthog/api/query.py index cb3ea78fad090..078bf8cd3eaee 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -41,6 +41,7 @@ QUERY_WITH_RUNNER = [ "LifecycleQuery", "TrendsQuery", + "WebOverviewStatsQuery", "WebTopSourcesQuery", "WebTopClicksQuery", "WebTopPagesQuery", diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index d8b51dd959343..915fd79de9ee3 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -21,6 +21,7 @@ WebTopSourcesQuery, WebTopClicksQuery, WebTopPagesQuery, + WebOverviewStatsQuery, ) from posthog.utils import generate_cache_key, get_safe_cache @@ -61,6 +62,7 @@ class CachedQueryResponse(QueryResponse): RunnableQueryNode = Union[ TrendsQuery, LifecycleQuery, + WebOverviewStatsQuery, WebTopSourcesQuery, WebTopClicksQuery, WebTopPagesQuery, @@ -84,7 +86,10 @@ def get_query_runner( from .insights.trends_query_runner import TrendsQueryRunner return TrendsQueryRunner(query=cast(TrendsQuery | Dict[str, Any], query), team=team, timings=timings) + if kind == "WebOverviewStatsQuery": + from .web_analytics.overview_stats import WebOverviewStatsQueryRunner + return WebOverviewStatsQueryRunner(query=query, team=team, timings=timings) if kind == "WebTopSourcesQuery": from .web_analytics.top_sources import WebTopSourcesQueryRunner diff --git a/posthog/hogql_queries/web_analytics/ctes.py b/posthog/hogql_queries/web_analytics/ctes.py new file mode 100644 index 0000000000000..8fcd85b960a4f --- /dev/null +++ b/posthog/hogql_queries/web_analytics/ctes.py @@ -0,0 +1,78 @@ +# The intention is for these CTEs to become materialized views for performance reasons, but +# while these queries are under development they are left as CTEs so that they can be iterated +# on without needing database migrations + +SESSION_CTE = """ +SELECT + events.properties.`$session_id` AS session_id, + min(events.timestamp) AS min_timestamp, + max(events.timestamp) AS max_timestamp, + dateDiff('second', min_timestamp, max_timestamp) AS duration_s, + + argMin(events.properties.`$referrer`, events.timestamp) AS earliest_referrer, + argMin(events.properties.`$pathname`, events.timestamp) AS earliest_pathname, + argMax(events.properties.`$pathname`, events.timestamp ) AS latest_pathname, + argMax(events.properties.utm_source, events.timestamp) AS earliest_utm_source, + + if(domain(earliest_referrer) = '', earliest_referrer, domain(earliest_referrer)) AS referrer_domain, + multiIf( + earliest_utm_source IS NOT NULL, earliest_utm_source, + -- This will need to be an approach that scales better + referrer_domain == 'app.posthog.com', 'posthog', + referrer_domain == 'eu.posthog.com', 'posthog', + referrer_domain == 'posthog.com', 'posthog', + referrer_domain == 'www.google.com', 'google', + referrer_domain == 'www.google.co.uk', 'google', + referrer_domain == 'www.google.com.hk', 'google', + referrer_domain == 'www.google.de', 'google', + referrer_domain == 't.co', 'twitter', + referrer_domain == 'github.com', 'github', + referrer_domain == 'duckduckgo.com', 'duckduckgo', + referrer_domain == 'www.bing.com', 'bing', + referrer_domain == 'bing.com', 'bing', + referrer_domain == 'yandex.ru', 'yandex', + referrer_domain == 'quora.com', 'quora', + referrer_domain == 'www.quora.com', 'quora', + referrer_domain == 'linkedin.com', 'linkedin', + referrer_domain == 'www.linkedin.com', 'linkedin', + startsWith(referrer_domain, 'http://localhost:'), 'localhost', + referrer_domain + ) AS blended_source, + + countIf(events.event == '$pageview') AS num_pageviews, + countIf(events.event == '$autocapture') AS num_autocaptures, + -- in v1 we'd also want to count whether there were any conversion events + + any(events.person_id) as person_id, + -- definition of a GA4 bounce from here https://support.google.com/analytics/answer/12195621?hl=en + (num_autocaptures == 0 AND num_pageviews <= 1 AND duration_s < 10) AS is_bounce +FROM + events +WHERE + session_id IS NOT NULL +AND + events.timestamp >= now() - INTERVAL 8 DAY +GROUP BY + events.properties.`$session_id` +HAVING + min_timestamp >= now() - INTERVAL 7 DAY + """ + +PATHNAME_CTE = """ +SELECT + events.properties.`$prev_pageview_pathname` AS pathname, + countIf(events.event == '$pageview') as total_pageviews, + COUNT(DISTINCT events.properties.distinct_id) as unique_visitors, -- might want to use person id? have seen a small number of pages where unique > total + avg(CASE + WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) IS NULL THEN NULL + WHEN toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_content_percentage')) > 0.8 THEN 100 + ELSE 0 + END) AS scroll_gt80_percentage, + avg(toFloat(JSONExtractRaw(events.properties, '$prev_pageview_max_scroll_percentage'))) as average_scroll_percentage +FROM + events +WHERE + (event = '$pageview' OR event = '$pageleave') AND events.properties.`$prev_pageview_pathname` IS NOT NULL + AND events.timestamp >= now() - INTERVAL 7 DAY +GROUP BY pathname +""" diff --git a/posthog/hogql_queries/web_analytics/overview_stats.py b/posthog/hogql_queries/web_analytics/overview_stats.py new file mode 100644 index 0000000000000..810cab5111ffe --- /dev/null +++ b/posthog/hogql_queries/web_analytics/overview_stats.py @@ -0,0 +1,59 @@ +from django.utils.timezone import datetime + +from posthog.hogql import ast +from posthog.hogql.parser import parse_select, parse_expr +from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner +from posthog.models.filters.mixins.utils import cached_property +from posthog.schema import WebOverviewStatsQueryResponse, WebOverviewStatsQuery + + +class WebOverviewStatsQueryRunner(WebAnalyticsQueryRunner): + query: WebOverviewStatsQuery + query_type = WebOverviewStatsQuery + + def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + with self.timings.measure("date_expr"): + start = parse_expr("today() - 14") + mid = parse_expr("today() - 7") + end = parse_expr("today()") + with self.timings.measure("overview_stats_query"): + overview_stats_query = parse_select( + """ +SELECT + uniq(if(timestamp >= {mid} AND timestamp < {end}, events.distinct_id, NULL)) AS current_week_unique_users, + uniq(if(timestamp >= {start} AND timestamp < {mid}, events.distinct_id, NULL)) AS previous_week_unique_users, + + uniq(if(timestamp >= {mid} AND timestamp < {end}, events.properties.$session_id, NULL)) AS current_week_unique_sessions, + uniq(if(timestamp >= {start} AND timestamp < {mid}, events.properties.$session_id, NULL)) AS previous_week_unique_sessions, + + countIf(timestamp >= {mid} AND timestamp < {end}) AS current_week_pageviews, + countIf(timestamp >= {start} AND timestamp < {mid}) AS previous_week_pageviews +FROM + events +WHERE + event = '$pageview' AND + timestamp >= {start} AND + timestamp < {end} + """, + timings=self.timings, + placeholders={"start": start, "mid": mid, "end": end}, + ) + return overview_stats_query + + def calculate(self): + response = execute_hogql_query( + query_type="overview_stats_query", + query=self.to_query(), + team=self.team, + timings=self.timings, + ) + + return WebOverviewStatsQueryResponse( + columns=response.columns, result=response.results, timings=response.timings, types=response.types + ) + + @cached_property + def query_date_range(self): + return QueryDateRange(date_range=self.query.dateRange, team=self.team, interval=None, now=datetime.now()) diff --git a/posthog/hogql_queries/web_analytics/top_pages.py b/posthog/hogql_queries/web_analytics/top_pages.py index 7ded183b80d1b..3c2db51de8504 100644 --- a/posthog/hogql_queries/web_analytics/top_pages.py +++ b/posthog/hogql_queries/web_analytics/top_pages.py @@ -3,6 +3,7 @@ from posthog.hogql import ast from posthog.hogql.parser import parse_select from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE, PATHNAME_CTE from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.models.filters.mixins.utils import cached_property @@ -14,123 +15,39 @@ class WebTopPagesQueryRunner(WebAnalyticsQueryRunner): query_type = WebTopPagesQuery def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + with self.timings.measure("session_query"): + session_query = parse_select(SESSION_CTE, timings=self.timings) + with self.timings.measure("pathname_query"): + pathname_query = parse_select(PATHNAME_CTE, timings=self.timings) with self.timings.measure("top_pages_query"): top_sources_query = parse_select( """ -WITH - -scroll_depth_cte AS ( -SELECT - events.properties.`$prev_pageview_pathname` AS pathname, - countIf(events.event == '$pageview') as total_pageviews, - COUNT(DISTINCT events.properties.distinct_id) as unique_visitors, -- might want to use person id? have seen a small number of pages where unique > total - avg(CASE - WHEN events.properties.`$prev_pageview_max_content_percentage` IS NULL THEN NULL - WHEN events.properties.`$prev_pageview_max_content_percentage` > 0.8 THEN 100 - ELSE 0 - END) AS scroll_gt80_percentage, - avg(events.properties.$prev_pageview_max_scroll_percentage) * 100 as average_scroll_percentage -FROM - events -WHERE - (event = '$pageview' OR event = '$pageleave') AND events.properties.`$prev_pageview_pathname` IS NOT NULL - AND events.timestamp >= now() - INTERVAL 7 DAY -GROUP BY pathname -) - -, - -session_cte AS ( SELECT - events.properties.`$session_id` AS session_id, - min(events.timestamp) AS min_timestamp, - max(events.timestamp) AS max_timestamp, - dateDiff('second', min_timestamp, max_timestamp) AS duration_s, - - -- create a tuple so that these are grouped in the same order, see https://github.com/ClickHouse/ClickHouse/discussions/42338 - groupArray((events.timestamp, events.properties.`$referrer`, events.properties.`$pathname`, events.properties.utm_source)) AS tuple_array, - arrayFirstIndex(x -> tupleElement(x, 1) == min_timestamp, tuple_array) as index_of_earliest, - arrayFirstIndex(x -> tupleElement(x, 1) == max_timestamp, tuple_array) as index_of_latest, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 2) AS earliest_referrer, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 3) AS earliest_pathname, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 4) AS earliest_utm_source, - - if(domain(earliest_referrer) = '', earliest_referrer, domain(earliest_referrer)) AS referrer_domain, - multiIf( - earliest_utm_source IS NOT NULL, earliest_utm_source, - -- This will need to be an approach that scales better - referrer_domain == 'app.posthog.com', 'posthog', - referrer_domain == 'eu.posthog.com', 'posthog', - referrer_domain == 'posthog.com', 'posthog', - referrer_domain == 'www.google.com', 'google', - referrer_domain == 'www.google.co.uk', 'google', - referrer_domain == 'www.google.com.hk', 'google', - referrer_domain == 'www.google.de', 'google', - referrer_domain == 't.co', 'twitter', - referrer_domain == 'github.com', 'github', - referrer_domain == 'duckduckgo.com', 'duckduckgo', - referrer_domain == 'www.bing.com', 'bing', - referrer_domain == 'bing.com', 'bing', - referrer_domain == 'yandex.ru', 'yandex', - referrer_domain == 'quora.com', 'quora', - referrer_domain == 'www.quora.com', 'quora', - referrer_domain == 'linkedin.com', 'linkedin', - referrer_domain == 'www.linkedin.com', 'linkedin', - startsWith(referrer_domain, 'http://localhost:'), 'localhost', - referrer_domain - ) AS blended_source, - - countIf(events.event == '$pageview') AS num_pageviews, - countIf(events.event == '$autocapture') AS num_autocaptures, - -- in v1 we'd also want to count whether there were any conversion events - - any(events.person_id) as person_id, - -- definition of a GA4 bounce from here https://support.google.com/analytics/answer/12195621?hl=en - (num_autocaptures == 0 AND num_pageviews <= 1 AND duration_s < 10) AS is_bounce -FROM - events -WHERE - session_id IS NOT NULL -AND - events.timestamp >= now() - INTERVAL 8 DAY -GROUP BY - events.properties.`$session_id` -HAVING - min_timestamp >= now() - INTERVAL 7 DAY -) - -, - -bounce_rate_cte AS ( -SELECT session_cte.earliest_pathname, - avg(session_cte.is_bounce) as bounce_rate -FROM session_cte -GROUP BY earliest_pathname -) - - - -SELECT scroll_depth_cte.pathname as pathname, -scroll_depth_cte.total_pageviews as total_pageviews, -scroll_depth_cte.unique_visitors as unique_visitors, -scroll_depth_cte.scroll_gt80_percentage as scroll_gt80_percentage, -scroll_depth_cte.average_scroll_percentage as average_scroll_percentage, -bounce_rate_cte.bounce_rate as bounce_rate + pathname.pathname as pathname, + pathname.total_pageviews as total_pageviews, + pathname.unique_visitors as unique_visitors, + pathname.scroll_gt80_percentage as scroll_gt80_percentage, + pathname.average_scroll_percentage as average_scroll_percentage, + bounce_rate.bounce_rate as bounce_rate FROM - scroll_depth_cte LEFT OUTER JOIN bounce_rate_cte -ON scroll_depth_cte.pathname = bounce_rate_cte.earliest_pathname -ORDER BY total_pageviews DESC + {pathname_query} AS pathname +LEFT OUTER JOIN + ( + SELECT + session.earliest_pathname, + avg(session.is_bounce) as bounce_rate + FROM + {session_query} AS session + GROUP BY + session.earliest_pathname + ) AS bounce_rate +ON + pathname.pathname = bounce_rate.earliest_pathname +ORDER BY + total_pageviews DESC """, timings=self.timings, + placeholders={"pathname_query": pathname_query, "session_query": session_query}, ) return top_sources_query diff --git a/posthog/hogql_queries/web_analytics/top_sources.py b/posthog/hogql_queries/web_analytics/top_sources.py index 2762627c6002d..2071eae4d5472 100644 --- a/posthog/hogql_queries/web_analytics/top_sources.py +++ b/posthog/hogql_queries/web_analytics/top_sources.py @@ -3,8 +3,9 @@ from posthog.hogql import ast from posthog.hogql.parser import parse_select from posthog.hogql.query import execute_hogql_query -from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.hogql_queries.web_analytics.ctes import SESSION_CTE +from posthog.hogql_queries.web_analytics.web_analytics_query_runner import WebAnalyticsQueryRunner from posthog.models.filters.mixins.utils import cached_property from posthog.schema import WebTopSourcesQuery, WebTopSourcesQueryResponse @@ -14,88 +15,18 @@ class WebTopSourcesQueryRunner(WebAnalyticsQueryRunner): query_type = WebTopSourcesQuery def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + with self.timings.measure("session_query"): + session_query = parse_select(SESSION_CTE, timings=self.timings) with self.timings.measure("top_sources_query"): top_sources_query = parse_select( """ -WITH - -session_cte AS ( -SELECT - events.properties.`$session_id` AS session_id, - min(events.timestamp) AS min_timestamp, - max(events.timestamp) AS max_timestamp, - dateDiff('second', min_timestamp, max_timestamp) AS duration_s, - - -- create a tuple so that these are grouped in the same order, see https://github.com/ClickHouse/ClickHouse/discussions/42338 - groupArray((events.timestamp, events.properties.`$referrer`, events.properties.`$pathname`, events.properties.utm_source)) AS tuple_array, - arrayFirstIndex(x -> tupleElement(x, 1) == min_timestamp, tuple_array) as index_of_earliest, - arrayFirstIndex(x -> tupleElement(x, 1) == max_timestamp, tuple_array) as index_of_latest, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 2) AS earliest_referrer, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 3) AS earliest_pathname, - tupleElement(arrayElement( - tuple_array, - index_of_earliest - ), 4) AS earliest_utm_source, - - if(domain(earliest_referrer) = '', earliest_referrer, domain(earliest_referrer)) AS referrer_domain, - multiIf( - earliest_utm_source IS NOT NULL, earliest_utm_source, - -- This will need to be an approach that scales better - referrer_domain == 'app.posthog.com', 'posthog', - referrer_domain == 'eu.posthog.com', 'posthog', - referrer_domain == 'posthog.com', 'posthog', - referrer_domain == 'www.google.com', 'google', - referrer_domain == 'www.google.co.uk', 'google', - referrer_domain == 'www.google.com.hk', 'google', - referrer_domain == 'www.google.de', 'google', - referrer_domain == 't.co', 'twitter', - referrer_domain == 'github.com', 'github', - referrer_domain == 'duckduckgo.com', 'duckduckgo', - referrer_domain == 'www.bing.com', 'bing', - referrer_domain == 'bing.com', 'bing', - referrer_domain == 'yandex.ru', 'yandex', - referrer_domain == 'quora.com', 'quora', - referrer_domain == 'www.quora.com', 'quora', - referrer_domain == 'linkedin.com', 'linkedin', - referrer_domain == 'www.linkedin.com', 'linkedin', - startsWith(referrer_domain, 'http://localhost:'), 'localhost', - referrer_domain - ) AS blended_source, - - countIf(events.event == '$pageview') AS num_pageviews, - countIf(events.event == '$autocapture') AS num_autocaptures, - -- in v1 we'd also want to count whether there were any conversion events - - any(events.person_id) as person_id, - -- definition of a GA4 bounce from here https://support.google.com/analytics/answer/12195621?hl=en - (num_autocaptures == 0 AND num_pageviews <= 1 AND duration_s < 10) AS is_bounce -FROM - events -WHERE - session_id IS NOT NULL -AND - events.timestamp >= now() - INTERVAL 8 DAY -GROUP BY - events.properties.`$session_id` -HAVING - min_timestamp >= now() - INTERVAL 7 DAY -) - - - SELECT blended_source, count(num_pageviews) as total_pageviews, count(DISTINCT person_id) as unique_visitors, avg(is_bounce) AS bounce_rate FROM - session_cte + {session_query} WHERE blended_source IS NOT NULL GROUP BY blended_source @@ -104,6 +35,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: LIMIT 100 """, timings=self.timings, + placeholders={"session_query": session_query}, ) return top_sources_query diff --git a/posthog/schema.py b/posthog/schema.py index d1a0244579bcf..0bde69bffeceb 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -468,6 +468,19 @@ class TrendsQueryResponse(BaseModel): timings: Optional[List[QueryTiming]] = None +class WebOverviewStatsQueryResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: Optional[List] = None + is_cached: Optional[bool] = None + last_refresh: Optional[str] = None + next_allowed_client_refresh: Optional[str] = None + result: List + timings: Optional[List[QueryTiming]] = None + types: Optional[List] = None + + class WebTopClicksQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", @@ -693,6 +706,16 @@ class TimeToSeeDataSessionsQuery(BaseModel): teamId: Optional[float] = Field(default=None, description="Project to filter on. Defaults to current project") +class WebOverviewStatsQuery(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + dateRange: Optional[DateRange] = None + filters: Any + kind: Literal["WebOverviewStatsQuery"] = "WebOverviewStatsQuery" + response: Optional[WebOverviewStatsQueryResponse] = None + + class WebTopClicksQuery(BaseModel): model_config = ConfigDict( extra="forbid", @@ -1060,6 +1083,7 @@ class DataTableNode(BaseModel): PersonsNode, HogQLQuery, TimeToSeeDataSessionsQuery, + WebOverviewStatsQuery, WebTopSourcesQuery, WebTopClicksQuery, WebTopPagesQuery, @@ -1340,6 +1364,7 @@ class Model(RootModel): HogQLQuery, HogQLMetadata, TimeToSeeDataSessionsQuery, + WebOverviewStatsQuery, WebTopSourcesQuery, WebTopClicksQuery, WebTopPagesQuery, diff --git a/tailwind.config.js b/tailwind.config.js index 0d535f72c8a60..661c4211f7756 100644 --- a/tailwind.config.js +++ b/tailwind.config.js @@ -139,13 +139,13 @@ module.exports = { // 'gridAutoColumns', // The grid-auto-columns utilities like auto-cols-min // 'gridAutoFlow', // The grid-auto-flow utilities like grid-flow-dense // 'gridAutoRows', // The grid-auto-rows utilities like auto-rows-min - // 'gridColumn', // The grid-column utilities like col-span-6 - // 'gridColumnEnd', // The grid-column-end utilities like col-end-7 - // 'gridColumnStart', // The grid-column-start utilities like col-start-7 - // 'gridRow', // The grid-row utilities like row-span-3 - // 'gridRowEnd', // The grid-row-end utilities like row-end-4 - // 'gridRowStart', // The grid-row-start utilities like row-start-4 - // 'gridTemplateColumns', // The grid-template-columns utilities like grid-cols-7 + 'gridColumn', // The grid-column utilities like col-span-6 + 'gridColumnEnd', // The grid-column-end utilities like col-end-7 + 'gridColumnStart', // The grid-column-start utilities like col-start-7 + 'gridRow', // The grid-row utilities like row-span-3 + 'gridRowEnd', // The grid-row-end utilities like row-end-4 + 'gridRowStart', // The grid-row-start utilities like row-start-4 + 'gridTemplateColumns', // The grid-template-columns utilities like grid-cols-7 // 'gridTemplateRows', // The grid-template-rows utilities like grid-rows-4 'height', // The height utilities like h-72 // 'hueRotate', // The hue-rotate utilities like hue-rotate-30