From 7ec7356930ebfa24bbe178698bcc4f51f63dcd35 Mon Sep 17 00:00:00 2001 From: Rafael Audibert <32079912+rafaeelaudibert@users.noreply.github.com> Date: Mon, 2 Dec 2024 15:12:20 -0300 Subject: [PATCH] feat(web-analytics): Display values from previous period alongside current data (#26474) --- frontend/src/lib/utils.tsx | 4 + frontend/src/queries/types.ts | 2 +- .../web-analytics/tiles/WebAnalyticsTile.tsx | 118 ++++++++++---- posthog/hogql/functions/mapping.py | 1 + .../web_analytics/stats_table.py | 152 +++++++++++++----- .../test/test_web_stats_table.py | 144 ++++++++++------- 6 files changed, 287 insertions(+), 134 deletions(-) diff --git a/frontend/src/lib/utils.tsx b/frontend/src/lib/utils.tsx index f512aa8e5dd85..15b90eeb5473a 100644 --- a/frontend/src/lib/utils.tsx +++ b/frontend/src/lib/utils.tsx @@ -124,6 +124,10 @@ export function percentage( maximumFractionDigits: number = 2, fixedPrecision: boolean = false ): string { + if (division === Infinity) { + return '∞%' + } + return division.toLocaleString('en-US', { style: 'percent', maximumFractionDigits, diff --git a/frontend/src/queries/types.ts b/frontend/src/queries/types.ts index afbae27286816..849ebf6c33e36 100644 --- a/frontend/src/queries/types.ts +++ b/frontend/src/queries/types.ts @@ -49,7 +49,7 @@ export type QueryContextColumnComponent = ComponentType<{ }> interface QueryContextColumn { - title?: string + title?: JSX.Element | string renderTitle?: QueryContextColumnTitleComponent render?: QueryContextColumnComponent align?: 'left' | 'right' | 'center' // default is left diff --git a/frontend/src/scenes/web-analytics/tiles/WebAnalyticsTile.tsx b/frontend/src/scenes/web-analytics/tiles/WebAnalyticsTile.tsx index e42d6c2de89cb..35079d1241faa 100644 --- a/frontend/src/scenes/web-analytics/tiles/WebAnalyticsTile.tsx +++ b/frontend/src/scenes/web-analytics/tiles/WebAnalyticsTile.tsx @@ -1,11 +1,14 @@ -import { IconGear } from '@posthog/icons' +import { IconGear, IconTrending } from '@posthog/icons' +import { Tooltip } from '@posthog/lemon-ui' +import clsx from 'clsx' import { useActions, useValues } from 'kea' +import { getColorVar } from 'lib/colors' import { IntervalFilterStandalone } from 'lib/components/IntervalFilter' import { ProductIntroduction } from 'lib/components/ProductIntroduction/ProductIntroduction' -import { IconOpenInNew } from 'lib/lemon-ui/icons' +import { IconOpenInNew, IconTrendingDown, IconTrendingFlat } from 'lib/lemon-ui/icons' import { LemonButton } from 'lib/lemon-ui/LemonButton' import { LemonSwitch } from 'lib/lemon-ui/LemonSwitch' -import { UnexpectedNeverError } from 'lib/utils' +import { percentage, UnexpectedNeverError } from 'lib/utils' import { useCallback, useMemo } from 'react' import { NewActionButton } from 'scenes/actions/NewActionButton' import { countryCodeToFlag, countryCodeToName } from 'scenes/insights/views/WorldMap' @@ -37,15 +40,72 @@ const toUtcOffsetFormat = (value: number): string => { return `UTC${sign}${integerPart}${formattedMinutes}` } -const PercentageCell: QueryContextColumnComponent = ({ value }) => { - if (typeof value === 'number') { - return {`${(value * 100).toFixed(1)}%`} - } - return null -} +type VariationCellProps = { isPercentage?: boolean; reverseColors?: boolean } +const VariationCell = ( + { isPercentage, reverseColors }: VariationCellProps = { isPercentage: false, reverseColors: false } +): QueryContextColumnComponent => { + const formatNumber = (value: number): string => + isPercentage ? 
`${(value * 100).toFixed(1)}%` : value.toLocaleString() + + return function Cell({ value }) { + if (!value) { + return null + } + + if (!Array.isArray(value)) { + return {String(value)} + } + + const [current, previous] = value as [number, number] + const pctChangeFromPrevious = + previous === 0 && current === 0 // Special case, render as flatline + ? 0 + : current === null + ? null + : previous === null || previous === 0 + ? Infinity + : current / previous - 1 + + const trend = + pctChangeFromPrevious === null + ? null + : pctChangeFromPrevious === 0 + ? { Icon: IconTrendingFlat, color: getColorVar('muted') } + : pctChangeFromPrevious > 0 + ? { + Icon: IconTrending, + color: reverseColors ? getColorVar('danger') : getColorVar('success'), + } + : { + Icon: IconTrendingDown, + color: reverseColors ? getColorVar('success') : getColorVar('danger'), + } + + // If current === previous, say "increased by 0%" + const tooltip = + pctChangeFromPrevious !== null + ? `${current >= previous ? 'Increased' : 'Decreased'} by ${percentage( + Math.abs(pctChangeFromPrevious), + 0 + )} since last period (from ${formatNumber(previous)} to ${formatNumber(current)})` + : null -const NumericCell: QueryContextColumnComponent = ({ value }) => { - return {typeof value === 'number' ? value.toLocaleString() : String(value)} + return ( +
+ + + {formatNumber(current)}  + {trend && ( + // eslint-disable-next-line react/forbid-dom-props + + + + )} + + +
+ ) + } } const BreakdownValueTitle: QueryContextColumnTitleComponent = (props) => { @@ -227,48 +287,48 @@ export const webAnalyticsDataTableQueryContext: QueryContext = { render: BreakdownValueCell, }, bounce_rate: { - title: 'Bounce Rate', - render: PercentageCell, + title: Bounce Rate, + render: VariationCell({ isPercentage: true, reverseColors: true }), align: 'right', }, views: { - title: 'Views', - render: NumericCell, + title: Views, + render: VariationCell(), align: 'right', }, clicks: { - title: 'Clicks', - render: NumericCell, + title: Clicks, + render: VariationCell(), align: 'right', }, visitors: { - title: 'Visitors', - render: NumericCell, + title: Visitors, + render: VariationCell(), align: 'right', }, average_scroll_percentage: { - title: 'Average Scroll', - render: PercentageCell, + title: Average Scroll, + render: VariationCell({ isPercentage: true }), align: 'right', }, scroll_gt80_percentage: { - title: 'Deep Scroll Rate', - render: PercentageCell, + title: Deep Scroll Rate, + render: VariationCell({ isPercentage: true }), align: 'right', }, total_conversions: { - title: 'Total Conversions', - render: NumericCell, + title: Total Conversions, + render: VariationCell(), align: 'right', }, conversion_rate: { - title: 'Conversion Rate', - render: PercentageCell, + title: Conversion Rate, + render: VariationCell({ isPercentage: true }), align: 'right', }, converting_users: { - title: 'Converting Users', - render: NumericCell, + title: Converting Users, + render: VariationCell(), align: 'right', }, action_name: { diff --git a/posthog/hogql/functions/mapping.py b/posthog/hogql/functions/mapping.py index 9eb0980b9d933..d0f4755c67bad 100644 --- a/posthog/hogql/functions/mapping.py +++ b/posthog/hogql/functions/mapping.py @@ -1007,6 +1007,7 @@ def compare_types(arg_types: list[ConstantType], sig_arg_types: tuple[ConstantTy "argMaxMerge": HogQLFunctionMeta("argMaxMerge", 1, 1, aggregate=True), "avgState": HogQLFunctionMeta("avgState", 1, 1, aggregate=True), "avgMerge": HogQLFunctionMeta("avgMerge", 1, 1, aggregate=True), + "avgMergeIf": HogQLFunctionMeta("avgMergeIf", 2, 2, aggregate=True), "avgWeighted": HogQLFunctionMeta("avgWeighted", 2, 2, aggregate=True), "avgWeightedIf": HogQLFunctionMeta("avgWeightedIf", 3, 3, aggregate=True), "avgArray": HogQLFunctionMeta("avgArrayOrNull", 1, 1, aggregate=True), diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py index 8336a173c04bf..1633c4389879d 100644 --- a/posthog/hogql_queries/web_analytics/stats_table.py +++ b/posthog/hogql_queries/web_analytics/stats_table.py @@ -45,30 +45,42 @@ def to_query(self) -> ast.SelectQuery: return self.to_path_scroll_bounce_query() elif self.query.includeBounceRate: return self.to_path_bounce_query() + if self.query.breakdownBy == WebStatsBreakdown.INITIAL_PAGE: if self.query.includeBounceRate: return self.to_entry_bounce_query() if self._has_session_properties(): - self._to_main_query_with_session_properties() + return self._to_main_query_with_session_properties() + return self.to_main_query() def to_main_query(self) -> ast.SelectQuery: with self.timings.measure("stats_table_query"): query = parse_select( """ +WITH + start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment, + start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment SELECT {processed_breakdown_value} AS "context.columns.breakdown_value", - uniq(filtered_person_id) AS "context.columns.visitors", 
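# Editor's note on the hunk in progress here: the rewrite widens the scanned window to
# start at the previous period, gives every inner-query row a session start_timestamp,
# and then computes each metric twice with *If combinators gated on the
# current_period_segment / previous_period_segment flags, packing the results into
# tuple(current, previous). A plain-Python restatement of that idea follows; the row
# shape and helper names are illustrative assumptions, not code from this PR.
from dataclasses import dataclass
from datetime import datetime


@dataclass
class PageviewRow:
    person_id: str
    breakdown_value: str
    session_start: datetime


def visitors_and_views_with_comparison(
    rows: list[PageviewRow],
    previous_from: datetime,  # ~ {date_from_previous_period}
    date_from: datetime,      # ~ {date_from}
    date_to: datetime,        # ~ {date_to}
) -> dict[str, dict[str, tuple[int, int]]]:
    """Return {breakdown_value: {"visitors": (current, previous), "views": (current, previous)}}."""
    buckets: dict[str, dict] = {}
    for row in rows:
        in_current = date_from <= row.session_start < date_to        # current_period_segment
        in_previous = previous_from <= row.session_start < date_from  # previous_period_segment
        if not (in_current or in_previous):
            continue  # outside the widened window; mirrors timestamp >= {date_from_previous_period}
        b = buckets.setdefault(
            row.breakdown_value,
            {"cur_people": set(), "prev_people": set(), "cur_views": 0, "prev_views": 0},
        )
        if in_current:
            b["cur_people"].add(row.person_id)   # ~ uniqIf(filtered_person_id, current_period_segment)
            b["cur_views"] += 1                  # ~ sumIf(filtered_pageview_count, current_period_segment)
        else:
            b["prev_people"].add(row.person_id)  # ~ uniqIf(filtered_person_id, previous_period_segment)
            b["prev_views"] += 1                 # ~ sumIf(filtered_pageview_count, previous_period_segment)
    return {
        breakdown: {
            "visitors": (len(b["cur_people"]), len(b["prev_people"])),
            "views": (b["cur_views"], b["prev_views"]),
        }
        for breakdown, b in buckets.items()
    }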
- sum(filtered_pageview_count) AS "context.columns.views" + tuple( + uniqIf(filtered_person_id, current_period_segment), + uniqIf(filtered_person_id, previous_period_segment) + ) AS "context.columns.visitors", + tuple( + sumIf(filtered_pageview_count, current_period_segment), + sumIf(filtered_pageview_count, previous_period_segment) + ) AS "context.columns.views" FROM ( SELECT any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, - {breakdown_value} AS breakdown_value + {breakdown_value} AS breakdown_value, + min(session.$start_timestamp) as start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, events.event == '$pageview', {all_properties}, @@ -87,10 +99,12 @@ def to_main_query(self) -> ast.SelectQuery: "processed_breakdown_value": self._processed_breakdown_value(), "where_breakdown": self.where_breakdown(), "all_properties": self._all_properties(), + "date_from_previous_period": self._date_from_previous_period(), "date_from": self._date_from(), "date_to": self._date_to(), }, ) + assert isinstance(query, ast.SelectQuery) if self._include_extra_aggregation_value(): @@ -102,19 +116,29 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery: with self.timings.measure("stats_table_query"): query = parse_select( """ +WITH + start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment, + start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment SELECT {processed_breakdown_value} AS "context.columns.breakdown_value", - uniq(filtered_person_id) AS "context.columns.visitors", - sum(filtered_pageview_count) AS "context.columns.views" + tuple( + uniqIf(filtered_person_id, current_period_segment), + uniqIf(filtered_person_id, previous_period_segment) + ) AS "context.columns.visitors", + tuple( + sumIf(filtered_pageview_count, current_period_segment), + sumIf(filtered_pageview_count, previous_period_segment) + ) AS "context.columns.views" FROM ( SELECT any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value, - session.session_id AS session_id + session.session_id AS session_id, + min(session.$start_timestamp) as start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, events.event == '$pageview', {event_properties}, @@ -135,6 +159,7 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery: "where_breakdown": self.where_breakdown(), "event_properties": self._event_properties(), "session_properties": self._session_properties(), + "date_from_previous_period": self._date_from_previous_period(), "date_from": self._date_from(), "date_to": self._date_to(), }, @@ -150,21 +175,34 @@ def to_entry_bounce_query(self) -> ast.SelectQuery: with self.timings.measure("stats_table_query"): query = parse_select( """ +WITH + start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment, + start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment SELECT breakdown_value AS "context.columns.breakdown_value", - uniq(filtered_person_id) AS "context.columns.visitors", - sum(filtered_pageview_count) AS "context.columns.views", - avg(is_bounce) AS "context.columns.bounce_rate" + tuple( + uniqIf(filtered_person_id, current_period_segment), + uniqIf(filtered_person_id, previous_period_segment) + ) AS 
"context.columns.visitors", + tuple( + sumIf(filtered_pageview_count, current_period_segment), + sumIf(filtered_pageview_count, previous_period_segment) + ) AS "context.columns.views", + tuple( + avgIf(is_bounce, current_period_segment), + avgIf(is_bounce, previous_period_segment) + ) AS "context.columns.bounce_rate", FROM ( SELECT + {bounce_breakdown} AS breakdown_value, any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, - {bounce_breakdown} AS breakdown_value, any(session.$is_bounce) AS is_bounce, - session.session_id AS session_id + session.session_id AS session_id, + min(session.$start_timestamp) as start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, events.event == '$pageview', {event_properties}, @@ -184,6 +222,7 @@ def to_entry_bounce_query(self) -> ast.SelectQuery: "where_breakdown": self.where_breakdown(), "session_properties": self._session_properties(), "event_properties": self._event_properties(), + "date_from_previous_period": self._date_from_previous_period(), "date_from": self._date_from(), "date_to": self._date_to(), }, @@ -198,27 +237,33 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: with self.timings.measure("stats_table_bounce_query"): query = parse_select( """ +WITH + start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment, + start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment SELECT counts.breakdown_value AS "context.columns.breakdown_value", - counts.visitors AS "context.columns.visitors", - counts.views AS "context.columns.views", - bounce.bounce_rate AS "context.columns.bounce_rate", - scroll.average_scroll_percentage AS "context.columns.average_scroll_percentage", - scroll.scroll_gt80_percentage AS "context.columns.scroll_gt80_percentage" + tuple(counts.visitors, counts.previous_visitors) AS "context.columns.visitors", + tuple(counts.views, counts.previous_views) AS "context.columns.views", + tuple(bounce.bounce_rate, bounce.previous_bounce_rate) AS "context.columns.bounce_rate", + tuple(scroll.average_scroll_percentage, scroll.previous_average_scroll_percentage) AS "context.columns.average_scroll_percentage", + tuple(scroll.scroll_gt80_percentage, scroll.previous_scroll_gt80_percentage) AS "context.columns.scroll_gt80_percentage", FROM ( SELECT breakdown_value, - uniq(filtered_person_id) AS visitors, - sum(filtered_pageview_count) AS views + uniqIf(filtered_person_id, current_period_segment) AS visitors, + uniqIf(filtered_person_id, previous_period_segment) AS previous_visitors, + sumIf(filtered_pageview_count, current_period_segment) AS views, + sumIf(filtered_pageview_count, previous_period_segment) AS previous_views FROM ( SELECT any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value, - session.session_id AS session_id + session.session_id AS session_id, + min(session.$start_timestamp ) AS start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, events.event == '$pageview', {event_properties}, @@ -232,15 +277,17 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: LEFT JOIN ( SELECT breakdown_value, - avg(is_bounce) AS bounce_rate + avgIf(is_bounce, current_period_segment) AS bounce_rate, + avgIf(is_bounce, previous_period_segment) AS previous_bounce_rate FROM ( SELECT {bounce_breakdown_value} AS breakdown_value, -- 
use $entry_pathname to find the bounce rate for sessions that started on this pathname any(session.`$is_bounce`) AS is_bounce, - session.session_id AS session_id + session.session_id AS session_id, + min(session.$start_timestamp) as start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, events.event == '$pageview', {event_properties}, @@ -255,8 +302,10 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: LEFT JOIN ( SELECT breakdown_value, - avgMerge(average_scroll_percentage_state) AS average_scroll_percentage, - avgMerge(scroll_gt80_percentage_state) AS scroll_gt80_percentage + avgMergeIf(average_scroll_percentage_state, current_period_segment) AS average_scroll_percentage, + avgMergeIf(average_scroll_percentage_state, previous_period_segment) AS previous_average_scroll_percentage, + avgMergeIf(scroll_gt80_percentage_state, current_period_segment) AS scroll_gt80_percentage, + avgMergeIf(scroll_gt80_percentage_state, previous_period_segment) AS previous_scroll_gt80_percentage FROM ( SELECT {scroll_breakdown_value} AS breakdown_value, -- use $prev_pageview_pathname to find the scroll depth when leaving this pathname @@ -267,10 +316,11 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: END ) AS scroll_gt80_percentage_state, avgState(toFloat(events.properties.`$prev_pageview_max_scroll_percentage`)) as average_scroll_percentage_state, - session.session_id AS session_id + session.session_id AS session_id, + min(session.$start_timestamp) AS start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, or(events.event == '$pageview', events.event == '$pageleave'), {event_properties_for_scroll}, @@ -291,6 +341,7 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: "session_properties": self._session_properties(), "event_properties": self._event_properties(), "event_properties_for_scroll": self._event_properties_for_scroll(), + "date_from_previous_period": self._date_from_previous_period(), "date_from": self._date_from(), "date_to": self._date_to(), "breakdown_value": self._counts_breakdown_value(), @@ -308,25 +359,31 @@ def to_path_bounce_query(self) -> ast.SelectQuery: with self.timings.measure("stats_table_scroll_query"): query = parse_select( """ +WITH + start_timestamp >= {date_from} AND start_timestamp < {date_to} AS current_period_segment, + start_timestamp >= {date_from_previous_period} AND start_timestamp < {date_from} AS previous_period_segment SELECT counts.breakdown_value AS "context.columns.breakdown_value", - counts.visitors AS "context.columns.visitors", - counts.views AS "context.columns.views", - bounce.bounce_rate AS "context.columns.bounce_rate" + tuple(counts.visitors, counts.previous_visitors) AS "context.columns.visitors", + tuple(counts.views, counts.previous_views) AS "context.columns.views", + tuple(bounce.bounce_rate, bounce.previous_bounce_rate) AS "context.columns.bounce_rate" FROM ( SELECT breakdown_value, - uniq(filtered_person_id) AS visitors, - sum(filtered_pageview_count) AS views + uniqIf(filtered_person_id, current_period_segment) AS visitors, + uniqIf(filtered_person_id, previous_period_segment) AS previous_visitors, + sumIf(filtered_pageview_count, current_period_segment) AS views, + sumIf(filtered_pageview_count, previous_period_segment) AS previous_views FROM ( SELECT any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value, - 
session.session_id AS session_id + session.session_id AS session_id, + min(session.$start_timestamp) AS start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, events.event == '$pageview', {event_properties}, @@ -340,15 +397,17 @@ def to_path_bounce_query(self) -> ast.SelectQuery: LEFT JOIN ( SELECT breakdown_value, - avg(is_bounce) AS bounce_rate + avgIf(is_bounce, current_period_segment) AS bounce_rate, + avgIf(is_bounce, previous_period_segment) AS previous_bounce_rate FROM ( SELECT {bounce_breakdown_value} AS breakdown_value, -- use $entry_pathname to find the bounce rate for sessions that started on this pathname any(session.`$is_bounce`) AS is_bounce, - session.session_id AS session_id + session.session_id AS session_id, + min(session.$start_timestamp) AS start_timestamp FROM events WHERE and( - timestamp >= {date_from}, + timestamp >= {date_from_previous_period}, timestamp < {date_to}, events.event == '$pageview', {event_properties}, @@ -370,6 +429,7 @@ def to_path_bounce_query(self) -> ast.SelectQuery: "where_breakdown": self.where_breakdown(), "session_properties": self._session_properties(), "event_properties": self._event_properties(), + "date_from_previous_period": self._date_from_previous_period(), "date_from": self._date_from(), "date_to": self._date_to(), "bounce_breakdown_value": self._bounce_entry_pathname_breakdown(), @@ -433,6 +493,9 @@ def _date_to(self) -> ast.Expr: def _date_from(self) -> ast.Expr: return self.query_date_range.date_from_as_hogql() + def _date_from_previous_period(self) -> ast.Expr: + return self.query_date_range.previous_period_date_from_as_hogql() + def calculate(self): query = self.to_query() response = self.paginator.execute_hogql_query( @@ -450,8 +513,11 @@ def calculate(self): results, { 0: self._join_with_aggregation_value, # breakdown_value - 1: self._unsample, # views - 2: self._unsample, # visitors + 1: lambda tuple, row: (self._unsample(tuple[0], row), self._unsample(tuple[1], row)), # Views (tuple) + 2: lambda tuple, row: ( + self._unsample(tuple[0], row), + self._unsample(tuple[1], row), + ), # Visitors (tuple) }, ) diff --git a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py index c021b6d1268bc..ae4b48b0632c1 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py @@ -147,8 +147,8 @@ def test_increase_in_users(self): self.assertEqual( [ - ["/", 2, 2], - ["/login", 1, 1], + ["/", (2, 0), (2, 0)], + ["/login", (1, 0), (1, 0)], ], results, ) @@ -168,9 +168,9 @@ def test_all_time(self): self.assertEqual( [ - ["/", 2, 2], - ["/docs", 1, 1], - ["/login", 1, 1], + ["/", (2, 0), (2, 0)], + ["/docs", (1, 0), (1, 0)], + ["/login", (1, 0), (1, 0)], ], results, ) @@ -195,7 +195,7 @@ def test_dont_filter_test_accounts(self): results = self._run_web_stats_table_query("2023-12-01", "2023-12-03", filter_test_accounts=False).results self.assertEqual( - [["/", 1, 1], ["/login", 1, 1]], + [["/", (1, 0), (1, 0)], ["/login", (1, 0), (1, 0)]], results, ) @@ -235,7 +235,7 @@ def test_limit(self): response_1 = self._run_web_stats_table_query("all", "2023-12-15", limit=1) self.assertEqual( [ - ["/", 2, 2], + ["/", (2, 0), (2, 0)], ], response_1.results, ) @@ -244,8 +244,8 @@ def test_limit(self): response_2 = self._run_web_stats_table_query("all", "2023-12-15", limit=2) self.assertEqual( [ - ["/", 2, 2], - 
["/login", 1, 1], + ["/", (2, 0), (2, 0)], + ["/login", (1, 0), (1, 0)], ], response_2.results, ) @@ -280,10 +280,10 @@ def test_path_filters(self): self.assertEqual( [ - ["/cleaned/:id", 2, 2], - ["/cleaned/:id/path/:id", 1, 1], - ["/not-cleaned", 1, 1], - ["/thing_c", 1, 1], + ["/cleaned/:id", (2, 0), (2, 0)], + ["/cleaned/:id/path/:id", (1, 0), (1, 0)], + ["/not-cleaned", (1, 0), (1, 0)], + ["/thing_c", (1, 0), (1, 0)], ], results, ) @@ -308,9 +308,9 @@ def test_scroll_depth_bounce_rate_one_user(self): self.assertEqual( [ - ["/a", 1, 1, 0, 0.1, 0], - ["/b", 1, 1, None, 0.2, 0], - ["/c", 1, 1, None, 0.9, 1], + ["/a", (1, 0), (1, 0), (0, None), (0.1, None), (0, None)], + ["/b", (1, 0), (1, 0), (None, None), (0.2, None), (0, None)], + ["/c", (1, 0), (1, 0), (None, None), (0.9, None), (1, None)], ], results, ) @@ -350,9 +350,9 @@ def test_scroll_depth_bounce_rate(self): self.assertEqual( [ - ["/a", 3, 4, 1 / 3, 0.5, 0.5], - ["/b", 2, 2, None, 0.2, 0], - ["/c", 2, 2, None, 0.9, 1], + ["/a", (3, 0), (4, 0), (1 / 3, None), (0.5, None), (0.5, None)], + ["/b", (2, 0), (2, 0), (None, None), (0.2, None), (0, None)], + ["/c", (2, 0), (2, 0), (None, None), (0.9, None), (1, None)], ], results, ) @@ -393,7 +393,7 @@ def test_scroll_depth_bounce_rate_with_filter(self): self.assertEqual( [ - ["/a", 3, 4, 1 / 3, 0.5, 0.5], + ["/a", (3, 0), (4, 0), (1 / 3, None), (0.5, None), (0.5, None)], ], results, ) @@ -423,9 +423,9 @@ def test_scroll_depth_bounce_rate_path_cleaning(self): self.assertEqual( [ - ["/a/:id", 1, 1, 0, 0.1, 0], - ["/b/:id", 1, 1, None, 0.2, 0], - ["/c/:id", 1, 1, None, 0.9, 1], + ["/a/:id", (1, 0), (1, 0), (0, None), (0.1, None), (0, None)], + ["/b/:id", (1, 0), (1, 0), (None, None), (0.2, None), (0, None)], + ["/c/:id", (1, 0), (1, 0), (None, None), (0.9, None), (1, None)], ], results, ) @@ -449,9 +449,9 @@ def test_bounce_rate_one_user(self): self.assertEqual( [ - ["/a", 1, 1, 0], - ["/b", 1, 1, None], - ["/c", 1, 1, None], + ["/a", (1, 0), (1, 0), (0, None)], + ["/b", (1, 0), (1, 0), (None, None)], + ["/c", (1, 0), (1, 0), (None, None)], ], results, ) @@ -490,9 +490,9 @@ def test_bounce_rate(self): self.assertEqual( [ - ["/a", 3, 4, 1 / 3], - ["/b", 2, 2, None], - ["/c", 2, 2, None], + ["/a", (3, 0), (4, 0), (1 / 3, None)], + ["/b", (2, 0), (2, 0), (None, None)], + ["/c", (2, 0), (2, 0), (None, None)], ], results, ) @@ -532,7 +532,7 @@ def test_bounce_rate_with_property(self): self.assertEqual( [ - ["/a", 3, 4, 1 / 3], + ["/a", (3, 0), (4, 0), (1 / 3, None)], ], results, ) @@ -561,9 +561,9 @@ def test_bounce_rate_path_cleaning(self): self.assertEqual( [ - ["/a/:id", 1, 1, 0], - ["/b/:id", 1, 1, None], - ["/c/:id", 1, 1, None], + ["/a/:id", (1, 0), (1, 0), (0, None)], + ["/b/:id", (1, 0), (1, 0), (None, None)], + ["/c/:id", (1, 0), (1, 0), (None, None)], ], results, ) @@ -587,7 +587,7 @@ def test_entry_bounce_rate_one_user(self): self.assertEqual( [ - ["/a", 1, 3, 0], + ["/a", (1, 0), (3, 0), (0, None)], ], results, ) @@ -626,7 +626,7 @@ def test_entry_bounce_rate(self): self.assertEqual( [ - ["/a", 3, 8, 1 / 3], + ["/a", (3, 0), (8, 0), (1 / 3, None)], ], results, ) @@ -666,7 +666,7 @@ def test_entry_bounce_rate_with_property(self): self.assertEqual( [ - ["/a", 3, 4, 1 / 3], + ["/a", (3, 0), (4, 0), (1 / 3, None)], ], results, ) @@ -695,7 +695,7 @@ def test_entry_bounce_rate_path_cleaning(self): self.assertEqual( [ - ["/a/:id", 1, 3, 0], + ["/a/:id", (1, 0), (3, 0), (0, None)], ], results, ) @@ -743,7 +743,10 @@ def test_source_medium_campaign(self): ).results self.assertEqual( - 
[["google / (none) / (none)", 1, 1], ["news.ycombinator.com / referral / (none)", 1, 1]], + [ + ["google / (none) / (none)", (1, 0), (1, 0)], + ["news.ycombinator.com / referral / (none)", (1, 0), (1, 0)], + ], results, ) @@ -792,7 +795,7 @@ def test_null_in_utm_tags(self): ).results self.assertEqual( - [["google", 1.0, 1.0], [None, 1.0, 1.0]], + [["google", (1, 0), (1, 0)], [None, (1, 0), (1, 0)]], results, ) @@ -842,7 +845,7 @@ def test_is_not_set_filter(self): ).results self.assertEqual( - [[None, 1.0, 1.0]], + [[None, (1, 0), (1, 0)]], results, ) @@ -878,7 +881,7 @@ def test_same_user_multiple_sessions(self): "2024-07-31", breakdown_by=WebStatsBreakdown.INITIAL_UTM_SOURCE, ).results - assert [["google", 1, 2]] == results_session + assert [["google", (1, 0), (2, 0)]] == results_session # Try this with a query that uses event properties results_event = self._run_web_stats_table_query( @@ -886,13 +889,13 @@ def test_same_user_multiple_sessions(self): "2024-07-31", breakdown_by=WebStatsBreakdown.PAGE, ).results - assert [["/path", 1, 2]] == results_event + assert [["/path", (1, 0), (2, 0)]] == results_event # Try this with a query using the bounce rate results_event = self._run_web_stats_table_query( "all", "2024-07-31", breakdown_by=WebStatsBreakdown.PAGE, include_bounce_rate=True ).results - assert [["/path", 1, 2, None]] == results_event + assert [["/path", (1, 0), (2, 0), (None, None)]] == results_event # Try this with a query using the scroll depth results_event = self._run_web_stats_table_query( @@ -902,7 +905,7 @@ def test_same_user_multiple_sessions(self): include_bounce_rate=True, include_scroll_depth=True, ).results - assert [["/path", 1, 2, None, None, None]] == results_event + assert [["/path", (1, 0), (2, 0), (None, None), (None, None), (None, None)]] == results_event def test_no_session_id(self): d1 = "d1" @@ -935,13 +938,16 @@ def test_no_session_id(self): ).results assert [] == results - # Do show event property breakdowns of events of events with no session id + # Do show event property breakdowns of events with no session id + # but it will return 0 views because we depend on session.$start_timestamp + # to figure out the previous/current values results = self._run_web_stats_table_query( "all", "2024-07-31", breakdown_by=WebStatsBreakdown.PAGE, ).results - assert [["/path", 1, 1]] == results + + assert [["/path", (0, 0), (0, 0)]] == results def test_cohort_test_filters(self): d1 = "d1" @@ -1003,7 +1009,7 @@ def test_cohort_test_filters(self): breakdown_by=WebStatsBreakdown.PAGE, ).results - assert results == [["/path1", 1, 1]] + assert results == [["/path1", (1, 0), (1, 0)]] def test_language_filter(self): d1, s1 = "d1", str(uuid7("2024-07-30")) @@ -1079,45 +1085,61 @@ def test_language_filter(self): # which is causing this to be flaky (en-GB happens sometimes), # we'll instead assert on a reduced form where we're # not counting the country, but only the locale - # assert results == [["en-US", 1.0, 3.0], ["pt-BR", 1.0, 2.0], ["nl-", 1.0, 1.0]] + # assert results == [["en-US", (1, 0), (3, 0)], ["pt-BR", (1, 0), (2, 0)], ["nl-", (1, 0), (1, 0)]] country_results = [result[0].split("-")[0] for result in results] assert country_results == ["en", "pt", "nl"] - def test_timezone_filter(self): - date = "2024-07-30" + def test_timezone_filter_general(self): + before_date = "2024-07-14" + after_date = "2024-07-16" - for idx, (distinct_id, session_id) in enumerate( + for idx, (distinct_id, before_session_id, after_session_id) in enumerate( [ - ("UTC", str(uuid7(date))), - 
("Asia/Calcutta", str(uuid7(date))), - ("America/New_York", str(uuid7(date))), - ("America/Sao_Paulo", str(uuid7(date))), + ("UTC", str(uuid7(before_date)), str(uuid7(after_date))), + ("Asia/Calcutta", str(uuid7(before_date)), str(uuid7(after_date))), + ("America/New_York", str(uuid7(before_date)), str(uuid7(after_date))), + ("America/Sao_Paulo", str(uuid7(before_date)), str(uuid7(after_date))), ] ): _create_person( team_id=self.team.pk, distinct_ids=[distinct_id], - properties={"name": session_id, "email": f"{distinct_id}@example.com"}, + properties={"name": before_session_id, "email": f"{distinct_id}@example.com"}, ) + # Always one event in the before_date + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + timestamp=before_date, + properties={"$session_id": before_session_id, "$pathname": f"/path/landing", "$timezone": distinct_id}, + ) + + # Several events in the actual range for i in range(idx + 1): _create_event( team=self.team, event="$pageview", distinct_id=distinct_id, - timestamp=date, - properties={"$session_id": session_id, "$pathname": f"/path{i}", "$timezone": distinct_id}, + timestamp=after_date, + properties={"$session_id": after_session_id, "$pathname": f"/path{i}", "$timezone": distinct_id}, ) results = self._run_web_stats_table_query( - "all", + "2024-07-15", # Period is since July first, we create some events before that date, and some after None, breakdown_by=WebStatsBreakdown.TIMEZONE, ).results # Brasilia UTC-3, New York UTC-4, Calcutta UTC+5:30, UTC - assert results == [[-3.0, 1.0, 4.0], [-4.0, 1.0, 3.0], [5.5, 1.0, 2.0], [0.0, 1.0, 1.0]] + assert results == [ + [-3, (1, 1), (4, 1)], + [-4, (1, 1), (3, 1)], + [5.5, (1, 1), (2, 1)], + [0, (1, 1), (1, 1)], + ] def test_timezone_filter_dst_change(self): did = "id" @@ -1146,7 +1168,7 @@ def test_timezone_filter_dst_change(self): ).results # Change from UTC-2 to UTC-3 in the middle of the night - assert results == [[-3.0, 1.0, 4.0], [-2.0, 1.0, 2.0]] + assert results == [[-3, (1, 0), (4, 0)], [-2, (1, 0), (2, 0)]] def test_timezone_filter_with_invalid_timezone(self): date = "2024-07-30"