diff --git a/frontend/__snapshots__/exporter-exporter--dashboard--light.png b/frontend/__snapshots__/exporter-exporter--dashboard--light.png index ac90639ab4b75..61f6b619057b7 100644 Binary files a/frontend/__snapshots__/exporter-exporter--dashboard--light.png and b/frontend/__snapshots__/exporter-exporter--dashboard--light.png differ diff --git a/frontend/src/lib/lemon-ui/LemonTable/LemonTable.scss b/frontend/src/lib/lemon-ui/LemonTable/LemonTable.scss index 5419d9f12b5ab..2bf5449f0b4aa 100644 --- a/frontend/src/lib/lemon-ui/LemonTable/LemonTable.scss +++ b/frontend/src/lib/lemon-ui/LemonTable/LemonTable.scss @@ -18,6 +18,12 @@ --lemon-table-background-color: var(--bg-light); } + .WebAnalyticsDashboard &, + .WebAnalyticsModal & { + // Special override for scenes where the surroundings provide a border + border: none; + } + .posthog-3000 & { --row-base-height: auto; diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 61209cba2a8b7..4c14f1fd1c36d 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -5073,6 +5073,9 @@ "dateRange": { "$ref": "#/definitions/DateRange" }, + "doPathCleaning": { + "type": "boolean" + }, "includeBounceRate": { "type": "boolean" }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 0e1532814c675..57f0e90f1a9db 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -1017,6 +1017,7 @@ export interface WebStatsTableQuery extends WebAnalyticsQueryBase { response?: WebStatsTableQueryResponse includeScrollDepth?: boolean // automatically sets includeBounceRate to true includeBounceRate?: boolean + doPathCleaning?: boolean /** @asType integer */ limit?: number } diff --git a/frontend/src/scenes/web-analytics/WebAnalyticsModal.tsx b/frontend/src/scenes/web-analytics/WebAnalyticsModal.tsx index 438486433e5c7..422f38acbefd0 100644 --- a/frontend/src/scenes/web-analytics/WebAnalyticsModal.tsx +++ b/frontend/src/scenes/web-analytics/WebAnalyticsModal.tsx @@ -30,7 +30,7 @@ export const WebAnalyticsModal = (): JSX.Element | null => { fullScreen={false} closable={true} > -
+
{ query={modal.query} insightProps={modal.insightProps} showIntervalSelect={modal.showIntervalSelect} + showPathCleaningControls={modal.showPathCleaningControls} />
diff --git a/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx b/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx index 542f94a9b3227..c89754e0951d9 100644 --- a/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx +++ b/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx @@ -1,8 +1,12 @@ +import { IconGear } from '@posthog/icons' import { useActions, useValues } from 'kea' import { IntervalFilterStandalone } from 'lib/components/IntervalFilter' +import { LemonButton } from 'lib/lemon-ui/LemonButton' +import { LemonSwitch } from 'lib/lemon-ui/LemonSwitch' import { UnexpectedNeverError } from 'lib/utils' import { useCallback, useMemo } from 'react' import { countryCodeToFlag, countryCodeToName } from 'scenes/insights/views/WorldMap' +import { urls } from 'scenes/urls' import { DeviceTab, GeographyTab, webAnalyticsLogic } from 'scenes/web-analytics/webAnalyticsLogic' import { Query } from '~/queries/Query/Query' @@ -307,12 +311,16 @@ export const WebStatsTableTile = ({ query, breakdownBy, insightProps, + showPathCleaningControls, }: { query: DataTableNode breakdownBy: WebStatsBreakdown insightProps: InsightLogicProps + showPathCleaningControls?: boolean }): JSX.Element => { - const { togglePropertyFilter } = useActions(webAnalyticsLogic) + const { togglePropertyFilter, setIsPathCleaningEnabled } = useActions(webAnalyticsLogic) + const { isPathCleaningEnabled } = useValues(webAnalyticsLogic) + const { key, type } = webStatsBreakdownToPropertyName(breakdownBy) || {} const onClick = useCallback( @@ -327,6 +335,15 @@ export const WebStatsTableTile = ({ const context = useMemo((): QueryContext => { const rowProps: QueryContext['rowProps'] = (record: unknown) => { + if ( + (breakdownBy === WebStatsBreakdown.InitialPage || breakdownBy === WebStatsBreakdown.Page) && + isPathCleaningEnabled + ) { + // if the path cleaning is enabled, don't allow toggling a path by clicking a row, as this wouldn't + // work due to the order that the regex and filters are applied + return {} + } + const breakdownValue = getBreakdownValue(record, breakdownBy) if (breakdownValue === undefined) { return {} @@ -342,7 +359,37 @@ export const WebStatsTableTile = ({ } }, [onClick, insightProps]) - return + const pathCleaningSettingsUrl = urls.settings('project-product-analytics', 'path-cleaning') + return ( +
+ {showPathCleaningControls && ( +
+
+ + Enable path cleaning + } + type="tertiary" + status="alt" + size="small" + noPadding={true} + tooltip="Edit path cleaning settings" + to={pathCleaningSettingsUrl} + /> +
+ } + checked={isPathCleaningEnabled} + onChange={setIsPathCleaningEnabled} + className="h-full" + /> +
+
+ )} + +
+ ) } const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): string | undefined => { @@ -383,14 +430,23 @@ const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): str export const WebQuery = ({ query, showIntervalSelect, + showPathCleaningControls, insightProps, }: { query: QuerySchema showIntervalSelect?: boolean + showPathCleaningControls?: boolean insightProps: InsightLogicProps }): JSX.Element => { if (query.kind === NodeKind.DataTableNode && query.source.kind === NodeKind.WebStatsTableQuery) { - return + return ( + + ) } if (query.kind === NodeKind.InsightVizNode) { return diff --git a/frontend/src/scenes/web-analytics/WebDashboard.tsx b/frontend/src/scenes/web-analytics/WebDashboard.tsx index e7fc272557d5e..638c0c73ba4e9 100644 --- a/frontend/src/scenes/web-analytics/WebDashboard.tsx +++ b/frontend/src/scenes/web-analytics/WebDashboard.tsx @@ -66,7 +66,7 @@ const Tiles = (): JSX.Element => { } const QueryTileItem = ({ tile }: { tile: QueryTile }): JSX.Element => { - const { query, title, layout, insightProps } = tile + const { query, title, layout, insightProps, showPathCleaningControls, showIntervalSelect } = tile const { openModal } = useActions(webAnalyticsLogic) const { getNewInsightUrl } = useValues(webAnalyticsLogic) @@ -107,7 +107,12 @@ const QueryTileItem = ({ tile }: { tile: QueryTile }): JSX.Element => { )} > {title &&

{title}

} - + {buttonsRow.length > 0 ?
{buttonsRow}
: null}
) @@ -137,6 +142,7 @@ const TabsTileItem = ({ tile }: { tile: TabsTile }): JSX.Element => { key={tab.id} query={tab.query} showIntervalSelect={tab.showIntervalSelect} + showPathCleaningControls={tab.showPathCleaningControls} insightProps={tab.insightProps} /> ), diff --git a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts index f3a871d4cde4c..fea26dcf8fa8e 100644 --- a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts +++ b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts @@ -71,6 +71,8 @@ interface BaseTile { export interface QueryTile extends BaseTile { title?: string query: QuerySchema + showIntervalSelect?: boolean + showPathCleaningControls?: boolean insightProps: InsightLogicProps canOpenModal: boolean canOpenInsight?: boolean @@ -85,6 +87,7 @@ export interface TabsTile extends BaseTile { linkText: string query: QuerySchema showIntervalSelect?: boolean + showPathCleaningControls?: boolean insightProps: InsightLogicProps canOpenModal?: boolean canOpenInsight?: boolean @@ -100,6 +103,7 @@ export interface WebDashboardModalQuery { query: QuerySchema insightProps: InsightLogicProps showIntervalSelect?: boolean + showPathCleaningControls?: boolean canOpenInsight?: boolean } @@ -196,6 +200,7 @@ export const webAnalyticsLogic = kea([ setGeographyTab: (tab: string) => ({ tab }), setDates: (dateFrom: string | null, dateTo: string | null) => ({ dateFrom, dateTo }), setInterval: (interval: IntervalType) => ({ interval }), + setIsPathCleaningEnabled: (isPathCleaningEnabled: boolean) => ({ isPathCleaningEnabled }), setStateFromUrl: (state: { filters: WebAnalyticsPropertyFilters dateFrom: string | null @@ -206,6 +211,7 @@ export const webAnalyticsLogic = kea([ deviceTab: string | null pathTab: string | null geographyTab: string | null + isPathCleaningEnabled: boolean | null }) => ({ state, }), @@ -308,6 +314,13 @@ export const webAnalyticsLogic = kea([ togglePropertyFilter: (oldTab, { tabChange }) => tabChange?.geographyTab || oldTab, }, ], + isPathCleaningEnabled: [ + false as boolean, + { + setIsPathCleaningEnabled: (_, { isPathCleaningEnabled }) => isPathCleaningEnabled, + setStateFromUrl: (_, { state }) => state.isPathCleaningEnabled || false, + }, + ], _modalTileAndTab: [ null as { tileId: TileId; tabId?: string } | null, { @@ -367,6 +380,7 @@ export const webAnalyticsLogic = kea([ s.pathTab, s.geographyTab, s.dateFilter, + s.isPathCleaningEnabled, () => values.statusCheck, () => values.isGreaterThanMd, () => values.shouldShowGeographyTile, @@ -379,6 +393,7 @@ export const webAnalyticsLogic = kea([ pathTab, geographyTab, { dateFrom, dateTo, interval }, + isPathCleaningEnabled: boolean, statusCheck, isGreaterThanMd: boolean, shouldShowGeographyTile @@ -552,12 +567,14 @@ export const webAnalyticsLogic = kea([ includeScrollDepth: statusCheck?.isSendingPageLeavesScroll, includeBounceRate: true, sampling, + doPathCleaning: isPathCleaningEnabled, limit: 10, }, embedded: false, }, insightProps: createInsightProps(TileId.PATHS, PathTab.PATH), canOpenModal: true, + showPathCleaningControls: true, }, { id: PathTab.INITIAL_PATH, @@ -573,12 +590,14 @@ export const webAnalyticsLogic = kea([ dateRange, includeScrollDepth: statusCheck?.isSendingPageLeavesScroll, sampling, + doPathCleaning: isPathCleaningEnabled, limit: 10, }, embedded: false, }, insightProps: createInsightProps(TileId.PATHS, PathTab.INITIAL_PATH), canOpenModal: true, + showPathCleaningControls: true, }, ], }, @@ -990,6 +1009,7 @@ export const webAnalyticsLogic = kea([ tabId, title: tab.title, showIntervalSelect: tab.showIntervalSelect, + showPathCleaningControls: tab.showPathCleaningControls, insightProps: { dashboardItemId: getDashboardItemId(tileId, tabId, true), loadPriority: 0, @@ -1004,6 +1024,8 @@ export const webAnalyticsLogic = kea([ return { tileId, title: tile.title, + showIntervalSelect: tile.showIntervalSelect, + showPathCleaningControls: tile.showPathCleaningControls, insightProps: { dashboardItemId: getDashboardItemId(tileId, undefined, true), loadPriority: 0, @@ -1188,6 +1210,7 @@ export const webAnalyticsLogic = kea([ pathTab, geographyTab, graphsTab, + isPathCleaningEnabled, } = values const urlParams = new URLSearchParams() @@ -1214,6 +1237,9 @@ export const webAnalyticsLogic = kea([ if (geographyTab) { urlParams.set('geography_tab', geographyTab) } + if (isPathCleaningEnabled) { + urlParams.set('path_cleaning', isPathCleaningEnabled.toString()) + } return `/web?${urlParams.toString()}` } @@ -1233,7 +1259,18 @@ export const webAnalyticsLogic = kea([ urlToAction(({ actions }) => ({ '/web': ( _, - { filters, date_from, date_to, interval, device_tab, source_tab, graphs_tab, path_tab, geography_tab } + { + filters, + date_from, + date_to, + interval, + device_tab, + source_tab, + graphs_tab, + path_tab, + geography_tab, + path_cleaning, + } ) => { const parsedFilters = isWebAnalyticsPropertyFilters(filters) ? filters : initialWebAnalyticsFilter @@ -1247,6 +1284,7 @@ export const webAnalyticsLogic = kea([ graphsTab: graphs_tab || null, pathTab: path_tab || null, geographyTab: geography_tab || null, + isPathCleaningEnabled: [true, 'true', 1, '1'].includes(path_cleaning), }) }, })), diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py index bede9accc6d6d..a7ced15c87b33 100644 --- a/posthog/hogql_queries/web_analytics/stats_table.py +++ b/posthog/hogql_queries/web_analytics/stats_table.py @@ -9,6 +9,7 @@ ) from posthog.hogql_queries.web_analytics.web_analytics_query_runner import ( WebAnalyticsQueryRunner, + map_columns, ) from posthog.schema import ( WebStatsTableQuery, @@ -169,19 +170,13 @@ def calculate(self): assert results is not None - def to_data(col_val, col_idx): - if col_idx == 0: # breakdown_value - return col_val - elif col_idx == 1: # views - return self._unsample(col_val) - elif col_idx == 2: # visitors - return self._unsample(col_val) - elif col_idx == 3: # bounce_rate - return col_val - else: - return col_val - - results_mapped = [[to_data(c, i) for (i, c) in enumerate(r)] for r in results] + results_mapped = map_columns( + results, + { + 1: self._unsample, # views + 2: self._unsample, # visitors + }, + ) return WebStatsTableQueryResponse( columns=response.columns, @@ -195,11 +190,11 @@ def to_data(col_val, col_idx): def counts_breakdown(self): match self.query.breakdownBy: case WebStatsBreakdown.Page: - return ast.Field(chain=["properties", "$pathname"]) + return self._apply_path_cleaning(ast.Field(chain=["properties", "$pathname"])) case WebStatsBreakdown.InitialChannelType: raise NotImplementedError("Breakdown InitialChannelType not implemented") case WebStatsBreakdown.InitialPage: - return ast.Field(chain=["person", "properties", "$initial_pathname"]) + return self._apply_path_cleaning(ast.Field(chain=["person", "properties", "$initial_pathname"])) case WebStatsBreakdown.InitialReferringDomain: return ast.Field(chain=["person", "properties", "$initial_referring_domain"]) case WebStatsBreakdown.InitialUTMSource: @@ -233,9 +228,15 @@ def bounce_breakdown(self): match self.query.breakdownBy: case WebStatsBreakdown.Page: # use initial pathname for bounce rate - return ast.Call(name="any", args=[ast.Field(chain=["person", "properties", "$initial_pathname"])]) + return self._apply_path_cleaning( + ast.Call(name="any", args=[ast.Field(chain=["person", "properties", "$initial_pathname"])]) + ) case WebStatsBreakdown.InitialChannelType: raise NotImplementedError("Breakdown InitialChannelType not implemented") + case WebStatsBreakdown.InitialPage: + return self._apply_path_cleaning( + ast.Call(name="any", args=[ast.Field(chain=["person", "properties", "$initial_pathname"])]) + ) case _: return ast.Call(name="any", args=[self.counts_breakdown()]) @@ -364,3 +365,19 @@ def to_channel_query(self): ) return top_sources_query + + def _apply_path_cleaning(self, path_expr: ast.Expr) -> ast.Expr: + if not self.query.doPathCleaning or not self.team.path_cleaning_filters: + return path_expr + + for replacement in self.team.path_cleaning_filter_models(): + path_expr = ast.Call( + name="replaceRegexpAll", + args=[ + path_expr, + ast.Constant(value=replacement.regex), + ast.Constant(value=replacement.alias), + ], + ) + + return path_expr diff --git a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py index 041121b4055a6..8705bc7a9796d 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py @@ -35,13 +35,17 @@ def _create_events(self, data, event="$pageview"): ) return person_result - def _run_web_stats_table_query(self, date_from, date_to, breakdown_by=WebStatsBreakdown.Page, limit=None): + def _run_web_stats_table_query( + self, date_from, date_to, breakdown_by=WebStatsBreakdown.Page, limit=None, path_cleaning_filters=None + ): query = WebStatsTableQuery( dateRange=DateRange(date_from=date_from, date_to=date_to), properties=[], breakdownBy=breakdown_by, limit=limit, + doPathCleaning=bool(path_cleaning_filters), ) + self.team.path_cleaning_filters = path_cleaning_filters or [] runner = WebStatsTableQueryRunner(team=self.team, query=query) return runner.calculate() @@ -141,3 +145,35 @@ def test_limit(self): response_2.results, ) self.assertEqual(False, response_2.hasMore) + + def test_path_filters(self): + self._create_events( + [ + ("p1", [("2023-12-02", "s1", "/cleaned/123/path/456")]), + ("p2", [("2023-12-10", "s2", "/cleaned/123")]), + ("p3", [("2023-12-10", "s3", "/cleaned/456")]), + ("p4", [("2023-12-11", "s4", "/not-cleaned")]), + ("p5", [("2023-12-11", "s5", "/thing_a")]), + ] + ) + + results = self._run_web_stats_table_query( + "all", + "2023-12-15", + path_cleaning_filters=[ + {"regex": "\\/cleaned\\/\\d+", "alias": "/cleaned/:id"}, + {"regex": "\\/path\\/\\d+", "alias": "/path/:id"}, + {"regex": "thing_a", "alias": "thing_b"}, + {"regex": "thing_b", "alias": "thing_c"}, + ], + ).results + + self.assertEqual( + [ + ["/cleaned/:id", 2, 2], + ["/thing_c", 1, 1], + ["/not-cleaned", 1, 1], + ["/cleaned/:id/path/:id", 1, 1], + ], + results, + ) diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py index ad9d568d6c107..e20a2810274a9 100644 --- a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py +++ b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py @@ -1,3 +1,4 @@ +import typing from abc import ABC from datetime import timedelta from math import ceil @@ -228,6 +229,10 @@ def _unsample(self, n: Optional[int | float]): else n / self._sample_rate.numerator ) + def _cache_key(self) -> str: + original = super()._cache_key() + return f"{original}_{self.team.path_cleaning_filters}" + def _sample_rate_from_count(count: int) -> SamplingRate: # Change the sample rate so that the query will sample about 100_000 to 1_000_000 events, but use defined steps of @@ -239,3 +244,7 @@ def _sample_rate_from_count(count: int) -> SamplingRate: if count / sample_target >= step: return SamplingRate(numerator=1, denominator=step) return SamplingRate(numerator=1) + + +def map_columns(results, mapper: dict[int, typing.Callable]): + return [[mapper[i](data) if i in mapper else data for i, data in enumerate(row)] for row in results] diff --git a/posthog/schema.py b/posthog/schema.py index 324f601e1142f..328f61346b2b3 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -1547,6 +1547,7 @@ class WebStatsTableQuery(BaseModel): ) breakdownBy: WebStatsBreakdown dateRange: Optional[DateRange] = None + doPathCleaning: Optional[bool] = None includeBounceRate: Optional[bool] = None includeScrollDepth: Optional[bool] = None kind: Literal["WebStatsTableQuery"] = "WebStatsTableQuery"