diff --git a/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--dark.png b/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--dark.png index f62a42ff859d1..f1b31c7575e0e 100644 Binary files a/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--dark.png and b/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--light.png b/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--light.png index 3aefe218922c0..b20fcc1b450d3 100644 Binary files a/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--light.png and b/frontend/__snapshots__/scenes-app-data-management-actions--actions-list--light.png differ diff --git a/frontend/src/layout/ErrorBoundary/ErrorBoundary.tsx b/frontend/src/layout/ErrorBoundary/ErrorBoundary.tsx index 28b171d53e1d7..baf1a727a91cf 100644 --- a/frontend/src/layout/ErrorBoundary/ErrorBoundary.tsx +++ b/frontend/src/layout/ErrorBoundary/ErrorBoundary.tsx @@ -1,16 +1,30 @@ import './ErrorBoundary.scss' import { ErrorBoundary as SentryErrorBoundary, getCurrentHub } from '@sentry/react' -import { useActions } from 'kea' +import type { Primitive } from '@sentry/types' +import { useActions, useValues } from 'kea' import { supportLogic } from 'lib/components/Support/supportLogic' import { LemonButton } from 'lib/lemon-ui/LemonButton' +import { teamLogic } from 'scenes/teamLogic' -export function ErrorBoundary({ children }: { children?: React.ReactNode }): JSX.Element { +interface ErrorBoundaryProps { + children?: React.ReactNode + tags?: Record<string, Primitive> +} + +export function ErrorBoundary({ children, tags = {} }: ErrorBoundaryProps): JSX.Element { const isSentryInitialized = !!getCurrentHub().getClient() + const { currentTeamId } = useValues(teamLogic) const { openSupportForm } = useActions(supportLogic) return ( <SentryErrorBoundary + beforeCapture={(scope) => { + if (currentTeamId !== undefined) { + scope.setTag('team_id', currentTeamId) + } + scope.setTags(tags) + }} fallback={({ error, eventId }) => (
<> diff --git a/frontend/src/lib/components/Cards/InsightCard/InsightCard.tsx b/frontend/src/lib/components/Cards/InsightCard/InsightCard.tsx index 833795d4f24b5..0bb002e91e590 100644 --- a/frontend/src/lib/components/Cards/InsightCard/InsightCard.tsx +++ b/frontend/src/lib/components/Cards/InsightCard/InsightCard.tsx @@ -121,7 +121,7 @@ function InsightCardInternal( style={{ ...(divProps?.style ?? {}), ...(theme?.boxStyle ?? {}) }} ref={ref} > - + {item.icon} : null} + className="font-mono" > {item.name} @@ -50,6 +51,7 @@ export function TreeTableRow({ item, onClick, selected }: TreeTableRowProps): JS
([ searchPlaceholder: 'actions', type: TaxonomicFilterGroupType.Actions, logic: actionsModel, - value: 'actions', + value: 'actionsSorted', getName: (action: ActionType) => action.name || '', getValue: (action: ActionType) => action.id, getPopoverHeader: () => 'Action', diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index 3d2c46077b67d..2a8b466400bdb 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -210,6 +210,8 @@ export const FEATURE_FLAGS = { WEB_ANALYTICS_REPLAY: 'web-analytics-replay', // owner: @robbie-c BATCH_EXPORTS_POSTHOG_HTTP: 'posthog-http-batch-exports', EXPERIMENT_MAKE_DECISION: 'experiment-make-decision', // owner: @jurajmajerik #team-feature-success + WEB_ANALYTICS_CONVERSION_GOALS: 'web-analytics-conversion-goals', // owner: @robbie-c + WEB_ANALYTICS_LAST_CLICK: 'web-analytics-last-click', // owner: @robbie-c } as const export type FeatureFlagKey = (typeof FEATURE_FLAGS)[keyof typeof FEATURE_FLAGS] diff --git a/frontend/src/lib/lemon-ui/LemonButton/LemonButton.scss b/frontend/src/lib/lemon-ui/LemonButton/LemonButton.scss index e4e592e6b3ff7..0b3a7505c4b77 100644 --- a/frontend/src/lib/lemon-ui/LemonButton/LemonButton.scss +++ b/frontend/src/lib/lemon-ui/LemonButton/LemonButton.scss @@ -76,7 +76,8 @@ outline: none; transition: var(--lemon-button-transition); - .font-normal { + .font-normal, + &.font-normal { font-family: var(--font-sans); } diff --git a/frontend/src/lib/taxonomy.tsx b/frontend/src/lib/taxonomy.tsx index d015bc83661f1..0c391b009a3a8 100644 --- a/frontend/src/lib/taxonomy.tsx +++ b/frontend/src/lib/taxonomy.tsx @@ -1128,6 +1128,11 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = { description: 'Whether the session was a bounce.', examples: ['true', 'false'], }, + $last_external_click_url: { + label: 'Last external click URL', + description: 'The last external URL clicked in this session', + examples: ['https://example.com/interesting-article?parameter=true'], + }, }, groups: { $group_key: { @@ -1155,6 +1160,17 @@ export const CORE_FILTER_DEFINITIONS_BY_GROUP = { description: 'URL a user visited during their session', }, }, + log_entries: { + level: { + label: 'Console log level', + description: 'Level of the log entry', + examples: ['info', 'warn', 'error'], + }, + message: { + label: 'Console log message', + description: 'The contents of the log message', + }, + }, } satisfies Partial<Record<string, Record<string, CoreFilterDefinition>>> CORE_FILTER_DEFINITIONS_BY_GROUP.numerical_event_properties = CORE_FILTER_DEFINITIONS_BY_GROUP.event_properties diff --git a/frontend/src/models/actionsModel.ts b/frontend/src/models/actionsModel.ts index b6e508a3d7ed1..2d336c3f1220e 100644 --- a/frontend/src/models/actionsModel.ts +++ b/frontend/src/models/actionsModel.ts @@ -22,7 +22,7 @@ export const actionsModel = kea<actionsModelType>([ connect({ values: [teamLogic, ['currentTeam']], }), - loaders(({ props, values }) => ({ + loaders(({ props, values, actions }) => ({ actions: { __default: [] as ActionType[], loadActions: async () => { @@ -31,6 +31,22 @@ }, updateAction: (action: ActionType) => (values.actions || []).map((a) => (action.id === a.id ? 
action : a)), }, + pin: { + pinAction: async (action: ActionType) => { + const response = await api.actions.update(action.id, { + name: action.name, + pinned_at: new Date().toISOString(), + }) + actions.updateAction(response) + }, + unpinAction: async (action: ActionType) => { + const response = await api.actions.update(action.id, { + name: action.name, + pinned_at: null, + }) + actions.updateAction(response) + }, + }, })), selectors(({ selectors }) => ({ actionsGrouped: [ @@ -51,6 +67,12 @@ (actions): Partial<Record<number, ActionType>> => Object.fromEntries(actions.map((action) => [action.id, action])), ], + actionsSorted: [ + (s) => [s.actions], + (actions: ActionType[]): ActionType[] => { + return [...actions].sort((a, b) => (b.pinned_at ? 1 : 0) - (a.pinned_at ? 1 : 0)) + }, + ], })), events(({ values, actions }) => ({ afterMount: () => { diff --git a/frontend/src/models/propertyDefinitionsModel.ts b/frontend/src/models/propertyDefinitionsModel.ts index 5db493d55213b..31a1bafc283c4 100644 --- a/frontend/src/models/propertyDefinitionsModel.ts +++ b/frontend/src/models/propertyDefinitionsModel.ts @@ -59,7 +59,7 @@ const localOptions: Record = { { id: 0, name: 'web' }, { id: 1, name: 'mobile' }, ], - 'session/console_log_level': [ + 'log_entry/level': [ { id: 0, name: 'info' }, { id: 1, name: 'warn' }, { id: 2, name: 'error' }, diff --git a/frontend/src/queries/nodes/DataTable/queryFeatures.ts b/frontend/src/queries/nodes/DataTable/queryFeatures.ts index c5642f1a82c5f..066774effad9a 100644 --- a/frontend/src/queries/nodes/DataTable/queryFeatures.ts +++ b/frontend/src/queries/nodes/DataTable/queryFeatures.ts @@ -5,6 +5,7 @@ import { isHogQLQuery, isPersonsNode, isSessionAttributionExplorerQuery, + isWebGoalsQuery, isWebOverviewQuery, isWebStatsTableQuery, isWebTopClicksQuery, @@ -58,7 +59,12 @@ export function getQueryFeatures(query: Node): Set<QueryFeature> { } } - if (isWebOverviewQuery(query) || isWebTopClicksQuery(query) || isWebStatsTableQuery(query)) { + if ( + isWebOverviewQuery(query) || + isWebTopClicksQuery(query) || + isWebStatsTableQuery(query) || + isWebGoalsQuery(query) + ) { features.add(QueryFeature.columnsInResponse) features.add(QueryFeature.resultIsArrayOfArrays) features.add(QueryFeature.hideLoadNextButton) diff --git a/frontend/src/queries/nodes/InsightViz/InsightViz.tsx b/frontend/src/queries/nodes/InsightViz/InsightViz.tsx index 694c206dd1333..8ee5238633b5d 100644 --- a/frontend/src/queries/nodes/InsightViz/InsightViz.tsx +++ b/frontend/src/queries/nodes/InsightViz/InsightViz.tsx @@ -9,6 +9,7 @@ import { insightSceneLogic } from 'scenes/insights/insightSceneLogic' import { insightVizDataLogic } from 'scenes/insights/insightVizDataLogic' import { keyForInsightLogicProps } from 'scenes/insights/sharedUtils' +import { ErrorBoundary } from '~/layout/ErrorBoundary' import { InsightVizNode } from '~/queries/schema' import { QueryContext } from '~/queries/types' import { isFunnelsQuery } from '~/queries/utils' @@ -93,25 +94,27 @@ export function InsightViz({ uniqueKey, query, setQuery, context, readOnly, embe ) return ( - - -
    - {!readOnly && ( - - )} - {!isEmbedded ?
    {display}
    : display} -
    + + + + +
    + {!readOnly && ( + + )} + {!isEmbedded ?
    {display}
    : display} +
    +
    -
    + ) } diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index ba60660e3805d..3a9ea2ab136b2 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -369,6 +369,9 @@ { "$ref": "#/definitions/WebTopClicksQuery" }, + { + "$ref": "#/definitions/WebGoalsQuery" + }, { "$ref": "#/definitions/SessionAttributionExplorerQuery" }, @@ -1898,6 +1901,92 @@ ], "type": "object" }, + "CachedWebGoalsQueryResponse": { + "additionalProperties": false, + "properties": { + "cache_key": { + "type": "string" + }, + "cache_target_age": { + "format": "date-time", + "type": "string" + }, + "calculation_trigger": { + "description": "What triggered the calculation of the query, leave empty if user/immediate", + "type": "string" + }, + "columns": { + "items": {}, + "type": "array" + }, + "error": { + "description": "Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + "type": "string" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { + "description": "Generated HogQL query.", + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "format": "date-time", + "type": "string" + }, + "limit": { + "type": "integer" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "next_allowed_client_refresh": { + "format": "date-time", + "type": "string" + }, + "offset": { + "type": "integer" + }, + "query_status": { + "$ref": "#/definitions/QueryStatus", + "description": "Query status indicates whether next to the provided data, a query is still running." + }, + "results": { + "items": {}, + "type": "array" + }, + "samplingRate": { + "$ref": "#/definitions/SamplingRate" + }, + "timezone": { + "type": "string" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": [ + "cache_key", + "is_cached", + "last_refresh", + "next_allowed_client_refresh", + "results", + "timezone" + ], + "type": "object" + }, "CachedWebOverviewQueryResponse": { "additionalProperties": false, "properties": { @@ -2720,6 +2809,60 @@ "required": ["results"], "type": "object" }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "error": { + "description": "Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + "type": "string" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { + "description": "Generated HogQL query.", + "type": "string" + }, + "limit": { + "type": "integer" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "offset": { + "type": "integer" + }, + "query_status": { + "$ref": "#/definitions/QueryStatus", + "description": "Query status indicates whether next to the provided data, a query is still running." 
+ }, + "results": { + "items": {}, + "type": "array" + }, + "samplingRate": { + "$ref": "#/definitions/SamplingRate" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, { "additionalProperties": false, "properties": { @@ -2921,6 +3064,9 @@ { "$ref": "#/definitions/WebTopClicksQuery" }, + { + "$ref": "#/definitions/WebGoalsQuery" + }, { "$ref": "#/definitions/SessionAttributionExplorerQuery" }, @@ -6226,6 +6372,7 @@ "WebOverviewQuery", "WebTopClicksQuery", "WebStatsTableQuery", + "WebGoalsQuery", "DatabaseSchemaQuery" ], "type": "string" @@ -7266,6 +7413,60 @@ "required": ["results"], "type": "object" }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "error": { + "description": "Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + "type": "string" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { + "description": "Generated HogQL query.", + "type": "string" + }, + "limit": { + "type": "integer" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "offset": { + "type": "integer" + }, + "query_status": { + "$ref": "#/definitions/QueryStatus", + "description": "Query status indicates whether next to the provided data, a query is still running." + }, + "results": { + "items": {}, + "type": "array" + }, + "samplingRate": { + "$ref": "#/definitions/SamplingRate" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, { "additionalProperties": false, "properties": { @@ -7703,6 +7904,60 @@ "required": ["results"], "type": "object" }, + { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "error": { + "description": "Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + "type": "string" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { + "description": "Generated HogQL query.", + "type": "string" + }, + "limit": { + "type": "integer" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "offset": { + "type": "integer" + }, + "query_status": { + "$ref": "#/definitions/QueryStatus", + "description": "Query status indicates whether next to the provided data, a query is still running." 
+ }, + "results": { + "items": {}, + "type": "array" + }, + "samplingRate": { + "$ref": "#/definitions/SamplingRate" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, { "additionalProperties": false, "properties": { @@ -8150,6 +8405,9 @@ { "$ref": "#/definitions/WebTopClicksQuery" }, + { + "$ref": "#/definitions/WebGoalsQuery" + }, { "$ref": "#/definitions/SessionAttributionExplorerQuery" }, @@ -9762,6 +10020,106 @@ }, "type": "array" }, + "WebGoalsQuery": { + "additionalProperties": false, + "properties": { + "dateRange": { + "$ref": "#/definitions/DateRange" + }, + "filterTestAccounts": { + "type": "boolean" + }, + "kind": { + "const": "WebGoalsQuery", + "type": "string" + }, + "limit": { + "type": "integer" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "properties": { + "$ref": "#/definitions/WebAnalyticsPropertyFilters" + }, + "response": { + "$ref": "#/definitions/WebGoalsQueryResponse" + }, + "sampling": { + "additionalProperties": false, + "properties": { + "enabled": { + "type": "boolean" + }, + "forceSamplingRate": { + "$ref": "#/definitions/SamplingRate" + } + }, + "type": "object" + }, + "useSessionsTable": { + "deprecated": "ignored, always treated as enabled *", + "type": "boolean" + } + }, + "required": ["kind", "properties"], + "type": "object" + }, + "WebGoalsQueryResponse": { + "additionalProperties": false, + "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "error": { + "description": "Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + "type": "string" + }, + "hasMore": { + "type": "boolean" + }, + "hogql": { + "description": "Generated HogQL query.", + "type": "string" + }, + "limit": { + "type": "integer" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "offset": { + "type": "integer" + }, + "query_status": { + "$ref": "#/definitions/QueryStatus", + "description": "Query status indicates whether next to the provided data, a query is still running." 
+ }, + "results": { + "items": {}, + "type": "array" + }, + "samplingRate": { + "$ref": "#/definitions/SamplingRate" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + }, + "types": { + "items": {}, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, "WebOverviewItem": { "additionalProperties": false, "properties": { @@ -9884,6 +10242,7 @@ "Page", "InitialPage", "ExitPage", + "ExitClick", "InitialChannelType", "InitialReferringDomain", "InitialUTMSource", diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 0a41adf57544c..176eb9d8579ab 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -92,6 +92,7 @@ export enum NodeKind { WebOverviewQuery = 'WebOverviewQuery', WebTopClicksQuery = 'WebTopClicksQuery', WebStatsTableQuery = 'WebStatsTableQuery', + WebGoalsQuery = 'WebGoalsQuery', // Database metadata DatabaseSchemaQuery = 'DatabaseSchemaQuery', @@ -113,6 +114,7 @@ export type AnyDataNode = | WebOverviewQuery | WebStatsTableQuery | WebTopClicksQuery + | WebGoalsQuery | SessionAttributionExplorerQuery | ErrorTrackingQuery @@ -137,6 +139,7 @@ export type QuerySchema = | WebOverviewQuery | WebStatsTableQuery | WebTopClicksQuery + | WebGoalsQuery | SessionAttributionExplorerQuery | ErrorTrackingQuery @@ -555,6 +558,7 @@ export interface DataTableNode | WebOverviewQuery | WebStatsTableQuery | WebTopClicksQuery + | WebGoalsQuery | SessionAttributionExplorerQuery | ErrorTrackingQuery )['response'] @@ -572,6 +576,7 @@ export interface DataTableNode | WebOverviewQuery | WebStatsTableQuery | WebTopClicksQuery + | WebGoalsQuery | SessionAttributionExplorerQuery | ErrorTrackingQuery /** Columns shown in the table, unless the `source` provides them. 
*/ @@ -1291,6 +1296,7 @@ export enum WebStatsBreakdown { Page = 'Page', InitialPage = 'InitialPage', ExitPage = 'ExitPage', // not supported in the legacy version + ExitClick = 'ExitClick', InitialChannelType = 'InitialChannelType', InitialReferringDomain = 'InitialReferringDomain', InitialUTMSource = 'InitialUTMSource', @@ -1323,9 +1329,24 @@ export interface WebStatsTableQueryResponse extends AnalyticsQueryResponseBase +export interface WebGoalsQuery extends WebAnalyticsQueryBase<WebGoalsQueryResponse> { + kind: NodeKind.WebGoalsQuery + limit?: integer +} + +export interface WebGoalsQueryResponse extends AnalyticsQueryResponseBase<unknown[]> { + types?: unknown[] + columns?: unknown[] + hogql?: string + samplingRate?: SamplingRate + hasMore?: boolean + limit?: integer + offset?: integer +} +export type CachedWebGoalsQueryResponse = CachedQueryResponse<WebGoalsQueryResponse> + export enum SessionAttributionGroupBy { ChannelType = 'ChannelType', Medium = 'Medium', diff --git a/frontend/src/queries/utils.ts b/frontend/src/queries/utils.ts index 3062354c7df91..db8c0d505ae93 100644 --- a/frontend/src/queries/utils.ts +++ b/frontend/src/queries/utils.ts @@ -35,6 +35,7 @@ import { SessionAttributionExplorerQuery, StickinessQuery, TrendsQuery, + WebGoalsQuery, WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, @@ -129,6 +130,10 @@ export function isWebTopClicksQuery(node?: Record<string, any> | null): node is WebTopClicksQuery { return node?.kind === NodeKind.WebTopClicksQuery } +export function isWebGoalsQuery(node?: Record<string, any> | null): node is WebGoalsQuery { + return node?.kind === NodeKind.WebGoalsQuery +} + export function isSessionAttributionExplorerQuery( node?: Record<string, any> | null ): node is SessionAttributionExplorerQuery { diff --git a/frontend/src/scenes/App.tsx b/frontend/src/scenes/App.tsx index 933fb93e70b5b..33e5804d9aa88 100644 --- a/frontend/src/scenes/App.tsx +++ b/frontend/src/scenes/App.tsx @@ -135,7 +135,7 @@ function AppScene(): JSX.Element | null { } const wrappedSceneElement = ( - + {activeLoadedScene?.logic ? ( {sceneElement} diff --git a/frontend/src/scenes/actions/Action.stories.tsx b/frontend/src/scenes/actions/Action.stories.tsx index f3586f06350a7..a42b8c8996f96 100644 --- a/frontend/src/scenes/actions/Action.stories.tsx +++ b/frontend/src/scenes/actions/Action.stories.tsx @@ -61,6 +61,7 @@ const MOCK_ACTION: ActionType = { deleted: false, is_calculating: false, last_calculated_at: '2024-05-21T12:57:50.894221Z', + pinned_at: null, } const meta: Meta = { diff --git a/frontend/src/scenes/actions/actionsLogic.ts b/frontend/src/scenes/actions/actionsLogic.ts index bbd05ff52e48a..5fa541eb4b6c4 100644 --- a/frontend/src/scenes/actions/actionsLogic.ts +++ b/frontend/src/scenes/actions/actionsLogic.ts @@ -56,7 +56,7 @@ export const actionsLogic = kea<actionsLogicType>([ actionsFiltered: [ (s) => [s.actions, s.filterType, s.searchTerm, s.user], (actions, filterType, searchTerm, user) => { - let data = actions + let data: ActionType[] = actions if (searchTerm) { data = actionsFuse.search(searchTerm).map((result) => result.item) } diff --git a/frontend/src/scenes/billing/billingLogic.tsx b/frontend/src/scenes/billing/billingLogic.tsx index 46185c696b68d..b6ffa6a4aafff 100644 --- a/frontend/src/scenes/billing/billingLogic.tsx +++ b/frontend/src/scenes/billing/billingLogic.tsx @@ -471,7 +471,11 @@ export const billingLogic = kea<billingLogicType>([ title: 'Usage limit exceeded', message: `You have exceeded the usage limit for ${productOverLimit.name}. Please ${productOverLimit.subscribed ? 
'increase your billing limit' : 'upgrade your plan'} - or data loss may occur.`, + or ${ + productOverLimit.name === 'Data warehouse' + ? 'data will not be synced' + : 'data loss may occur' + }.`, dismissKey: 'usage-limit-exceeded', }) return diff --git a/frontend/src/scenes/data-management/actions/ActionsTable.tsx b/frontend/src/scenes/data-management/actions/ActionsTable.tsx index 9bb24e85a09a6..17cb9f52f2643 100644 --- a/frontend/src/scenes/data-management/actions/ActionsTable.tsx +++ b/frontend/src/scenes/data-management/actions/ActionsTable.tsx @@ -1,4 +1,4 @@ -import { IconCheckCircle } from '@posthog/icons' +import { IconCheckCircle, IconPin, IconPinFilled } from '@posthog/icons' import { LemonInput, LemonSegmentedButton } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import api from 'lib/api' @@ -28,7 +28,7 @@ export function ActionsTable(): JSX.Element { const { currentTeam } = useValues(teamLogic) const { actionsLoading } = useValues(actionsModel({ params: 'include_count=1' })) - const { loadActions } = useActions(actionsModel) + const { loadActions, pinAction, unpinAction } = useActions(actionsModel) const { filterType, searchTerm, actionsFiltered, shouldShowEmptyState } = useValues(actionsLogic) const { setFilterType, setSearchTerm } = useActions(actionsLogic) @@ -56,6 +56,24 @@ export function ActionsTable(): JSX.Element { } const columns: LemonTableColumns<ActionType> = [ + { + width: 0, + title: 'Pinned', + dataIndex: 'pinned_at', + sorter: (a: ActionType, b: ActionType) => + (b.pinned_at ? new Date(b.pinned_at).getTime() : 0) - + (a.pinned_at ? new Date(a.pinned_at).getTime() : 0), + render: function Render(pinned, action) { + return ( + <LemonButton + onClick={pinned ? () => unpinAction(action) : () => pinAction(action)} + tooltip={pinned ? 'Unpin action' : 'Pin action'} + icon={pinned ? <IconPinFilled /> : <IconPin />} + /> + ) + }, + }, { title: 'Name', dataIndex: 'name', diff --git a/frontend/src/scenes/data-warehouse/external/DataWarehouseTables.tsx b/frontend/src/scenes/data-warehouse/external/DataWarehouseTables.tsx index 6bbbbb4c376a5..ac52bd4cbe891 100644 --- a/frontend/src/scenes/data-warehouse/external/DataWarehouseTables.tsx +++ b/frontend/src/scenes/data-warehouse/external/DataWarehouseTables.tsx @@ -237,7 +237,13 @@ export const DatabaseTableTreeWithItems = ({ inline }: DatabaseTableTreeProps): } onClick={() => setCollapsed(false)} /> ) : ( <> - <LemonButton onClick={() => setCollapsed(true)} fullWidth icon={}> + <LemonButton + onClick={() => setCollapsed(true)} + fullWidth + icon={} + className="font-normal" + > Sources diff --git a/frontend/src/scenes/insights/views/LineGraph/LineGraph.tsx b/frontend/src/scenes/insights/views/LineGraph/LineGraph.tsx index 7b099b067f3b2..3334e3958017c 100644 --- a/frontend/src/scenes/insights/views/LineGraph/LineGraph.tsx +++ b/frontend/src/scenes/insights/views/LineGraph/LineGraph.tsx @@ -242,7 +242,7 @@ export interface LineGraphProps { export const LineGraph = (props: LineGraphProps): JSX.Element => { return ( - + {props.type === GraphType.Pie ? <PieChart {...props} /> : <LineGraph_ {...props} />} ) @@ -485,6 +485,7 @@ export function LineGraph_({ if (tooltip.body) { const referenceDataPoint = tooltip.dataPoints[0] // Use this point as reference to get the date const dataset = datasets[referenceDataPoint.datasetIndex] + const date = dataset?.days?.[referenceDataPoint.dataIndex] const seriesData = createTooltipData(tooltip.dataPoints, (dp) => { const hasDotted = datasets.some((d) => d.dotted) && @@ -498,7 +499,12 @@ tooltipRoot.render( + typeof date === 'number' ? 
dataset?.labels?.[referenceDataPoint.dataIndex] + : null + } timezone={timezone} seriesData={seriesData} breakdownFilter={breakdownFilter} diff --git a/frontend/src/scenes/saved-insights/SavedInsights.tsx b/frontend/src/scenes/saved-insights/SavedInsights.tsx index ed0b4fc5110a7..1ef05e3b953aa 100644 --- a/frontend/src/scenes/saved-insights/SavedInsights.tsx +++ b/frontend/src/scenes/saved-insights/SavedInsights.tsx @@ -298,6 +298,12 @@ export const QUERY_TYPES_METADATA: Record = { icon: IconPieChart, inMenu: true, }, + [NodeKind.WebGoalsQuery]: { + name: 'Goals', + description: 'View goal conversions', + icon: IconPieChart, + inMenu: true, + }, [NodeKind.HogQuery]: { name: 'Hog', description: 'Hog query', diff --git a/frontend/src/scenes/session-recordings/apm/playerInspector/ItemPerformanceEvent.tsx b/frontend/src/scenes/session-recordings/apm/playerInspector/ItemPerformanceEvent.tsx index 84097a1eb27b2..a9c9d2914f6b3 100644 --- a/frontend/src/scenes/session-recordings/apm/playerInspector/ItemPerformanceEvent.tsx +++ b/frontend/src/scenes/session-recordings/apm/playerInspector/ItemPerformanceEvent.tsx @@ -173,7 +173,13 @@ export function ItemPerformanceEvent({ return (
- <LemonButton onClick={() => setExpanded(!expanded)} fullWidth data-attr="item-performance-event"> + <LemonButton + onClick={() => setExpanded(!expanded)} + fullWidth + data-attr="item-performance-event" + className="font-normal" + >
    Replay properties
      - {properties.map(({ key, label, propertyFilterType }) => ( - onChange(key, { propertyFilterType: propertyFilterType })} - disabledReason={hasFilter(key) ? `${label} filter already added` : undefined} - > - {label} - - ))} + {properties.map(({ key, taxonomicFilterGroup, propertyFilterType }) => { + const label = getFilterLabel(key, taxonomicFilterGroup) + return ( + onChange(key, { propertyFilterType: propertyFilterType })} + disabledReason={hasFilter(key) ? `${label} filter already added` : undefined} + > + {label} + + ) + })}
diff --git a/frontend/src/scenes/session-recordings/player/inspector/components/ItemConsoleLog.tsx b/frontend/src/scenes/session-recordings/player/inspector/components/ItemConsoleLog.tsx index 16b91ddc6bbd5..88d328691ab2c 100644 --- a/frontend/src/scenes/session-recordings/player/inspector/components/ItemConsoleLog.tsx +++ b/frontend/src/scenes/session-recordings/player/inspector/components/ItemConsoleLog.tsx @@ -13,7 +13,13 @@ export interface ItemConsoleLogProps { export function ItemConsoleLog({ item, expanded, setExpanded }: ItemConsoleLogProps): JSX.Element { return ( <> - <LemonButton onClick={() => setExpanded(!expanded)} fullWidth data-attr="item-console-log"> + <LemonButton + onClick={() => setExpanded(!expanded)} + fullWidth + data-attr="item-console-log" + className="font-normal" + >
    {item.data.content}
{(item.data.count || 1) > 1 ? ( - <LemonButton onClick={() => setExpanded(!expanded)} fullWidth data-attr="item-doctor-item"> + <LemonButton + onClick={() => setExpanded(!expanded)} + fullWidth + data-attr="item-doctor-item" + className="font-normal" + >
    {item.tag}
    diff --git a/frontend/src/scenes/session-recordings/player/inspector/components/ItemEvent.tsx b/frontend/src/scenes/session-recordings/player/inspector/components/ItemEvent.tsx index 699ae426b1c6d..c88a43feaaf23 100644 --- a/frontend/src/scenes/session-recordings/player/inspector/components/ItemEvent.tsx +++ b/frontend/src/scenes/session-recordings/player/inspector/components/ItemEvent.tsx @@ -83,7 +83,7 @@ export function ItemEvent({ item, expanded, setExpanded }: ItemEventProps): JSX. return (
- <LemonButton onClick={() => setExpanded(!expanded)} fullWidth> + <LemonButton onClick={() => setExpanded(!expanded)} fullWidth className="font-normal">
- {tabs.length > 3 ? ( + {tabs.length > 4 ? ( { if (typeof value === 'number') { @@ -38,7 +43,9 @@ const BreakdownValueTitle: QueryContextColumnTitleComponent = (props) => { case WebStatsBreakdown.InitialPage: return <>Initial Path</> case WebStatsBreakdown.ExitPage: - return <>Exit Path</> + return <>End Path</> + case WebStatsBreakdown.ExitClick: + return <>Exit Click</> case WebStatsBreakdown.InitialChannelType: return <>Initial Channel Type</> case WebStatsBreakdown.InitialReferringDomain: @@ -131,7 +138,9 @@ export const webStatsBreakdownToPropertyName = ( case WebStatsBreakdown.InitialPage: return { key: '$entry_pathname', type: PropertyFilterType.Session } case WebStatsBreakdown.ExitPage: - return { key: '$exit_pathname', type: PropertyFilterType.Session } + return { key: '$end_pathname', type: PropertyFilterType.Session } + case WebStatsBreakdown.ExitClick: + return { key: '$last_external_click_url', type: PropertyFilterType.Session } case WebStatsBreakdown.InitialChannelType: return { key: '$channel_type', type: PropertyFilterType.Session } case WebStatsBreakdown.InitialReferringDomain: @@ -196,6 +205,24 @@ export const webAnalyticsDataTableQueryContext: QueryContext = { render: PercentageCell, align: 'right', }, + total_conversions: { + title: 'Total Conversions', + render: NumericCell, + align: 'right', + }, + conversion_rate: { + title: 'Conversion Rate', + render: PercentageCell, + align: 'right', + }, + converting_users: { + title: 'Converting Users', + render: NumericCell, + align: 'right', + }, + action_name: { + title: 'Action', + }, }, } @@ -390,7 +417,7 @@ export const WebStatsTableTile = ({ />
    } - checked={isPathCleaningEnabled} + checked={!!isPathCleaningEnabled} onChange={setIsPathCleaningEnabled} className="h-full" /> @@ -441,6 +468,47 @@ const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): str return breakdownValue } +export const WebGoalsTile = ({ + query, + insightProps, +}: { + query: DataTableNode + insightProps: InsightLogicProps +}): JSX.Element | null => { + const { actions, actionsLoading } = useValues(actionsModel) + const { updateHasSeenProductIntroFor } = useActions(userLogic) + + if (actionsLoading) { + return null + } + + if (!actions.length) { + return ( + updateHasSeenProductIntroFor(ProductKey.ACTIONS, true)} /> + } + /> + ) + } + + return ( +
    +
    + } type="secondary" size="small"> + Manage actions + +
    + +
    + ) +} export const WebQuery = ({ query, showIntervalSelect, @@ -464,6 +532,8 @@ export const WebQuery = ({ } if (query.kind === NodeKind.InsightVizNode) { return + } else if (query.kind === NodeKind.DataTableNode && query.source.kind === NodeKind.WebGoalsQuery) { + return } return diff --git a/frontend/src/scenes/web-analytics/webAnalyticsLogic.tsx b/frontend/src/scenes/web-analytics/webAnalyticsLogic.tsx index 31e7b45c835a5..2db3b94cefed9 100644 --- a/frontend/src/scenes/web-analytics/webAnalyticsLogic.tsx +++ b/frontend/src/scenes/web-analytics/webAnalyticsLogic.tsx @@ -59,6 +59,7 @@ export enum TileId { RETENTION = 'RETENTION', REPLAY = 'REPLAY', ERROR_TRACKING = 'ERROR_TRACKING', + GOALS = 'GOALS', } const loadPriorityMap: Record = { @@ -71,6 +72,7 @@ const loadPriorityMap: Record = { [TileId.RETENTION]: 7, [TileId.REPLAY]: 8, [TileId.ERROR_TRACKING]: 9, + [TileId.GOALS]: 10, } interface BaseTile { @@ -163,7 +165,8 @@ export enum DeviceTab { export enum PathTab { PATH = 'PATH', INITIAL_PATH = 'INITIAL_PATH', - EXIT_PATH = 'EXIT_PATH', + END_PATH = 'END_PATH', + EXIT_CLICK = 'EXIT_CLICK', } export enum GeographyTab { @@ -263,6 +266,7 @@ export const webAnalyticsLogic = kea([ reducers({ webAnalyticsFilters: [ initialWebAnalyticsFilter, + { persist: true }, { setWebAnalyticsFilters: (_, { webAnalyticsFilters }) => webAnalyticsFilters, togglePropertyFilter: (oldPropertyFilters, { key, value, type }): WebAnalyticsPropertyFilters => { @@ -335,6 +339,7 @@ export const webAnalyticsLogic = kea([ ], _graphsTab: [ null as string | null, + { persist: true }, { setGraphsTab: (_, { tab }) => tab, setStateFromUrl: (_, { state }) => state.graphsTab, @@ -343,6 +348,7 @@ export const webAnalyticsLogic = kea([ ], _sourceTab: [ null as string | null, + { persist: true }, { setSourceTab: (_, { tab }) => tab, setStateFromUrl: (_, { state }) => state.sourceTab, @@ -351,6 +357,7 @@ export const webAnalyticsLogic = kea([ ], _deviceTab: [ null as string | null, + { persist: true }, { setDeviceTab: (_, { tab }) => tab, setStateFromUrl: (_, { state }) => state.deviceTab, @@ -359,6 +366,7 @@ export const webAnalyticsLogic = kea([ ], _pathTab: [ null as string | null, + { persist: true }, { setPathTab: (_, { tab }) => tab, setStateFromUrl: (_, { state }) => state.pathTab, @@ -367,6 +375,7 @@ export const webAnalyticsLogic = kea([ ], _geographyTab: [ null as string | null, + { persist: true }, { setGeographyTab: (_, { tab }) => tab, setStateFromUrl: (_, { state }) => state.geographyTab, @@ -374,7 +383,8 @@ export const webAnalyticsLogic = kea([ }, ], isPathCleaningEnabled: [ - false as boolean, + null as boolean | null, + { persist: true }, { setIsPathCleaningEnabled: (_, { isPathCleaningEnabled }) => isPathCleaningEnabled, setStateFromUrl: (_, { state }) => state.isPathCleaningEnabled || false, @@ -396,6 +406,7 @@ export const webAnalyticsLogic = kea([ dateTo: initialDateTo, interval: initialInterval, }, + { persist: true }, { setDates: (_, { dateTo, dateFrom }) => ({ dateTo, @@ -425,6 +436,7 @@ export const webAnalyticsLogic = kea([ ], shouldFilterTestAccounts: [ false as boolean, + { persist: true }, { setShouldFilterTestAccounts: (_, { shouldFilterTestAccounts }) => shouldFilterTestAccounts, }, @@ -687,7 +699,7 @@ export const webAnalyticsLogic = kea([ showPathCleaningControls: true, }, { - id: PathTab.EXIT_PATH, + id: PathTab.END_PATH, title: 'End paths', linkText: 'End path', query: { @@ -706,10 +718,34 @@ export const webAnalyticsLogic = kea([ }, embedded: false, }, - insightProps: 
createInsightProps(TileId.PATHS, PathTab.EXIT_PATH), + insightProps: createInsightProps(TileId.PATHS, PathTab.END_PATH), canOpenModal: true, showPathCleaningControls: true, }, + featureFlags[FEATURE_FLAGS.WEB_ANALYTICS_LAST_CLICK] + ? { + id: PathTab.EXIT_CLICK, + title: 'Exit clicks', + linkText: 'Exit clicks', + query: { + full: true, + kind: NodeKind.DataTableNode, + source: { + kind: NodeKind.WebStatsTableQuery, + properties: webAnalyticsFilters, + breakdownBy: WebStatsBreakdown.ExitClick, + dateRange, + includeScrollDepth: false, + sampling, + limit: 10, + filterTestAccounts, + }, + embedded: false, + }, + insightProps: createInsightProps(TileId.PATHS, PathTab.END_PATH), + canOpenModal: true, + } + : null, ] as (TabsTileTab | undefined)[] ).filter(isNotNil), }, @@ -1091,6 +1127,7 @@ export const webAnalyticsLogic = kea([ layout: { colSpanClassName: 'md:col-span-2', }, + query: { kind: NodeKind.InsightVizNode, source: { @@ -1115,9 +1152,35 @@ export const webAnalyticsLogic = kea([ embedded: true, }, insightProps: createInsightProps(TileId.RETENTION), - canOpenInsight: true, - canOpenModal: false, + canOpenInsight: false, + canOpenModal: true, }, + featureFlags[FEATURE_FLAGS.WEB_ANALYTICS_CONVERSION_GOALS] + ? { + kind: 'query', + tileId: TileId.GOALS, + title: 'Goals', + layout: { + colSpanClassName: 'md:col-span-2', + }, + query: { + full: true, + kind: NodeKind.DataTableNode, + source: { + kind: NodeKind.WebGoalsQuery, + properties: webAnalyticsFilters, + dateRange, + sampling, + limit: 10, + filterTestAccounts, + }, + embedded: true, + }, + insightProps: createInsightProps(TileId.GOALS), + canOpenInsight: false, + canOpenModal: false, + } + : null, featureFlags[FEATURE_FLAGS.WEB_ANALYTICS_REPLAY] ? { kind: 'replay', @@ -1158,7 +1221,7 @@ export const webAnalyticsLogic = kea([ const { tileId, tabId } = modalTileAndTab const tile = tiles.find((tile) => tile.tileId === tileId) if (!tile) { - throw new Error('Developer Error, tile not found') + return null } const extendQuery = (query: QuerySchema): QuerySchema => { @@ -1177,7 +1240,7 @@ export const webAnalyticsLogic = kea([ if (tile.kind === 'tabs') { const tab = tile.tabs.find((tab) => tab.id === tabId) if (!tab) { - throw new Error('Developer Error, tab not found') + return null } return { tileId, @@ -1284,12 +1347,12 @@ export const webAnalyticsLogic = kea([ const tile = tiles.find((tile) => tile.tileId === tileId) if (!tile) { - throw new Error('Developer Error, tile not found') + return undefined } if (tile.kind === 'tabs') { const tab = tile.tabs.find((tab) => tab.id === tabId) if (!tab) { - throw new Error('Developer Error, tab not found') + return undefined } return urls.insightNew(undefined, undefined, formatQueryForNewInsight(tab.query)) } else if (tile.kind === 'query') { @@ -1401,12 +1464,13 @@ export const webAnalyticsLogic = kea([ const { webAnalyticsFilters, dateFilter: { dateTo, dateFrom, interval }, - sourceTab, - deviceTab, - pathTab, - geographyTab, - graphsTab, + _sourceTab, + _deviceTab, + _pathTab, + _geographyTab, + _graphsTab, isPathCleaningEnabled, + shouldFilterTestAccounts, } = values const urlParams = new URLSearchParams() @@ -1418,24 +1482,27 @@ export const webAnalyticsLogic = kea([ urlParams.set('date_to', dateTo ?? '') urlParams.set('interval', interval ?? 
'') } - if (deviceTab) { - urlParams.set('device_tab', deviceTab) + if (_deviceTab) { + urlParams.set('device_tab', _deviceTab) } - if (sourceTab) { - urlParams.set('source_tab', sourceTab) + if (_sourceTab) { + urlParams.set('source_tab', _sourceTab) } - if (graphsTab) { - urlParams.set('graphs_tab', graphsTab) + if (_graphsTab) { + urlParams.set('graphs_tab', _graphsTab) } - if (pathTab) { - urlParams.set('path_tab', pathTab) + if (_pathTab) { + urlParams.set('path_tab', _pathTab) } - if (geographyTab) { - urlParams.set('geography_tab', geographyTab) + if (_geographyTab) { + urlParams.set('geography_tab', _geographyTab) } - if (isPathCleaningEnabled) { + if (isPathCleaningEnabled != null) { urlParams.set('path_cleaning', isPathCleaningEnabled.toString()) } + if (shouldFilterTestAccounts != null) { + urlParams.set('filter_test_accounts', shouldFilterTestAccounts.toString()) + } return `/web?${urlParams.toString()}` } @@ -1466,22 +1533,41 @@ export const webAnalyticsLogic = kea([ path_tab, geography_tab, path_cleaning, + filter_test_accounts, } ) => { - const parsedFilters = isWebAnalyticsPropertyFilters(filters) ? filters : initialWebAnalyticsFilter + const parsedFilters = isWebAnalyticsPropertyFilters(filters) ? filters : undefined - actions.setStateFromUrl({ - filters: parsedFilters, - dateFrom: date_from || null, - dateTo: date_to || null, - interval: interval || null, - deviceTab: device_tab || null, - sourceTab: source_tab || null, - graphsTab: graphs_tab || null, - pathTab: path_tab || null, - geographyTab: geography_tab || null, - isPathCleaningEnabled: [true, 'true', 1, '1'].includes(path_cleaning), - }) + if (parsedFilters) { + actions.setWebAnalyticsFilters(parsedFilters) + } + if (date_from || date_to) { + actions.setDates(date_from, date_to) + } + if (interval) { + actions.setInterval(interval) + } + if (device_tab) { + actions.setDeviceTab(device_tab) + } + if (source_tab) { + actions.setSourceTab(source_tab) + } + if (graphs_tab) { + actions.setGraphsTab(graphs_tab) + } + if (path_tab) { + actions.setPathTab(path_tab) + } + if (geography_tab) { + actions.setGeographyTab(geography_tab) + } + if (path_cleaning) { + actions.setIsPathCleaningEnabled([true, 'true', 1, '1'].includes(path_cleaning)) + } + if (filter_test_accounts) { + actions.setShouldFilterTestAccounts([true, 'true', 1, '1'].includes(filter_test_accounts)) + } }, })), ]) diff --git a/frontend/src/test/mocks.ts b/frontend/src/test/mocks.ts index 5d2e4f2fff3c5..0e98c9c7d8064 100644 --- a/frontend/src/test/mocks.ts +++ b/frontend/src/test/mocks.ts @@ -142,6 +142,7 @@ export const mockActionDefinition = { last_calculated_at: '2022-01-24T21:32:38.359756Z', team_id: 1, created_by: null, + pinned_at: null, } export const mockCohort: CohortType = { diff --git a/frontend/src/toolbar/actions/actionsLogic.test.ts b/frontend/src/toolbar/actions/actionsLogic.test.ts index 943260c5fec46..71f562bea297c 100644 --- a/frontend/src/toolbar/actions/actionsLogic.test.ts +++ b/frontend/src/toolbar/actions/actionsLogic.test.ts @@ -6,9 +6,9 @@ import { toolbarConfigLogic } from '~/toolbar/toolbarConfigLogic' import { ActionType } from '~/types' const unsortedActions: ActionType[] = [ - { name: 'zoo', created_at: '', created_by: null, id: 1 }, - { name: 'middle', created_at: '', created_by: null, id: 2 }, - { name: 'begin', created_at: '', created_by: null, id: 3 }, + { name: 'zoo', created_at: '', created_by: null, id: 1, pinned_at: null }, + { name: 'middle', created_at: '', created_by: null, id: 2, pinned_at: null }, + { name: 'begin', 
created_at: '', created_by: null, id: 3, pinned_at: null }, ] const apiJson = { results: unsortedActions } @@ -45,9 +45,9 @@ describe('toolbar actionsLogic', () => { .delay(0) .toMatchValues({ sortedActions: [ - { created_at: '', created_by: null, id: 3, name: 'begin' }, - { created_at: '', created_by: null, id: 2, name: 'middle' }, - { created_at: '', created_by: null, id: 1, name: 'zoo' }, + { created_at: '', created_by: null, id: 3, name: 'begin', pinned_at: null }, + { created_at: '', created_by: null, id: 2, name: 'middle', pinned_at: null }, + { created_at: '', created_by: null, id: 1, name: 'zoo', pinned_at: null }, ], actionCount: 3, allActions: apiJson.results, @@ -62,8 +62,8 @@ describe('toolbar actionsLogic', () => { .delay(0) .toMatchValues({ sortedActions: [ - { created_at: '', created_by: null, id: 3, name: 'begin' }, - { created_at: '', created_by: null, id: 2, name: 'middle' }, + { created_at: '', created_by: null, id: 3, name: 'begin', pinned_at: null }, + { created_at: '', created_by: null, id: 2, name: 'middle', pinned_at: null }, ], }) }) diff --git a/frontend/src/toolbar/actions/actionsTabLogic.tsx b/frontend/src/toolbar/actions/actionsTabLogic.tsx index 7f1d82180be19..759d410d00f97 100644 --- a/frontend/src/toolbar/actions/actionsTabLogic.tsx +++ b/frontend/src/toolbar/actions/actionsTabLogic.tsx @@ -19,6 +19,7 @@ function newAction(element: HTMLElement | null, dataAttributes: string[] = []): return { name: '', steps: [element ? actionStepToActionStepFormItem(elementToActionStep(element, dataAttributes), true) : {}], + pinned_at: null, } } diff --git a/frontend/src/types.ts b/frontend/src/types.ts index ccc4289d19574..0e32117d3688e 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -543,6 +543,7 @@ export interface ActionType { action_id?: number // alias of id to make it compatible with event definitions uuid bytecode?: any[] bytecode_error?: string + pinned_at: string | null } /** Sync with plugin-server/src/types.ts */ @@ -3066,6 +3067,7 @@ export enum PropertyDefinitionType { Person = 'person', Group = 'group', Session = 'session', + LogEntry = 'log_entry', } export interface PropertyDefinition { diff --git a/latest_migrations.manifest b/latest_migrations.manifest index b83f42c6cc145..85b48ed0ed16f 100644 --- a/latest_migrations.manifest +++ b/latest_migrations.manifest @@ -5,7 +5,7 @@ contenttypes: 0002_remove_content_type_name ee: 0016_rolemembership_organization_member otp_static: 0002_throttling otp_totp: 0002_auto_20190420_0723 -posthog: 0464_datawarehouse_stripe_account +posthog: 0465_datawarehouse_stripe_account sessions: 0001_initial social_django: 0010_uid_db_index two_factor: 0007_auto_20201201_1019 diff --git a/plugin-server/src/types.ts b/plugin-server/src/types.ts index 1d596f034d81e..2ca7a3e593f3b 100644 --- a/plugin-server/src/types.ts +++ b/plugin-server/src/types.ts @@ -1016,6 +1016,7 @@ export interface RawAction { steps_json: ActionStep[] | null bytecode: any[] | null bytecode_error: string | null + pinned_at: string | null } /** Usable Action model. 
*/ diff --git a/plugin-server/tests/main/db.test.ts b/plugin-server/tests/main/db.test.ts index de958e51cb628..364f014654ede 100644 --- a/plugin-server/tests/main/db.test.ts +++ b/plugin-server/tests/main/db.test.ts @@ -190,6 +190,7 @@ describe('DB', () => { expect(await db.fetchAction(69)).toEqual({ ...result[2][69], steps_json: null, // Temporary diff whilst we migrate to this new field + pinned_at: null, }) }) diff --git a/plugin-server/tests/worker/ingestion/action-matcher.test.ts b/plugin-server/tests/worker/ingestion/action-matcher.test.ts index 40a79edc7c58b..6b84b80991b10 100644 --- a/plugin-server/tests/worker/ingestion/action-matcher.test.ts +++ b/plugin-server/tests/worker/ingestion/action-matcher.test.ts @@ -96,6 +96,7 @@ describe('ActionMatcher', () => { }) ) : null, + pinned_at: null, } await insertRow(hub.db.postgres, 'posthog_action', action) await actionManager.reloadAction(action.team_id, action.id) diff --git a/posthog/api/action.py b/posthog/api/action.py index c43bf893ca7a4..4a669cef0d398 100644 --- a/posthog/api/action.py +++ b/posthog/api/action.py @@ -19,6 +19,7 @@ from .forbid_destroy_model import ForbidDestroyModel from .tagged_item import TaggedItemSerializerMixin, TaggedItemViewSetMixin +from datetime import datetime, UTC class ActionStepJSONSerializer(serializers.Serializer): @@ -58,6 +59,7 @@ class Meta: "team_id", "is_action", "bytecode_error", + "pinned_at", ] read_only_fields = [ "team_id", @@ -77,6 +79,8 @@ def validate(self, attrs): else: attrs["team_id"] = self.context["view"].team_id include_args = {"team_id": attrs["team_id"]} + if attrs.get("pinned_at") == "": + attrs["pinned_at"] = None colliding_action_ids = list( Action.objects.filter(name=attrs["name"], deleted=False, **include_args) @@ -104,6 +108,14 @@ def create(self, validated_data: Any) -> Any: return instance def update(self, instance: Any, validated_data: dict[str, Any]) -> Any: + if validated_data.get("pinned_at"): + if instance.pinned_at: + # drop it from the update + del validated_data["pinned_at"] + else: + # ignore the user-provided timestamp, generate our own + validated_data["pinned_at"] = datetime.now(UTC).isoformat() + instance = super().update(instance, validated_data) report_user_action( diff --git a/posthog/api/session.py b/posthog/api/session.py index 40a14a993a862..8a687362ce438 100644 --- a/posthog/api/session.py +++ b/posthog/api/session.py @@ -40,7 +40,10 @@ def values(self, request: request.Request, **kwargs) -> response.Response: raise ValidationError(detail=f"Key not provided") modifiers = create_default_modifiers_for_team(team) - if modifiers.sessionTableVersion == SessionTableVersion.V2: + if ( + modifiers.sessionTableVersion == SessionTableVersion.V2 + or modifiers.sessionTableVersion == SessionTableVersion.AUTO + ): result = get_lazy_session_table_values_v2(key, search_term=search_term, team=team) else: result = get_lazy_session_table_values_v1(key, search_term=search_term, team=team) @@ -61,7 +64,10 @@ def property_definitions(self, request: request.Request, **kwargs) -> response.R # unlike e.g. 
event properties, there's a very limited number of session properties, # so we can just return them all modifiers = create_default_modifiers_for_team(self.team) - if modifiers.sessionTableVersion == SessionTableVersion.V2: + if ( + modifiers.sessionTableVersion == SessionTableVersion.V2 + or modifiers.sessionTableVersion == SessionTableVersion.AUTO + ): results = get_lazy_session_table_properties_v2(search) else: results = get_lazy_session_table_properties_v1(search) diff --git a/posthog/api/test/__snapshots__/test_action.ambr b/posthog/api/test/__snapshots__/test_action.ambr index 44b749ef70294..a9331f33b4897 100644 --- a/posthog/api/test/__snapshots__/test_action.ambr +++ b/posthog/api/test/__snapshots__/test_action.ambr @@ -248,6 +248,7 @@ "posthog_action"."bytecode", "posthog_action"."bytecode_error", "posthog_action"."steps_json", + "posthog_action"."pinned_at", "posthog_action"."is_calculating", "posthog_action"."last_calculated_at", COUNT("posthog_action_events"."event_id") AS "count", @@ -363,6 +364,7 @@ "posthog_action"."bytecode", "posthog_action"."bytecode_error", "posthog_action"."steps_json", + "posthog_action"."pinned_at", "posthog_action"."is_calculating", "posthog_action"."last_calculated_at", COUNT("posthog_action_events"."event_id") AS "count", @@ -566,6 +568,7 @@ "posthog_action"."bytecode", "posthog_action"."bytecode_error", "posthog_action"."steps_json", + "posthog_action"."pinned_at", "posthog_action"."is_calculating", "posthog_action"."last_calculated_at", COUNT("posthog_action_events"."event_id") AS "count", diff --git a/posthog/api/test/__snapshots__/test_survey.ambr b/posthog/api/test/__snapshots__/test_survey.ambr index 24056154fe8f5..22cc1302c85cc 100644 --- a/posthog/api/test/__snapshots__/test_survey.ambr +++ b/posthog/api/test/__snapshots__/test_survey.ambr @@ -308,6 +308,7 @@ "posthog_action"."bytecode", "posthog_action"."bytecode_error", "posthog_action"."steps_json", + "posthog_action"."pinned_at", "posthog_action"."is_calculating", "posthog_action"."last_calculated_at" FROM "posthog_action" diff --git a/posthog/api/test/test_action.py b/posthog/api/test/test_action.py index 96ae589fdea35..f39a72db9b5cc 100644 --- a/posthog/api/test/test_action.py +++ b/posthog/api/test/test_action.py @@ -54,6 +54,7 @@ def test_create_action(self, patch_capture, *args): ], "created_at": ANY, "created_by": ANY, + "pinned_at": None, "deleted": False, "is_calculating": False, "last_calculated_at": ANY, @@ -79,6 +80,8 @@ def test_create_action(self, patch_capture, *args): "match_url_count": 1, "has_properties": False, "deleted": False, + "pinned": False, + "pinned_at": None, }, ) @@ -128,6 +131,7 @@ def test_cant_create_action_with_the_same_name(self, *args): self.assertEqual(Action.objects.count(), count) + @freeze_time("2021-12-12") @patch("posthog.api.action.report_user_action") def test_update_action(self, patch_capture, *args): user = self._create_user("test_user_update") @@ -159,6 +163,7 @@ def test_update_action(self, patch_capture, *args): "first_name": "person", "email": "person@email.com", }, + "pinned_at": "2021-12-11T00:00:00Z", }, HTTP_ORIGIN="http://testserver", ) @@ -213,6 +218,8 @@ def test_update_action(self, patch_capture, *args): "has_properties": True, "updated_by_creator": False, "deleted": False, + "pinned": True, + "pinned_at": "2021-12-12T00:00:00+00:00", }, ) diff --git a/posthog/api/test/test_session.py b/posthog/api/test/test_session.py index b17f2718ed514..502cf2e429a3c 100644 --- a/posthog/api/test/test_session.py +++ 
b/posthog/api/test/test_session.py @@ -37,8 +37,8 @@ def test_expected_session_properties(self): "$end_timestamp", "$entry_current_url", "$entry_pathname", - "$exit_current_url", - "$exit_pathname", + "$end_current_url", + "$end_pathname", "$entry_gad_source", "$entry_gclid", "$entry_referring_domain", @@ -48,9 +48,11 @@ def test_expected_session_properties(self): "$entry_utm_source", "$entry_utm_term", "$pageview_count", + "$screen_count", "$session_duration", "$start_timestamp", "$is_bounce", + "$last_external_click_url", } assert actual_properties == expected_properties diff --git a/posthog/api/test/test_survey.py b/posthog/api/test/test_survey.py index 7f7c164e6fdcc..e5aa59dd4fd79 100644 --- a/posthog/api/test/test_survey.py +++ b/posthog/api/test/test_survey.py @@ -2559,6 +2559,7 @@ def test_list_surveys_with_actions(self): "team_id": self.team.id, "is_action": True, "bytecode_error": None, + "pinned_at": None, "tags": [], } ] diff --git a/posthog/clickhouse/client/execute.py b/posthog/clickhouse/client/execute.py index 55b9862eb43a3..598a5c8b771aa 100644 --- a/posthog/clickhouse/client/execute.py +++ b/posthog/clickhouse/client/execute.py @@ -130,6 +130,8 @@ def sync_execute( query_type = tags.get("query_type", "Other") set_tag("query_type", query_type) + if team_id is not None: + set_tag("team_id", team_id) settings = { **core_settings, diff --git a/posthog/clickhouse/schema.py b/posthog/clickhouse/schema.py index 4cbd93fa231d0..e3f5044b7a419 100644 --- a/posthog/clickhouse/schema.py +++ b/posthog/clickhouse/schema.py @@ -95,6 +95,7 @@ DISTRIBUTED_RAW_SESSIONS_TABLE_SQL, WRITABLE_RAW_SESSIONS_TABLE_SQL, RAW_SESSIONS_TABLE_MV_SQL, + RAW_SESSIONS_VIEW_SQL, ) from posthog.models.sessions.sql import ( SESSIONS_TABLE_SQL, @@ -211,7 +212,7 @@ CREATE_DATA_QUERIES = (CHANNEL_DEFINITION_DATA_SQL(),) -CREATE_VIEW_QUERIES = (SESSIONS_VIEW_SQL,) +CREATE_VIEW_QUERIES = (SESSIONS_VIEW_SQL, RAW_SESSIONS_VIEW_SQL) build_query = lambda query: query if isinstance(query, str) else query() get_table_name = lambda query: re.findall(r"[\.\s]`?([a-z0-9_]+)`?\s+ON CLUSTER", build_query(query))[0] diff --git a/posthog/clickhouse/test/test_raw_sessions_model.py b/posthog/clickhouse/test/test_raw_sessions_model.py index 4bac5cfcd3f86..b259cedd28fdd 100644 --- a/posthog/clickhouse/test/test_raw_sessions_model.py +++ b/posthog/clickhouse/test/test_raw_sessions_model.py @@ -1,3 +1,4 @@ +from posthog.clickhouse.client import query_with_columns from posthog.clickhouse.client import sync_execute from posthog.models.raw_sessions.sql import RAW_SESSION_TABLE_BACKFILL_SELECT_SQL from posthog.models.utils import uuid7 @@ -7,11 +8,298 @@ BaseTest, ) +distinct_id_counter = 0 +session_id_counter = 0 + + +def create_distinct_id(): + global distinct_id_counter + distinct_id_counter += 1 + return f"d{distinct_id_counter}" + + +def create_session_id(): + global session_id_counter + session_id_counter += 1 + return str(uuid7(random=session_id_counter)) + class TestRawSessionsModel(ClickhouseTestMixin, BaseTest): + def select_by_session_id(self, session_id): + return query_with_columns( + """ + select + * + from raw_sessions_v + where + session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND + team_id = %(team_id)s + """, + { + "session_id": session_id, + "team_id": self.team.id, + }, + ) + + def test_it_creates_session_when_creating_event(self): + distinct_id = create_distinct_id() + session_id = create_session_id() + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$current_url": 
"/", "$session_id": session_id}, + timestamp="2024-03-08", + ) + + response = sync_execute( + """ + select + session_id_v7, + team_id + from raw_sessions_v + where + session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND + team_id = %(team_id)s + """, + { + "session_id": session_id, + "team_id": self.team.id, + }, + ) + + self.assertEqual(len(response), 1) + + def test_handles_different_distinct_id_across_same_session(self): + distinct_id1 = create_distinct_id() + distinct_id2 = create_distinct_id() + session_id = create_session_id() + + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id1, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id2, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + + responses = self.select_by_session_id(session_id) + self.assertEqual(len(responses), 1) + self.assertIn(responses[0]["distinct_id"], {distinct_id1, distinct_id2}) + self.assertEqual(responses[0]["pageview_count"], 2) + + def test_handles_entry_and_exit_urls(self): + distinct_id = create_distinct_id() + session_id = create_session_id() + + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$current_url": "/entry", "$session_id": session_id}, + timestamp="2024-03-08:01", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$current_url": "/middle", "$session_id": session_id}, + timestamp="2024-03-08:02", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$current_url": "/middle", "$session_id": session_id}, + timestamp="2024-03-08:03", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$current_url": "/exit", "$session_id": session_id}, + timestamp="2024-03-08:04", + ) + + responses = self.select_by_session_id(session_id) + self.assertEqual(len(responses), 1) + self.assertEqual(responses[0]["entry_url"], "/entry") + self.assertEqual(responses[0]["end_url"], "/exit") + self.assertEqual(len(responses[0]["urls"]), 3) + self.assertEqual(set(responses[0]["urls"]), {"/entry", "/middle", "/exit"}) # order is not guaranteed + + def test_handles_initial_utm_properties(self): + distinct_id = create_distinct_id() + session_id = create_session_id() + + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$session_id": session_id, "utm_source": "source"}, + timestamp="2024-03-08", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$session_id": session_id, "utm_source": "other_source"}, + timestamp="2024-03-08", + ) + + responses = self.select_by_session_id(session_id) + self.assertEqual(len(responses), 1) + self.assertEqual(responses[0]["initial_utm_source"], "source") + + def test_counts_pageviews_autocaptures_and_events(self): + distinct_id = create_distinct_id() + session_id = create_session_id() + + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + _create_event( + team=self.team, + event="$autocapture", + distinct_id=distinct_id, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + _create_event( + team=self.team, + event="$autocapture", + distinct_id=distinct_id, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + 
_create_event( + team=self.team, + event="other event", + distinct_id=distinct_id, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + _create_event( + team=self.team, + event="$pageleave", + distinct_id=distinct_id, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + + responses = self.select_by_session_id(session_id) + self.assertEqual(len(responses), 1) + self.assertEqual(responses[0]["pageview_count"], 1) + self.assertEqual(responses[0]["autocapture_count"], 2) + + def test_separates_sessions_across_same_user(self): + distinct_id = create_distinct_id() + session_id1 = create_session_id() + session_id2 = create_session_id() + session_id3 = create_session_id() + + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$session_id": session_id1}, + timestamp="2024-03-08", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$session_id": session_id2}, + timestamp="2024-03-08", + ) + + responses = self.select_by_session_id(session_id1) + self.assertEqual(len(responses), 1) + responses = self.select_by_session_id(session_id2) + self.assertEqual(len(responses), 1) + responses = self.select_by_session_id(session_id3) + self.assertEqual(len(responses), 0) + + def test_select_from_sessions(self): + # just make sure that we can select from the sessions table without error + distinct_id = create_distinct_id() + session_id = create_session_id() + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + + # we can't include all the columns as this clickhouse driver doesn't support selecting states + responses = sync_execute( + """ + SELECT + session_id_v7, + team_id, + min_timestamp, + max_timestamp, + urls, + pageview_count, + autocapture_count + FROM raw_sessions + WHERE session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s + """, + { + "session_id": session_id, + "team_id": self.team.id, + }, + ) + self.assertEqual(len(responses), 1) + + def test_select_from_sessions_mv(self): + # just make sure that we can select from the sessions mv without error + distinct_id = create_distinct_id() + session_id = create_session_id() + _create_event( + team=self.team, + event="$pageview", + distinct_id=distinct_id, + properties={"$session_id": session_id}, + timestamp="2024-03-08", + ) + + # we can't include all the columns as this clickhouse driver doesn't support selecting states + responses = sync_execute( + """ + SELECT + session_id_v7, + team_id, + min_timestamp, + max_timestamp, + urls, + pageview_count, + autocapture_count + FROM raw_sessions_mv + WHERE session_id_v7 = toUInt128(toUUID(%(session_id)s)) AND team_id = %(team_id)s + """, + { + "session_id": session_id, + "team_id": self.team.id, + }, + ) + self.assertEqual(len(responses), 1) + def test_backfill_sql(self): - distinct_id = str(uuid7()) - session_id = str(uuid7()) + distinct_id = create_distinct_id() + session_id = create_session_id() _create_event( team=self.team, event="$pageview", diff --git a/posthog/demo/products/hedgebox/matrix.py b/posthog/demo/products/hedgebox/matrix.py index bbfc48c95fe52..0a106d0866313 100644 --- a/posthog/demo/products/hedgebox/matrix.py +++ b/posthog/demo/products/hedgebox/matrix.py @@ -120,6 +120,20 @@ def set_project_up(self, team, user): }, ], ) + Action.objects.create( + name="Visited Marius Tech Tips", + team=team, + description="Visited the best page for 
tech tips on the internet", + created_by=user, + steps_json=[ + { + "event": "$pageview", + "url": "mariustechtips", + "url_matching": "regex", + } + ], + pinned_at=self.now - dt.timedelta(days=3), + ) # Cohorts Cohort.objects.create( diff --git a/posthog/demo/products/hedgebox/models.py b/posthog/demo/products/hedgebox/models.py index 9b0c72afc69a7..57816af26702c 100644 --- a/posthog/demo/products/hedgebox/models.py +++ b/posthog/demo/products/hedgebox/models.py @@ -13,7 +13,7 @@ import pytz -from posthog.demo.matrix.models import Effect, SimPerson, SimSessionIntent +from posthog.demo.matrix.models import Effect, SimPerson, SimSessionIntent, EVENT_AUTOCAPTURE from .taxonomy import ( EVENT_SIGNED_UP, EVENT_LOGGED_IN, @@ -43,6 +43,8 @@ GROUP_TYPE_ACCOUNT, dyn_url_file, dyn_url_invite, + URL_PRODUCT_AD_LINK_1, + URL_PRODUCT_AD_LINK_2, ) if TYPE_CHECKING: @@ -64,6 +66,7 @@ class HedgeboxSessionIntent(SimSessionIntent): JOIN_TEAM = auto() UPGRADE_PLAN = auto() DOWNGRADE_PLAN = auto() + CHECK_LINKED_PR = auto() class HedgeboxPlan(StrEnum): @@ -355,6 +358,10 @@ def simulate_session(self): {"$referrer": "$direct" if entered_url_directly else "https://www.youtube.com/"} ) self.go_to_marius_tech_tips(None if entered_url_directly else {"utm_source": "youtube"}) + if self.cluster.random.random() < 0.2: + self.click_product_ad_1() + elif self.cluster.random.random() < 0.5: + self.click_product_ad_2() elif self.active_session_intent in ( HedgeboxSessionIntent.UPLOAD_FILE_S, HedgeboxSessionIntent.DELETE_FILE_S, @@ -810,6 +817,16 @@ def invitable_neighbors(self) -> list["HedgeboxPerson"]: if neighbor.is_invitable ] + def click_product_ad_1(self): + self.active_client.capture( + EVENT_AUTOCAPTURE, {"$event_type": "click", "$external_click_url": URL_PRODUCT_AD_LINK_1} + ) + + def click_product_ad_2(self): + self.active_client.capture( + EVENT_AUTOCAPTURE, {"$event_type": "click", "$external_click_url": URL_PRODUCT_AD_LINK_2} + ) + def add_params_to_url(url, query_params): if not query_params: diff --git a/posthog/demo/products/hedgebox/taxonomy.py b/posthog/demo/products/hedgebox/taxonomy.py index 8f1b8d36e9073..51ffa25e2dc83 100644 --- a/posthog/demo/products/hedgebox/taxonomy.py +++ b/posthog/demo/products/hedgebox/taxonomy.py @@ -17,6 +17,9 @@ URL_ACCOUNT_BILLING = f"{SITE_URL}/account/billing/" URL_ACCOUNT_TEAM = f"{SITE_URL}/account/team/" +URL_PRODUCT_AD_LINK_1 = f"https://shop.example.com/products/10ft-hedgehog-statue?utm_source=hedgebox&utm_medium=paid" +URL_PRODUCT_AD_LINK_2 = f"https://travel.example.com/cruise/hedge-watching?utm_source=hedgebox&utm_medium=paid" + # Event taxonomy EVENT_SIGNED_UP = "signed_up" # Properties: from_invite diff --git a/posthog/hogql/database/schema/sessions_v2.py b/posthog/hogql/database/schema/sessions_v2.py index 81af68a06f130..6f3ce13974811 100644 --- a/posthog/hogql/database/schema/sessions_v2.py +++ b/posthog/hogql/database/schema/sessions_v2.py @@ -463,6 +463,7 @@ def is_match(field_name: str) -> bool: "$entry_pathname": "path(finalizeAggregation(entry_url))", "$end_current_url": "finalizeAggregation(end_url)", "$end_pathname": "path(finalizeAggregation(end_url))", + "$last_external_click_url": "finalizeAggregation(last_external_click_url)", } diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 8edc5449c1583..824e2e1e91177 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -52,6 +52,7 @@ GenericCachedQueryResponse, QueryStatus, SessionAttributionExplorerQuery, + 
WebGoalsQuery, ) from posthog.schema_helpers import to_dict, to_json from posthog.utils import generate_cache_key, get_from_dict_or_attr @@ -136,6 +137,7 @@ def shared_insights_execution_mode(execution_mode: ExecutionMode) -> ExecutionMo WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, + WebGoalsQuery, SessionAttributionExplorerQuery, ] @@ -317,6 +319,17 @@ def get_query_runner( limit_context=limit_context, ) + if kind == "WebGoalsQuery": + from .web_analytics.web_goals import WebGoalsQueryRunner + + return WebGoalsQueryRunner( + query=query, + team=team, + timings=timings, + modifiers=modifiers, + limit_context=limit_context, + ) + if kind == "SessionAttributionExplorerQuery": from .web_analytics.session_attribution_explorer_query_runner import SessionAttributionExplorerQueryRunner diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py index 889cceab5e22b..882536812f1e9 100644 --- a/posthog/hogql_queries/web_analytics/stats_table.py +++ b/posthog/hogql_queries/web_analytics/stats_table.py @@ -58,11 +58,11 @@ def to_main_query(self) -> ast.SelectQuery: """ SELECT breakdown_value AS "context.columns.breakdown_value", - uniq(person_id) AS "context.columns.visitors", + uniq(filtered_person_id) AS "context.columns.visitors", sum(filtered_pageview_count) AS "context.columns.views" FROM ( SELECT - any(person_id) AS person_id, + any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value FROM events @@ -97,11 +97,11 @@ def _to_main_query_with_session_properties(self) -> ast.SelectQuery: """ SELECT breakdown_value AS "context.columns.breakdown_value", - uniq(person_id) AS "context.columns.visitors", + uniq(filtered_person_id) AS "context.columns.visitors", sum(filtered_pageview_count) AS "context.columns.views" FROM ( SELECT - any(person_id) AS person_id, + any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value, session.session_id AS session_id @@ -139,12 +139,12 @@ def to_entry_bounce_query(self) -> ast.SelectQuery: """ SELECT breakdown_value AS "context.columns.breakdown_value", - uniq(person_id) AS "context.columns.visitors", + uniq(filtered_person_id) AS "context.columns.visitors", sum(filtered_pageview_count) AS "context.columns.views", avg(is_bounce) AS "context.columns.bounce_rate" FROM ( SELECT - any(person_id) AS person_id, + any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {bounce_breakdown} AS breakdown_value, any(session.$is_bounce) AS is_bounce, @@ -194,11 +194,11 @@ def to_path_scroll_bounce_query(self) -> ast.SelectQuery: FROM ( SELECT breakdown_value, - uniq(person_id) AS visitors, + uniq(filtered_person_id) AS visitors, sum(filtered_pageview_count) AS views FROM ( SELECT - any(person_id) AS person_id, + any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value, session.session_id AS session_id @@ -301,11 +301,11 @@ def to_path_bounce_query(self) -> ast.SelectQuery: FROM ( SELECT breakdown_value, - uniq(person_id) AS visitors, + uniq(filtered_person_id) AS visitors, sum(filtered_pageview_count) AS views FROM ( SELECT - any(person_id) AS person_id, + any(person_id) AS filtered_person_id, count() AS filtered_pageview_count, {breakdown_value} AS breakdown_value, session.session_id AS session_id @@ -418,9 +418,10 @@ def _date_from(self) -> ast.Expr: return self.query_date_range.date_from_as_hogql() def calculate(self): + query = 
self.to_query() response = self.paginator.execute_hogql_query( query_type="stats_table_query", - query=self.to_query(), + query=query, team=self.team, timings=self.timings, modifiers=self.modifiers, @@ -454,7 +455,9 @@ def _counts_breakdown_value(self): case WebStatsBreakdown.INITIAL_PAGE: return self._apply_path_cleaning(ast.Field(chain=["session", "$entry_pathname"])) case WebStatsBreakdown.EXIT_PAGE: - return self._apply_path_cleaning(ast.Field(chain=["session", "$exit_pathname"])) + return self._apply_path_cleaning(ast.Field(chain=["session", "$end_pathname"])) + case WebStatsBreakdown.EXIT_CLICK: + return ast.Field(chain=["session", "$last_external_click_url"]) case WebStatsBreakdown.INITIAL_REFERRING_DOMAIN: return ast.Field(chain=["session", "$entry_referring_domain"]) case WebStatsBreakdown.INITIAL_UTM_SOURCE: diff --git a/posthog/hogql_queries/web_analytics/test/test_web_goals.py b/posthog/hogql_queries/web_analytics/test/test_web_goals.py new file mode 100644 index 0000000000000..1a3e3d308a010 --- /dev/null +++ b/posthog/hogql_queries/web_analytics/test/test_web_goals.py @@ -0,0 +1,216 @@ +from datetime import datetime +from typing import Optional + +from freezegun import freeze_time + +from posthog.hogql_queries.web_analytics.web_goals import WebGoalsQueryRunner +from posthog.models import Action, Person, Element +from posthog.models.utils import uuid7 +from posthog.schema import ( + DateRange, + SessionTableVersion, + HogQLQueryModifiers, + WebGoalsQuery, +) +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_event, + _create_person, +) + + +class TestWebGoalsQueryRunner(ClickhouseTestMixin, APIBaseTest): + def _create_events(self, data, event="$pageview"): + person_result = [] + for id, timestamps in data: + with freeze_time(timestamps[0][0]): + person_result.append( + _create_person( + team_id=self.team.pk, + distinct_ids=[id], + properties={ + "name": id, + **({"email": "test@posthog.com"} if id == "test" else {}), + }, + ) + ) + for timestamp, session_id, pathname in timestamps: + _create_event( + team=self.team, + event=event, + distinct_id=id, + timestamp=timestamp, + properties={"$session_id": session_id, "$pathname": pathname}, + ) + return person_result + + def _create_person(self): + distinct_id = str(uuid7()) + return _create_person( + uuid=distinct_id, + team_id=self.team.pk, + distinct_ids=[distinct_id], + properties={ + "name": distinct_id, + **({"email": "test@posthog.com"} if distinct_id == "test" else {}), + }, + ) + + def _visit_web_analytics(self, person: Person, session_id: Optional[str] = None): + _create_event( + team=self.team, + event="$pageview", + distinct_id=person.uuid, + properties={ + "$pathname": "/project/2/web", + "$current_url": "https://us.posthog.com/project/2/web", + "$session_id": session_id or person.uuid, + }, + ) + + def _click_pay(self, person: Person, session_id: Optional[str] = None): + _create_event( + team=self.team, + event="$autocapture", + distinct_id=person.uuid, + elements=[Element(nth_of_type=1, nth_child=0, tag_name="button", text="Pay $10")], + properties={"$session_id": session_id or person.uuid}, + ) + + def _create_actions(self): + Action.objects.create( + team=self.team, + name="Clicked Pay", + steps_json=[ + { + "event": "$autocapture", + "tag_name": "button", + "text": "Pay $10", + } + ], + ) + Action.objects.create( + team=self.team, + name="Contacted Sales", + steps_json=[ + { + "event": "$autocapture", + "tag_name": "button", + "text": "Contacted Sales", + } + ], + 
pinned_at=datetime.now(), + ) + Action.objects.create( + team=self.team, + name="Visited Web Analytics", + steps_json=[ + { + "event": "$pageview", + "url": "https://(app|eu|us)\\.posthog\\.com/project/\\d+/web.*", + "url_matching": "regex", + } + ], + ) + + def _run_web_goals_query( + self, + date_from, + date_to, + limit=None, + path_cleaning_filters=None, + properties=None, + session_table_version: SessionTableVersion = SessionTableVersion.V2, + filter_test_accounts: Optional[bool] = False, + ): + modifiers = HogQLQueryModifiers(sessionTableVersion=session_table_version) + query = WebGoalsQuery( + dateRange=DateRange(date_from=date_from, date_to=date_to), + properties=properties or [], + limit=limit, + filterTestAccounts=filter_test_accounts, + ) + self.team.path_cleaning_filters = path_cleaning_filters or [] + runner = WebGoalsQueryRunner(team=self.team, query=query, modifiers=modifiers) + return runner.calculate() + + def test_no_crash_when_no_data_or_actions(self): + results = self._run_web_goals_query("all", None).results + assert results == [] + + def test_no_crash_when_no_data_and_some_actions(self): + self._create_actions() + results = self._run_web_goals_query("all", None).results + assert results == [ + ["Contacted Sales", 0, 0, None], + ["Visited Web Analytics", 0, 0, None], + ["Clicked Pay", 0, 0, None], + ] + + def test_one_user_one_action(self): + self._create_actions() + p1 = self._create_person() + self._visit_web_analytics(p1) + results = self._run_web_goals_query("all", None).results + assert results == [["Contacted Sales", 0, 0, 0], ["Visited Web Analytics", 1, 1, 1], ["Clicked Pay", 0, 0, 0]] + + def test_one_user_two_similar_actions_across_sessions(self): + self._create_actions() + p1 = self._create_person() + self._visit_web_analytics(p1) + s2 = str(uuid7()) + self._visit_web_analytics(p1, s2) + results = self._run_web_goals_query("all", None).results + assert results == [["Contacted Sales", 0, 0, 0], ["Visited Web Analytics", 1, 2, 1], ["Clicked Pay", 0, 0, 0]] + + def test_one_user_two_different_actions(self): + self._create_actions() + p1 = self._create_person() + self._visit_web_analytics(p1) + self._click_pay(p1) + results = self._run_web_goals_query("all", None).results + assert results == [["Contacted Sales", 0, 0, 0], ["Visited Web Analytics", 1, 1, 1], ["Clicked Pay", 1, 1, 1]] + + def test_two_users_one_action_each(self): + self._create_actions() + p1 = self._create_person() + p2 = self._create_person() + self._visit_web_analytics(p1) + self._click_pay(p2) + results = self._run_web_goals_query("all", None).results + assert results == [ + ["Contacted Sales", 0, 0, 0], + ["Visited Web Analytics", 1, 1, 0.5], + ["Clicked Pay", 1, 1, 0.5], + ] + + def test_many_users_and_actions(self): + self._create_actions() + # create some users who visited web analytics + for _ in range(8): + p = self._create_person() + self._visit_web_analytics(p) + # create some users who clicked pay + for _ in range(4): + p = self._create_person() + self._click_pay(p) + # create some users who did both + for _ in range(2): + p = self._create_person() + self._visit_web_analytics(p) + self._click_pay(p) + # create one user who did both twice + for _ in range(1): + p = self._create_person() + self._visit_web_analytics(p) + self._visit_web_analytics(p) + self._click_pay(p) + self._click_pay(p) + + results = self._run_web_goals_query("all", None).results + assert results == [ + ["Contacted Sales", 0, 0, 0], + ["Visited Web Analytics", 11, 12, 11 / 15], + ["Clicked Pay", 7, 8, 7 / 15], + ]
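For intuition, a minimal standalone sketch (illustration only, not part of the diff) of the row-unpacking arithmetic that WebGoalsQueryRunner.calculate() in web_goals.py (further below) applies to its single flat result row, using the figures from test_many_users_and_actions: row[0] is the unique-visitor count, followed by one (name, total, uniques) triple per action.

# Illustration only: mirrors calculate() in web_goals.py, assuming the flat
# row layout [num_visitors, name_0, total_0, uniques_0, name_1, ...].
row = [15, "Contacted Sales", 0, 0, "Visited Web Analytics", 12, 11, "Clicked Pay", 8, 7]

num_visitors = row[0]
results = []
for i in range((len(row) - 1) // 3):
    name, total, uniques = row[i * 3 + 1], row[i * 3 + 2], row[i * 3 + 3]
    # conversion rate = users who converted / all unique visitors in the period
    rate = uniques / num_visitors if num_visitors else None
    results.append([name, uniques, total, rate])

assert results == [
    ["Contacted Sales", 0, 0, 0],
    ["Visited Web Analytics", 11, 12, 11 / 15],
    ["Clicked Pay", 7, 8, 7 / 15],
]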
diff --git a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py index 953e94319de4f..10ce9ab1ebcb4 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py @@ -4,6 +4,7 @@ from parameterized import parameterized from posthog.hogql_queries.web_analytics.stats_table import WebStatsTableQueryRunner +from posthog.models import Cohort from posthog.models.utils import uuid7 from posthog.schema import ( DateRange, @@ -19,6 +20,7 @@ ClickhouseTestMixin, _create_event, _create_person, + flush_persons_and_events, ) @@ -990,3 +992,65 @@ def test_no_session_id(self): breakdown_by=WebStatsBreakdown.PAGE, ).results assert [["/path", 1, 1]] == results + + def test_cohort_test_filters(self): + d1 = "d1" + s1 = str(uuid7("2024-07-30")) + d2 = "d2" + s2 = str(uuid7("2024-07-30")) + _create_person( + team_id=self.team.pk, + distinct_ids=[d1], + properties={"name": d1, "email": "test@example.com"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=d1, + timestamp="2024-07-30", + properties={"$session_id": s1, "$pathname": "/path1"}, + ) + _create_person( + team_id=self.team.pk, + distinct_ids=[d2], + properties={"name": d2, "email": "d2@hedgebox.net"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id=d2, + timestamp="2024-07-30", + properties={"$session_id": s2, "$pathname": "/path2"}, + ) + + real_users_cohort = Cohort.objects.create( + team=self.team, + name="Real persons", + description="People who don't belong to the Hedgebox team.", + groups=[ + { + "properties": [ + { + "key": "email", + "type": "person", + "value": "@hedgebox.net$", + "operator": "not_regex", + } + ] + } + ], + ) + self.team.test_account_filters = [{"key": "id", "type": "cohort", "value": real_users_cohort.pk}] + + flush_persons_and_events() + real_users_cohort.calculate_people_ch(pending_version=0) + + # Test that the cohort filter works + results = self._run_web_stats_table_query( + "all", + None, + filter_test_accounts=True, + breakdown_by=WebStatsBreakdown.PAGE, + ).results + + assert results == [["/path1", 1, 1]] diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py index 75a3d8cc4e820..7f0f2790fd5b2 100644 --- a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py +++ b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py @@ -24,10 +24,11 @@ PersonPropertyFilter, SamplingRate, SessionPropertyFilter, + WebGoalsQuery, ) from posthog.utils import generate_cache_key, get_safe_cache -WebQueryNode = Union[WebOverviewQuery, WebTopClicksQuery, WebStatsTableQuery] +WebQueryNode = Union[WebOverviewQuery, WebTopClicksQuery, WebStatsTableQuery, WebGoalsQuery] class WebAnalyticsQueryRunner(QueryRunner, ABC): diff --git a/posthog/hogql_queries/web_analytics/web_goals.py b/posthog/hogql_queries/web_analytics/web_goals.py new file mode 100644 index 0000000000000..2b14c6236f6f8 --- /dev/null +++ b/posthog/hogql_queries/web_analytics/web_goals.py @@ -0,0 +1,204 @@ +from typing import Optional + +from datetime import datetime + +from posthog.hogql import ast +from posthog.hogql.parser import parse_select +from posthog.hogql.property import property_to_expr, get_property_type, action_to_expr +from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.utils.query_date_range import QueryDateRange 
+from posthog.hogql_queries.web_analytics.web_analytics_query_runner import ( + WebAnalyticsQueryRunner, +) +from posthog.models import Action +from posthog.models.filters.mixins.utils import cached_property +from posthog.schema import WebGoalsQueryResponse, WebGoalsQuery, CachedWebGoalsQueryResponse + + +class NoActionsError(Exception): + pass + + +class WebGoalsQueryRunner(WebAnalyticsQueryRunner): + query: WebGoalsQuery + response: WebGoalsQueryResponse + cached_response: CachedWebGoalsQueryResponse + + def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + with self.timings.measure("date_expr"): + start = self.query_date_range.date_from_as_hogql() + end = self.query_date_range.date_to_as_hogql() + + actions = Action.objects.filter(team=self.team).order_by("pinned_at", "-last_calculated_at")[:5] + if not actions: + raise NoActionsError("No actions found") + + inner_aliases: list[ast.Expr] = [] + outer_aliases: list[ast.Expr] = [] + action_exprs: list[ast.Expr] = [] + for n, action in enumerate(actions): + expr = action_to_expr(action) + action_exprs.append(expr) + inner_aliases.append(ast.Alias(alias=f"action_count_{n}", expr=ast.Call(name="countIf", args=[expr]))) + inner_aliases.append( + ast.Alias( + alias=f"action_person_id_{n}", + expr=ast.Call( + name="if", + args=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Gt, + left=ast.Field(chain=[f"action_count_{n}"]), + right=ast.Constant(value=0), + ), + ast.Field(chain=["person_id"]), + ast.Constant(value=None), + ], + ), + ) + ) + outer_aliases.append(ast.Alias(alias=f"action_name_{n}", expr=ast.Constant(value=action.name))) + outer_aliases.append( + ast.Alias( + alias=f"action_total_{n}", expr=ast.Call(name="sum", args=[ast.Field(chain=[f"action_count_{n}"])]) + ), + ) + outer_aliases.append( + ast.Alias( + alias=f"action_uniques_{n}", + expr=ast.Call(name="uniq", args=[ast.Field(chain=[f"action_person_id_{n}"])]), + ), + ) + + inner_select = parse_select( + """ +SELECT + any(events.person_id) as person_id +FROM events +WHERE and( + events.`$session_id` IS NOT NULL, + event = '$pageview' OR {action_where}, + timestamp >= {start}, + timestamp < {end}, + {event_properties}, + {session_properties} +) +GROUP BY events.`$session_id` + """, + placeholders={ + "start": start, + "end": end, + "event_properties": self.event_properties(), + "session_properties": self.session_properties(), + "action_where": ast.Or(exprs=action_exprs), + }, + ) + assert isinstance(inner_select, ast.SelectQuery) + for alias in inner_aliases: + inner_select.select.append(alias) + + outer_select = parse_select( + """ +SELECT + uniq(person_id) as total_people +FROM {inner_select} + """, + placeholders={ + "inner_select": inner_select, + }, + ) + assert isinstance(outer_select, ast.SelectQuery) + for alias in outer_aliases: + outer_select.select.append(alias) + + return outer_select + + def calculate(self): + try: + query = self.to_query() + except NoActionsError: + return WebGoalsQueryResponse(results=[], samplingRate=self._sample_rate, modifiers=self.modifiers) + + response = execute_hogql_query( + query_type="web_goals_query", + query=query, + team=self.team, + timings=self.timings, + modifiers=self.modifiers, + limit_context=self.limit_context, + ) + assert response.results + + row = response.results[0] + num_visitors = row[0] + num_actions = (len(row) - 1) // 3 + + results = [] + for i in range(num_actions): + action_name = row[(i * 3) + 1] + action_total = row[(i * 3) + 2] + action_unique = row[(i * 3) + 3] + action_rate = action_unique / 
num_visitors if num_visitors else None + results.append([action_name, action_unique, action_total, action_rate]) + + return WebGoalsQueryResponse( + columns=[ + "context.columns.action_name", + "context.columns.converting_users", + "context.columns.total_conversions", + "context.columns.conversion_rate", + ], + results=results, + samplingRate=self._sample_rate, + modifiers=self.modifiers, + ) + + @cached_property + def query_date_range(self): + return QueryDateRange( + date_range=self.query.dateRange, + team=self.team, + interval=None, + now=datetime.now(), + ) + + def all_properties(self) -> ast.Expr: + properties = self.query.properties + self._test_account_filters + return property_to_expr(properties, team=self.team) + + def event_properties(self) -> ast.Expr: + properties = [ + p for p in self.query.properties + self._test_account_filters if get_property_type(p) in ["event", "person"] + ] + return property_to_expr(properties, team=self.team, scope="event") + + def session_properties(self) -> ast.Expr: + properties = [ + p for p in self.query.properties + self._test_account_filters if get_property_type(p) == "session" + ] + return property_to_expr(properties, team=self.team, scope="event") + + +def to_data( + key: str, + kind: str, + value: Optional[float], + previous: Optional[float], + is_increase_bad: Optional[bool] = None, +) -> dict: + if kind == "percentage": + if value is not None: + value = value * 100 + if previous is not None: + previous = previous * 100 + + return { + "key": key, + "kind": kind, + "isIncreaseBad": is_increase_bad, + "value": value, + "previous": previous, + "changeFromPreviousPct": round(100 * (value - previous) / previous) + if value is not None and previous is not None and previous != 0 + else None, + } diff --git a/posthog/migrations/0464_action_pinned_at.py b/posthog/migrations/0464_action_pinned_at.py new file mode 100644 index 0000000000000..9666ace05ea71 --- /dev/null +++ b/posthog/migrations/0464_action_pinned_at.py @@ -0,0 +1,17 @@ +# Generated by Django 4.2.14 on 2024-08-12 17:24 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + dependencies = [ + ("posthog", "0463_datawarehousemodelpath_and_more"), + ] + + operations = [ + migrations.AddField( + model_name="action", + name="pinned_at", + field=models.DateTimeField(blank=True, default=None, null=True), + ), + ] diff --git a/posthog/migrations/0464_datawarehouse_stripe_account.py b/posthog/migrations/0465_datawarehouse_stripe_account.py similarity index 93% rename from posthog/migrations/0464_datawarehouse_stripe_account.py rename to posthog/migrations/0465_datawarehouse_stripe_account.py index 93eccf8811219..a28ec898fbf24 100644 --- a/posthog/migrations/0464_datawarehouse_stripe_account.py +++ b/posthog/migrations/0465_datawarehouse_stripe_account.py @@ -27,7 +27,7 @@ def reverse(apps, _): class Migration(migrations.Migration): dependencies = [ - ("posthog", "0463_datawarehousemodelpath_and_more"), + ("posthog", "0464_action_pinned_at"), ] operations = [ diff --git a/posthog/models/action/action.py b/posthog/models/action/action.py index 1b4d6767f461e..512a6c75d3cfb 100644 --- a/posthog/models/action/action.py +++ b/posthog/models/action/action.py @@ -43,6 +43,7 @@ class Action(models.Model): bytecode = models.JSONField(null=True, blank=True) bytecode_error = models.TextField(blank=True, null=True) steps_json = models.JSONField(null=True, blank=True) + pinned_at = models.DateTimeField(blank=True, null=True, default=None) # DEPRECATED: these were used before 
ClickHouse was our database is_calculating = models.BooleanField(default=False) @@ -71,6 +72,8 @@ def get_analytics_metadata(self): "match_url_count": sum(1 if step.url else 0 for step in self.steps), "has_properties": any(step.properties for step in self.steps), "deleted": self.deleted, + "pinned": bool(self.pinned_at), + "pinned_at": self.pinned_at, } @property diff --git a/posthog/schema.py b/posthog/schema.py index d76254fae84a0..557a1fb215f17 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -819,6 +819,7 @@ class NodeKind(StrEnum): WEB_OVERVIEW_QUERY = "WebOverviewQuery" WEB_TOP_CLICKS_QUERY = "WebTopClicksQuery" WEB_STATS_TABLE_QUERY = "WebStatsTableQuery" + WEB_GOALS_QUERY = "WebGoalsQuery" DATABASE_SCHEMA_QUERY = "DatabaseSchemaQuery" @@ -1384,6 +1385,41 @@ class VizSpecificOptions(BaseModel): RETENTION: Optional[RETENTION] = None +class Sampling(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + enabled: Optional[bool] = None + forceSamplingRate: Optional[SamplingRate] = None + + +class WebGoalsQueryResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: Optional[list] = None + error: Optional[str] = Field( + default=None, + description="Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + ) + hasMore: Optional[bool] = None + hogql: Optional[str] = Field(default=None, description="Generated HogQL query.") + limit: Optional[int] = None + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + offset: Optional[int] = None + query_status: Optional[QueryStatus] = Field( + default=None, description="Query status indicates whether next to the provided data, a query is still running." + ) + results: list + samplingRate: Optional[SamplingRate] = None + timings: Optional[list[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + types: Optional[list] = None + + class Kind2(StrEnum): UNIT = "unit" DURATION_S = "duration_s" @@ -1402,14 +1438,6 @@ class WebOverviewItem(BaseModel): value: Optional[float] = None -class Sampling(BaseModel): - model_config = ConfigDict( - extra="forbid", - ) - enabled: Optional[bool] = None - forceSamplingRate: Optional[SamplingRate] = None - - class WebOverviewQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", @@ -1438,6 +1466,7 @@ class WebStatsBreakdown(StrEnum): PAGE = "Page" INITIAL_PAGE = "InitialPage" EXIT_PAGE = "ExitPage" + EXIT_CLICK = "ExitClick" INITIAL_CHANNEL_TYPE = "InitialChannelType" INITIAL_REFERRING_DOMAIN = "InitialReferringDomain" INITIAL_UTM_SOURCE = "InitialUTMSource" @@ -1943,6 +1972,42 @@ class CachedTrendsQueryResponse(BaseModel): ) +class CachedWebGoalsQueryResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + cache_key: str + cache_target_age: Optional[AwareDatetime] = None + calculation_trigger: Optional[str] = Field( + default=None, description="What triggered the calculation of the query, leave empty if user/immediate" + ) + columns: Optional[list] = None + error: Optional[str] = Field( + default=None, + description="Query error. Returned only if 'explain' or `modifiers.debug` is true. 
Throws an error otherwise.", + ) + hasMore: Optional[bool] = None + hogql: Optional[str] = Field(default=None, description="Generated HogQL query.") + is_cached: bool + last_refresh: AwareDatetime + limit: Optional[int] = None + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + next_allowed_client_refresh: AwareDatetime + offset: Optional[int] = None + query_status: Optional[QueryStatus] = Field( + default=None, description="Query status indicates whether next to the provided data, a query is still running." + ) + results: list + samplingRate: Optional[SamplingRate] = None + timezone: str + timings: Optional[list[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + types: Optional[list] = None + + class CachedWebOverviewQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", @@ -2223,7 +2288,8 @@ class Response6(BaseModel): query_status: Optional[QueryStatus] = Field( default=None, description="Query status indicates whether next to the provided data, a query is still running." ) - results: Any + results: list + samplingRate: Optional[SamplingRate] = None timings: Optional[list[QueryTiming]] = Field( default=None, description="Measured timings for different parts of the query generation process" ) @@ -2231,6 +2297,32 @@ class Response6(BaseModel): class Response7(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: Optional[list] = None + error: Optional[str] = Field( + default=None, + description="Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + ) + hasMore: Optional[bool] = None + hogql: Optional[str] = Field(default=None, description="Generated HogQL query.") + limit: Optional[int] = None + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + offset: Optional[int] = None + query_status: Optional[QueryStatus] = Field( + default=None, description="Query status indicates whether next to the provided data, a query is still running." + ) + results: Any + timings: Optional[list[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + types: Optional[list] = None + + +class Response8(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -2902,7 +2994,8 @@ class QueryResponseAlternative12(BaseModel): query_status: Optional[QueryStatus] = Field( default=None, description="Query status indicates whether next to the provided data, a query is still running." ) - results: Any + results: list + samplingRate: Optional[SamplingRate] = None timings: Optional[list[QueryTiming]] = Field( default=None, description="Measured timings for different parts of the query generation process" ) @@ -2910,6 +3003,32 @@ class QueryResponseAlternative12(BaseModel): class QueryResponseAlternative13(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: Optional[list] = None + error: Optional[str] = Field( + default=None, + description="Query error. Returned only if 'explain' or `modifiers.debug` is true. 
Throws an error otherwise.", + ) + hasMore: Optional[bool] = None + hogql: Optional[str] = Field(default=None, description="Generated HogQL query.") + limit: Optional[int] = None + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + offset: Optional[int] = None + query_status: Optional[QueryStatus] = Field( + default=None, description="Query status indicates whether next to the provided data, a query is still running." + ) + results: Any + timings: Optional[list[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + types: Optional[list] = None + + +class QueryResponseAlternative14(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -2934,7 +3053,7 @@ class QueryResponseAlternative13(BaseModel): ) -class QueryResponseAlternative14(BaseModel): +class QueryResponseAlternative15(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -2960,7 +3079,7 @@ class QueryResponseAlternative14(BaseModel): types: list[str] -class QueryResponseAlternative15(BaseModel): +class QueryResponseAlternative16(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -2987,7 +3106,7 @@ class QueryResponseAlternative15(BaseModel): types: list[str] -class QueryResponseAlternative16(BaseModel): +class QueryResponseAlternative17(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3017,7 +3136,7 @@ class QueryResponseAlternative16(BaseModel): types: Optional[list] = Field(default=None, description="Types of returned columns") -class QueryResponseAlternative17(BaseModel): +class QueryResponseAlternative18(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3041,7 +3160,7 @@ class QueryResponseAlternative17(BaseModel): ) -class QueryResponseAlternative18(BaseModel): +class QueryResponseAlternative19(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3068,7 +3187,7 @@ class QueryResponseAlternative18(BaseModel): types: Optional[list] = None -class QueryResponseAlternative19(BaseModel): +class QueryResponseAlternative20(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3092,7 +3211,34 @@ class QueryResponseAlternative19(BaseModel): types: Optional[list] = None -class QueryResponseAlternative20(BaseModel): +class QueryResponseAlternative21(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + columns: Optional[list] = None + error: Optional[str] = Field( + default=None, + description="Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + ) + hasMore: Optional[bool] = None + hogql: Optional[str] = Field(default=None, description="Generated HogQL query.") + limit: Optional[int] = None + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + offset: Optional[int] = None + query_status: Optional[QueryStatus] = Field( + default=None, description="Query status indicates whether next to the provided data, a query is still running." 
+ ) + results: list + samplingRate: Optional[SamplingRate] = None + timings: Optional[list[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + types: Optional[list] = None + + +class QueryResponseAlternative22(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3118,7 +3264,7 @@ class QueryResponseAlternative20(BaseModel): types: Optional[list] = None -class QueryResponseAlternative21(BaseModel): +class QueryResponseAlternative23(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3143,7 +3289,7 @@ class QueryResponseAlternative21(BaseModel): ) -class QueryResponseAlternative22(BaseModel): +class QueryResponseAlternative24(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3165,7 +3311,7 @@ class QueryResponseAlternative22(BaseModel): ) -class QueryResponseAlternative23(BaseModel): +class QueryResponseAlternative25(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3186,7 +3332,7 @@ class QueryResponseAlternative23(BaseModel): ) -class QueryResponseAlternative25(BaseModel): +class QueryResponseAlternative27(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3207,7 +3353,7 @@ class QueryResponseAlternative25(BaseModel): ) -class QueryResponseAlternative28(BaseModel): +class QueryResponseAlternative30(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -3406,6 +3552,23 @@ class TableSettings(BaseModel): columns: Optional[list[ChartAxis]] = None +class WebGoalsQuery(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + dateRange: Optional[DateRange] = None + filterTestAccounts: Optional[bool] = None + kind: Literal["WebGoalsQuery"] = "WebGoalsQuery" + limit: Optional[int] = None + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + properties: list[Union[EventPropertyFilter, PersonPropertyFilter, SessionPropertyFilter]] + response: Optional[WebGoalsQueryResponse] = None + sampling: Optional[Sampling] = None + useSessionsTable: Optional[bool] = None + + class WebOverviewQuery(BaseModel): model_config = ConfigDict( extra="forbid", @@ -4189,7 +4352,7 @@ class PropertyGroupFilterValue(BaseModel): ] -class QueryResponseAlternative24(BaseModel): +class QueryResponseAlternative26(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -4888,7 +5051,7 @@ class LifecycleQuery(BaseModel): ) -class QueryResponseAlternative29(BaseModel): +class QueryResponseAlternative31(BaseModel): model_config = ConfigDict( extra="forbid", ) @@ -4920,8 +5083,8 @@ class QueryResponseAlternative( QueryResponseAlternative11, QueryResponseAlternative12, QueryResponseAlternative13, - Any, QueryResponseAlternative14, + Any, QueryResponseAlternative15, QueryResponseAlternative16, QueryResponseAlternative17, @@ -4933,8 +5096,10 @@ class QueryResponseAlternative( QueryResponseAlternative23, QueryResponseAlternative24, QueryResponseAlternative25, - QueryResponseAlternative28, - QueryResponseAlternative29, + QueryResponseAlternative26, + QueryResponseAlternative27, + QueryResponseAlternative30, + QueryResponseAlternative31, ] ] ): @@ -4953,8 +5118,8 @@ class QueryResponseAlternative( QueryResponseAlternative11, QueryResponseAlternative12, QueryResponseAlternative13, - Any, QueryResponseAlternative14, + Any, QueryResponseAlternative15, QueryResponseAlternative16, QueryResponseAlternative17, @@ -4966,8 +5131,10 @@ class QueryResponseAlternative( QueryResponseAlternative23, QueryResponseAlternative24, 
QueryResponseAlternative25, - QueryResponseAlternative28, - QueryResponseAlternative29, + QueryResponseAlternative26, + QueryResponseAlternative27, + QueryResponseAlternative30, + QueryResponseAlternative31, ] @@ -5248,7 +5415,18 @@ class DataTableNode(BaseModel): kind: Literal["DataTableNode"] = "DataTableNode" propertiesViaUrl: Optional[bool] = Field(default=None, description="Link properties via the URL (default: false)") response: Optional[ - Union[dict[str, Any], Response, Response1, Response2, Response3, Response4, Response5, Response6, Response7] + Union[ + dict[str, Any], + Response, + Response1, + Response2, + Response3, + Response4, + Response5, + Response6, + Response7, + Response8, + ] ] = None showActions: Optional[bool] = Field(default=None, description="Show the kebab menu at the end of the row") showColumnConfigurator: Optional[bool] = Field( @@ -5285,6 +5463,7 @@ class DataTableNode(BaseModel): WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, + WebGoalsQuery, SessionAttributionExplorerQuery, ErrorTrackingQuery, ] = Field(..., description="Source of the events") @@ -5321,6 +5500,7 @@ class HogQLAutocomplete(BaseModel): WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, + WebGoalsQuery, SessionAttributionExplorerQuery, ErrorTrackingQuery, ] @@ -5361,6 +5541,7 @@ class HogQLMetadata(BaseModel): WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, + WebGoalsQuery, SessionAttributionExplorerQuery, ErrorTrackingQuery, ] @@ -5403,6 +5584,7 @@ class QueryRequest(BaseModel): WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, + WebGoalsQuery, SessionAttributionExplorerQuery, ErrorTrackingQuery, DataVisualizationNode, @@ -5449,6 +5631,7 @@ class QuerySchemaRoot( WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, + WebGoalsQuery, SessionAttributionExplorerQuery, ErrorTrackingQuery, DataVisualizationNode, @@ -5483,6 +5666,7 @@ class QuerySchemaRoot( WebOverviewQuery, WebStatsTableQuery, WebTopClicksQuery, + WebGoalsQuery, SessionAttributionExplorerQuery, ErrorTrackingQuery, DataVisualizationNode, diff --git a/posthog/session_recordings/queries/session_recording_list_from_filters.py b/posthog/session_recordings/queries/session_recording_list_from_filters.py index 0885d97a38b3c..3cba03d260500 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_filters.py +++ b/posthog/session_recordings/queries/session_recording_list_from_filters.py @@ -208,12 +208,22 @@ def _where_predicates(self) -> Union[ast.And, ast.Or]: optional_exprs.append(property_to_expr(remaining_properties, team=self._team, scope="replay")) if self._filter.console_log_filters.values: - # print(self._filter.console_log_filters.type) console_logs_subquery = ast.SelectQuery( select=[ast.Field(chain=["log_source_id"])], select_from=ast.JoinExpr(table=ast.Field(chain=["console_logs_log_entries"])), - where=self._filter.ast_operand( - exprs=[property_to_expr(self._filter.console_log_filters, team=self._team)] + where=ast.And( + exprs=[ + self._filter.ast_operand( + exprs=[ + property_to_expr(self._filter.console_log_filters, team=self._team), + ] + ), + ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=ast.Field(chain=["log_source"]), + right=ast.Constant(value="session_replay"), + ), + ] ), ) diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr index bb30391e92f19..bcd56921df145 100644 --- 
a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_filters.ambr @@ -2505,10 +2505,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE or(or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.level, 'error'), 0)), ifNull(equals(console_logs_log_entries.message, 'message 4'), 0))))) + WHERE and(or(or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.level, 'error'), 0)), ifNull(equals(console_logs_log_entries.message, 'message 4'), 0)), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2543,10 +2543,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE and(or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.level, 'error'), 0)), ifNull(ilike(console_logs_log_entries.message, '%message 5%'), 0))))) + WHERE and(and(or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.level, 'error'), 0)), ifNull(ilike(console_logs_log_entries.message, '%message 5%'), 0)), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2581,10 +2581,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), 
in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE and(ifNull(equals(console_logs_log_entries.level, 'info'), 0), ifNull(ilike(console_logs_log_entries.message, '%message 5%'), 0))))) + WHERE and(and(ifNull(equals(console_logs_log_entries.level, 'info'), 0), ifNull(ilike(console_logs_log_entries.message, '%message 5%'), 0)), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2683,10 +2683,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE ifNull(equals(console_logs_log_entries.level, 'error'), 0)))) + WHERE and(ifNull(equals(console_logs_log_entries.level, 'error'), 0), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2721,10 +2721,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE ifNull(equals(console_logs_log_entries.level, 'info'), 0)))) + WHERE and(ifNull(equals(console_logs_log_entries.level, 'info'), 0), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2759,10 +2759,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 
20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE ifNull(equals(console_logs_log_entries.level, 'info'), 0)))) + WHERE and(ifNull(equals(console_logs_log_entries.level, 'info'), 0), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2797,10 +2797,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE ifNull(equals(console_logs_log_entries.level, 'warn'), 0)))) + WHERE and(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2835,10 +2835,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE ifNull(equals(console_logs_log_entries.level, 'warn'), 0)))) + WHERE and(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2873,10 +2873,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, 
log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE ifNull(equals(console_logs_log_entries.level, 'info'), 0)))) + WHERE and(ifNull(equals(console_logs_log_entries.level, 'info'), 0), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2911,10 +2911,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.level, 'error'), 0))))) + WHERE and(or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.level, 'error'), 0)), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -2949,10 +2949,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-31 20:00:00.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-14 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-21 20:00:00.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE ifNull(equals(console_logs_log_entries.level, 'info'), 0)))) + WHERE and(ifNull(equals(console_logs_log_entries.level, 'info'), 0), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3631,10 +3631,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-25 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message + 
(SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE and(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.message, 'random'), 0))))) + WHERE and(and(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.message, 'random'), 0)), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC @@ -3669,10 +3669,10 @@ WHERE and(equals(s.team_id, 2), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-11 13:46:23.000000', 6, 'UTC')), 0), ifNull(greaterOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2020-12-25 00:00:00.000000', 6, 'UTC')), 0), ifNull(lessOrEquals(toTimeZone(s.min_first_timestamp, 'UTC'), toDateTime64('2021-01-01 13:46:23.000000', 6, 'UTC')), 0), in(s.session_id, (SELECT console_logs_log_entries.log_source_id AS log_source_id FROM - (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message + (SELECT log_entries.log_source_id AS log_source_id, log_entries.level AS level, log_entries.message AS message, log_entries.log_source AS log_source FROM log_entries WHERE and(equals(log_entries.team_id, 2), equals(log_entries.log_source, 'session_replay'))) AS console_logs_log_entries - WHERE or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.message, 'random'), 0))))) + WHERE and(or(ifNull(equals(console_logs_log_entries.level, 'warn'), 0), ifNull(equals(console_logs_log_entries.message, 'random'), 0)), ifNull(equals(console_logs_log_entries.log_source, 'session_replay'), 0))))) GROUP BY s.session_id HAVING 1 ORDER BY start_time DESC diff --git a/posthog/tasks/test/test_warehouse.py b/posthog/tasks/test/test_warehouse.py index 66248cf2a2489..8a83cad234d88 100644 --- a/posthog/tasks/test/test_warehouse.py +++ b/posthog/tasks/test/test_warehouse.py @@ -6,7 +6,7 @@ validate_data_warehouse_table_columns, capture_external_data_rows_synced, ) -from posthog.warehouse.models import ExternalDataSource, ExternalDataJob +from posthog.warehouse.models import ExternalDataSource, ExternalDataJob, ExternalDataSchema from freezegun import freeze_time import datetime @@ -35,8 +35,20 @@ def test_check_synced_row_limits_of_team_monthly_limit( source_type="Stripe", ) + schema = ExternalDataSchema.objects.create( + source=source, + name="test_schema", + team=self.team, + status="Running", + ) + job = ExternalDataJob.objects.create( - pipeline=source, workflow_id="fake_workflow_id", team=self.team, status="Running", rows_synced=100000 + pipeline=source, + workflow_id="fake_workflow_id", + team=self.team, + status="Running", + rows_synced=100000, + schema=schema, ) check_synced_row_limits_of_team(self.team.pk) @@ -44,6 +56,9 @@ def test_check_synced_row_limits_of_team_monthly_limit( source.refresh_from_db() self.assertEqual(source.status, ExternalDataSource.Status.PAUSED) + schema.refresh_from_db() + self.assertEqual(schema.status, ExternalDataSchema.Status.PAUSED) + job.refresh_from_db() self.assertEqual(job.status, ExternalDataJob.Status.CANCELLED) @@ -70,8 +85,20 @@ def test_check_synced_row_limits_of_team( source_type="Stripe", ) + schema = 
ExternalDataSchema.objects.create( + source=source, + name="test_schema", + team=self.team, + status="Running", + ) + job = ExternalDataJob.objects.create( - pipeline=source, workflow_id="fake_workflow_id", team=self.team, status="Running", rows_synced=100000 + pipeline=source, + workflow_id="fake_workflow_id", + team=self.team, + status="Running", + rows_synced=100000, + schema=schema, ) check_synced_row_limits_of_team(self.team.pk) @@ -79,6 +106,9 @@ def test_check_synced_row_limits_of_team( source.refresh_from_db() self.assertEqual(source.status, ExternalDataSource.Status.PAUSED) + schema.refresh_from_db() + self.assertEqual(schema.status, ExternalDataSchema.Status.PAUSED) + job.refresh_from_db() self.assertEqual(job.status, ExternalDataJob.Status.CANCELLED) diff --git a/posthog/tasks/usage_report.py b/posthog/tasks/usage_report.py index 788a20d9a431b..07e384e29d531 100644 --- a/posthog/tasks/usage_report.py +++ b/posthog/tasks/usage_report.py @@ -276,7 +276,7 @@ def get_org_owner_or_first_user(organization_id: str) -> Optional[User]: return user -@shared_task(**USAGE_REPORT_TASK_KWARGS, max_retries=3) +@shared_task(**USAGE_REPORT_TASK_KWARGS, max_retries=3, rate_limit="10/s") def send_report_to_billing_service(org_id: str, report: dict[str, Any]) -> None: if not settings.EE_AVAILABLE: return diff --git a/posthog/tasks/warehouse.py b/posthog/tasks/warehouse.py index 0426aade2991f..518d4fc3027c4 100644 --- a/posthog/tasks/warehouse.py +++ b/posthog/tasks/warehouse.py @@ -8,7 +8,7 @@ pause_external_data_schedule, unpause_external_data_schedule, ) -from posthog.warehouse.models import ExternalDataJob, ExternalDataSource +from posthog.warehouse.models import ExternalDataJob, ExternalDataSource, ExternalDataSchema from posthog.ph_client import get_ph_client from posthog.models import Team from django.db.models import Q @@ -62,6 +62,7 @@ def check_synced_row_limits_of_team(team_id: int) -> None: total_rows_synced = sum(rows_synced_list) if team_model.api_token in limited_team_tokens_rows_synced or total_rows_synced > MONTHLY_LIMIT: + # stop active jobs running_jobs = ExternalDataJob.objects.filter(team_id=team_id, status=ExternalDataJob.Status.RUNNING) for job in running_jobs: try: @@ -79,16 +80,39 @@ def check_synced_row_limits_of_team(team_id: int) -> None: job.pipeline.status = ExternalDataSource.Status.PAUSED job.pipeline.save() + + if job.schema: + job.schema.status = ExternalDataSchema.Status.PAUSED + job.schema.save() + + # pause active schemas + all_schemas = ExternalDataSchema.objects.filter( + team_id=team_id, status__in=[ExternalDataSchema.Status.COMPLETED, ExternalDataSchema.Status.RUNNING] + ) + for schema in all_schemas: + try: + pause_external_data_schedule(str(schema.id)) + except Exception as e: + logger.exception("Could not pause external data schedule", exc_info=e) + + schema.status = ExternalDataSchema.Status.PAUSED + schema.save() + + schema.source.status = ExternalDataSource.Status.PAUSED + schema.source.save() else: - all_sources = ExternalDataSource.objects.filter(team_id=team_id, status=ExternalDataSource.Status.PAUSED) - for source in all_sources: + all_schemas = ExternalDataSchema.objects.filter(team_id=team_id, status=ExternalDataSchema.Status.PAUSED) + for schema in all_schemas: try: - unpause_external_data_schedule(str(source.id)) + unpause_external_data_schedule(str(schema.id)) except Exception as e: logger.exception("Could not unpause external data schedule", exc_info=e) - source.status = ExternalDataSource.Status.COMPLETED - source.save() + schema.status = 
ExternalDataSchema.Status.COMPLETED + schema.save() + + schema.source.status = ExternalDataSource.Status.RUNNING + schema.source.save() @shared_task(ignore_result=True) diff --git a/posthog/warehouse/api/external_data_schema.py b/posthog/warehouse/api/external_data_schema.py index 6db5379a2fe96..154fd848ff524 100644 --- a/posthog/warehouse/api/external_data_schema.py +++ b/posthog/warehouse/api/external_data_schema.py @@ -15,7 +15,7 @@ from posthog.warehouse.data_load.service import ( external_data_workflow_exists, - is_any_external_data_job_paused, + is_any_external_data_schema_paused, sync_external_data_job_workflow, pause_external_data_schedule, trigger_external_data_workflow, @@ -213,10 +213,10 @@ def destroy(self, request: Request, *args: Any, **kwargs: Any) -> Response: def reload(self, request: Request, *args: Any, **kwargs: Any): instance: ExternalDataSchema = self.get_object() - if is_any_external_data_job_paused(self.team_id): + if is_any_external_data_schema_paused(self.team_id): return Response( status=status.HTTP_400_BAD_REQUEST, - data={"message": "Monthly sync limit reached. Please contact PostHog support to increase your limit."}, + data={"message": "Monthly sync limit reached. Please increase your billing limit to resume syncing."}, ) try: @@ -236,10 +236,10 @@ def reload(self, request: Request, *args: Any, **kwargs: Any): def resync(self, request: Request, *args: Any, **kwargs: Any): instance: ExternalDataSchema = self.get_object() - if is_any_external_data_job_paused(self.team_id): + if is_any_external_data_schema_paused(self.team_id): return Response( status=status.HTTP_400_BAD_REQUEST, - data={"message": "Monthly sync limit reached. Please contact PostHog support to increase your limit."}, + data={"message": "Monthly sync limit reached. Please increase your billing limit to resume syncing."}, ) latest_running_job = ( diff --git a/posthog/warehouse/api/external_data_source.py b/posthog/warehouse/api/external_data_source.py index 8781082a86bcb..54a3960f0960a 100644 --- a/posthog/warehouse/api/external_data_source.py +++ b/posthog/warehouse/api/external_data_source.py @@ -16,7 +16,7 @@ delete_external_data_schedule, cancel_external_data_workflow, delete_data_import_folder, - is_any_external_data_job_paused, + is_any_external_data_schema_paused, trigger_external_data_source_workflow, ) from posthog.warehouse.models import ExternalDataSource, ExternalDataSchema, ExternalDataJob @@ -258,10 +258,10 @@ def create(self, request: Request, *args: Any, **kwargs: Any) -> Response: elif self.prefix_exists(source_type, prefix): return Response(status=status.HTTP_400_BAD_REQUEST, data={"message": "Prefix already exists"}) - if is_any_external_data_job_paused(self.team_id): + if is_any_external_data_schema_paused(self.team_id): return Response( status=status.HTTP_400_BAD_REQUEST, - data={"message": "Monthly sync limit reached. Please contact PostHog support to increase your limit."}, + data={"message": "Monthly sync limit reached. Please increase your billing limit to resume syncing."}, ) # TODO: remove dummy vars @@ -635,10 +635,10 @@ def destroy(self, request: Request, *args: Any, **kwargs: Any) -> Response: def reload(self, request: Request, *args: Any, **kwargs: Any): instance: ExternalDataSource = self.get_object() - if is_any_external_data_job_paused(self.team_id): + if is_any_external_data_schema_paused(self.team_id): return Response( status=status.HTTP_400_BAD_REQUEST, - data={"message": "Monthly sync limit reached. 
Please contact PostHog support to increase your limit."}, + data={"message": "Monthly sync limit reached. Please increase your billing limit to resume syncing."}, ) try: diff --git a/posthog/warehouse/data_load/service.py b/posthog/warehouse/data_load/service.py index 3b981c3715355..ab02a7f558508 100644 --- a/posthog/warehouse/data_load/service.py +++ b/posthog/warehouse/data_load/service.py @@ -188,11 +188,11 @@ def delete_data_import_folder(folder_path: str): s3.delete(f"{bucket_name}/{folder_path}", recursive=True) -def is_any_external_data_job_paused(team_id: int) -> bool: - from posthog.warehouse.models import ExternalDataSource +def is_any_external_data_schema_paused(team_id: int) -> bool: + from posthog.warehouse.models import ExternalDataSchema return ( - ExternalDataSource.objects.exclude(deleted=True) - .filter(team_id=team_id, status=ExternalDataSource.Status.PAUSED) + ExternalDataSchema.objects.exclude(deleted=True) + .filter(team_id=team_id, status=ExternalDataSchema.Status.PAUSED) .exists() ) diff --git a/rust/capture/src/router.rs b/rust/capture/src/router.rs index f03228764f748..e1ba579684585 100644 --- a/rust/capture/src/router.rs +++ b/rust/capture/src/router.rs @@ -19,7 +19,7 @@ use crate::prometheus::{setup_metrics_recorder, track_metrics}; const EVENT_BODY_SIZE: usize = 2 * 1024 * 1024; // 2MB const BATCH_BODY_SIZE: usize = 20 * 1024 * 1024; // 20MB, up from the default 2MB used for normal event payloads -const RECORDING_BODY_SIZE: usize = 20 * 1024 * 1024; // 20MB, up from the default 2MB used for normal event payloads +const RECORDING_BODY_SIZE: usize = 25 * 1024 * 1024; // 25MB, up from the default 2MB used for normal event payloads #[derive(Clone)] pub struct State { @@ -27,6 +27,7 @@ pub struct State { pub timesource: Arc<dyn TimeSource + Send + Sync>, pub redis: Arc<dyn Client + Send + Sync>, pub billing_limiter: RedisLimiter, + pub event_size_limit: usize, } async fn index() -> &'static str { @@ -47,12 +48,14 @@ pub fn router< metrics: bool, capture_mode: CaptureMode, concurrency_limit: Option<usize>, + event_size_limit: usize, ) -> Router { let state = State { sink: Arc::new(sink), timesource: Arc::new(timesource), redis, billing_limiter, + event_size_limit, }; // Very permissive CORS policy, as old SDK versions diff --git a/rust/capture/src/server.rs b/rust/capture/src/server.rs index 5d006df5282f4..93ff3f646c3bc 100644 --- a/rust/capture/src/server.rs +++ b/rust/capture/src/server.rs @@ -31,6 +31,8 @@ where ) .expect("failed to create billing limiter"); + let event_max_bytes = config.kafka.kafka_producer_message_max_bytes as usize; + let app = if config.print_sink { // Print sink is only used for local debug, don't allow a container with it to run on prod liveness @@ -48,6 +50,7 @@ where config.export_prometheus, config.capture_mode, config.concurrency_limit, + event_max_bytes, ) } else { let sink_liveness = liveness @@ -90,6 +93,7 @@ where config.export_prometheus, config.capture_mode, config.concurrency_limit, + event_max_bytes, ) }; diff --git a/rust/capture/src/sinks/kafka.rs b/rust/capture/src/sinks/kafka.rs index 17266af2a7d07..2e008a900ccd6 100644 --- a/rust/capture/src/sinks/kafka.rs +++ b/rust/capture/src/sinks/kafka.rs @@ -187,10 +187,14 @@ impl KafkaSink { CaptureError::NonRetryableSinkError })?; + let token = event.token.clone(); + let data_type = event.data_type; let event_key = event.key(); - let session_id = event.session_id.as_deref(); + let session_id = event.session_id.clone(); - let (topic, partition_key): (&str, Option<&str>) = match &event.data_type { + drop(event); // Events can be EXTREMELY memory hungry
+ + let (topic, partition_key): (&str, Option<&str>) = match data_type { DataType::AnalyticsHistorical => (&self.historical_topic, Some(event_key.as_str())), // We never trigger overflow on historical events DataType::AnalyticsMain => { // TODO: deprecate capture-led overflow or move logic in handler @@ -212,7 +216,11 @@ impl KafkaSink { DataType::ExceptionMain => (&self.exceptions_topic, Some(event_key.as_str())), DataType::SnapshotMain => ( &self.main_topic, - Some(session_id.ok_or(CaptureError::MissingSessionId)?), + Some( + session_id + .as_deref() + .ok_or(CaptureError::MissingSessionId)?, + ), ), }; @@ -224,7 +232,7 @@ impl KafkaSink { timestamp: None, headers: Some(OwnedHeaders::new().insert(Header { key: "token", - value: Some(&event.token), + value: Some(&token), })), }) { Ok(ack) => Ok(ack), diff --git a/rust/capture/src/v0_endpoint.rs b/rust/capture/src/v0_endpoint.rs index 6077f40246550..03b550cd9cdaf 100644 --- a/rust/capture/src/v0_endpoint.rs +++ b/rust/capture/src/v0_endpoint.rs @@ -77,12 +77,12 @@ async fn handle_common( tracing::error!("failed to decode form data: {}", e); CaptureError::RequestDecodingError(String::from("missing data field")) })?; - RawRequest::from_bytes(payload.into()) + RawRequest::from_bytes(payload.into(), state.event_size_limit) } ct => { tracing::Span::current().record("content_type", ct); - RawRequest::from_bytes(body) + RawRequest::from_bytes(body, state.event_size_limit) } }?; @@ -102,6 +102,7 @@ async fn handle_common( tracing::Span::current().record("batch_size", events.len()); if events.is_empty() { + tracing::log::warn!("rejected empty batch"); return Err(CaptureError::EmptyBatch); } @@ -244,7 +245,8 @@ pub async fn recording( })), Err(err) => Err(err), Ok((context, events)) => { - if let Err(err) = process_replay_events(state.sink.clone(), &events, &context).await { + let count = events.len() as u64; + if let Err(err) = process_replay_events(state.sink.clone(), events, &context).await { let cause = match err { CaptureError::EmptyDistinctId => "empty_distinct_id", CaptureError::MissingDistinctId => "missing_distinct_id", @@ -259,7 +261,7 @@ pub async fn recording( CaptureError::MissingSnapshotData => "missing_snapshot_data", _ => "process_events_error", }; - report_dropped_events(cause, events.len() as u64); + report_dropped_events(cause, count); tracing::log::warn!("rejected invalid payload: {}", err); return Err(err); } @@ -336,49 +338,69 @@ pub async fn process_events<'a>( #[instrument(skip_all, fields(events = events.len()))] pub async fn process_replay_events<'a>( sink: Arc<dyn sinks::Event + Send + Sync>, - events: &'a [RawEvent], + mut events: Vec<RawEvent>, context: &'a ProcessingContext, ) -> Result<(), CaptureError> { - let snapshot_items: Vec<Value> = events - .iter() - .map(|e| match e.properties.get("$snapshot_data") { - // We can either have an array or single object - Some(Value::Array(value)) => Ok(value.to_vec()), - // Wrap a single object in a vec to simplify processing. - Some(Value::Object(value)) => Ok([Value::Object(value.clone())].to_vec()), - _ => Err(CaptureError::MissingSnapshotData), - }) - .collect::<Result<Vec<Vec<Value>>, CaptureError>>()?
- .into_iter() - .flatten() - .collect(); - + // Grab metadata about the whole batch from the first event before + // we drop all the events as we rip out the snapshot data let session_id = events[0] .properties - .get("$session_id") + .remove("$session_id") .ok_or(CaptureError::MissingSessionId)?; - let window_id = events[0].properties.get("$window_id").unwrap_or(session_id); + let window_id = events[0] + .properties + .remove("$window_id") + .unwrap_or(session_id.clone()); + let uuid = events[0].uuid.unwrap_or_else(uuid_v7); + let distinct_id = events[0].extract_distinct_id()?; + let snapshot_source = events[0] + .properties + .remove("$snapshot_source") + .unwrap_or(Value::String(String::from("web"))); + + let mut snapshot_items: Vec<Value> = Vec::with_capacity(events.len()); + for mut event in events { + let Some(snapshot_data) = event.properties.remove("$snapshot_data") else { + return Err(CaptureError::MissingSnapshotData); + }; + match snapshot_data { + Value::Array(value) => { + snapshot_items.extend(value); + } + Value::Object(value) => { + snapshot_items.push(Value::Object(value)); + } + _ => { + return Err(CaptureError::MissingSnapshotData); + } + } + } + let event = ProcessedEvent { data_type: DataType::SnapshotMain, - uuid: events[0].uuid.unwrap_or_else(uuid_v7), - distinct_id: events[0].extract_distinct_id()?, + uuid, + distinct_id: distinct_id.clone(), ip: context.client_ip.clone(), data: json!({ "event": "$snapshot_items", "properties": { - "distinct_id": events[0].extract_distinct_id()?, + "distinct_id": distinct_id, "$session_id": session_id, "$window_id": window_id, - "$snapshot_source": events[0].properties.get("$snapshot_source").unwrap_or(&Value::String(String::from("web"))), + "$snapshot_source": snapshot_source, "$snapshot_items": snapshot_items, } - }).to_string(), + }) + .to_string(), now: context.now.clone(), sent_at: context.sent_at, token: context.token.clone(), - session_id: Some(session_id - .as_str() - .ok_or(CaptureError::InvalidSessionId)?.to_string()), + session_id: Some( + session_id + .as_str() + .ok_or(CaptureError::InvalidSessionId)? + .to_string(), + ), }; sink.send(event).await diff --git a/rust/capture/src/v0_request.rs b/rust/capture/src/v0_request.rs index ae0c80fece453..d25a1280b7538 100644 --- a/rust/capture/src/v0_request.rs +++ b/rust/capture/src/v0_request.rs @@ -11,6 +11,7 @@ use tracing::instrument; use uuid::Uuid; use crate::api::CaptureError; +use crate::prometheus::report_dropped_events; use crate::token::validate_token; #[derive(Deserialize, Default)] @@ -124,22 +125,54 @@ impl RawRequest { /// Instead of trusting the parameter, we peek at the payload's first three bytes to /// detect gzip, fallback to uncompressed utf8 otherwise.
#[instrument(skip_all)] - pub fn from_bytes(bytes: Bytes) -> Result<RawRequest, CaptureError> { + pub fn from_bytes(bytes: Bytes, limit: usize) -> Result<RawRequest, CaptureError> { tracing::debug!(len = bytes.len(), "decoding new event"); let payload = if bytes.starts_with(&GZIP_MAGIC_NUMBERS) { - let mut d = GzDecoder::new(bytes.reader()); - let mut s = String::new(); - d.read_to_string(&mut s).map_err(|e| { - tracing::error!("failed to decode gzip: {}", e); - CaptureError::RequestDecodingError(String::from("invalid gzip data")) - })?; - s + let len = bytes.len(); + let mut zipstream = GzDecoder::new(bytes.reader()); + let chunk = &mut [0; 1024]; + let mut buf = Vec::with_capacity(len); + loop { + let got = match zipstream.read(chunk) { + Ok(got) => got, + Err(e) => { + tracing::error!("failed to read gzip stream: {}", e); + return Err(CaptureError::RequestDecodingError(String::from( + "invalid gzip data", + ))); + } + }; + if got == 0 { + break; + } + buf.extend_from_slice(&chunk[..got]); + if buf.len() > limit { + tracing::error!("GZIP decompression limit reached"); + report_dropped_events("event_too_big", 1); + return Err(CaptureError::EventTooBig); + } + } + match String::from_utf8(buf) { + Ok(s) => s, + Err(e) => { + tracing::error!("failed to decode gzip: {}", e); + return Err(CaptureError::RequestDecodingError(String::from( + "invalid gzip data", + ))); + } + } } else { - String::from_utf8(bytes.into()).map_err(|e| { + let s = String::from_utf8(bytes.into()).map_err(|e| { tracing::error!("failed to decode body: {}", e); CaptureError::RequestDecodingError(String::from("invalid body encoding")) - })? + })?; + if s.len() > limit { + tracing::error!("Request size limit reached"); + report_dropped_events("event_too_big", 1); + return Err(CaptureError::EventTooBig); + } + s }; tracing::debug!(json = payload, "decoded event data"); @@ -286,7 +319,7 @@ mod tests { .expect("payload is not base64"), ); - let events = RawRequest::from_bytes(compressed_bytes) + let events = RawRequest::from_bytes(compressed_bytes, 1024) .expect("failed to parse") .events(); assert_eq!(1, events.len()); @@ -308,7 +341,7 @@ .expect("payload is not base64"), ); - let events = RawRequest::from_bytes(compressed_bytes) + let events = RawRequest::from_bytes(compressed_bytes, 2048) .expect("failed to parse") .events(); assert_eq!(1, events.len()); @@ -325,7 +358,7 @@ #[test] fn extract_distinct_id() { let parse_and_extract = |input: &'static str| -> Result<String, CaptureError> { - let parsed = RawRequest::from_bytes(input.into()) + let parsed = RawRequest::from_bytes(input.into(), 2048) .expect("failed to parse") .events(); parsed[0].extract_distinct_id() @@ -393,7 +426,7 @@ "distinct_id": distinct_id }]); - let parsed = RawRequest::from_bytes(input.to_string().into()) + let parsed = RawRequest::from_bytes(input.to_string().into(), 2048) .expect("failed to parse") .events(); assert_eq!( @@ -405,7 +438,7 @@ #[test] fn extract_and_verify_token() { let parse_and_extract = |input: &'static str| -> Result<String, CaptureError> { - RawRequest::from_bytes(input.into()) + RawRequest::from_bytes(input.into(), 2048) .expect("failed to parse") .extract_and_verify_token() }; diff --git a/rust/capture/tests/django_compat.rs b/rust/capture/tests/django_compat.rs index 55fa838aaef13..d08be11c7506c 100644 --- a/rust/capture/tests/django_compat.rs +++ b/rust/capture/tests/django_compat.rs @@ -113,6 +113,7 @@ async fn it_matches_django_capture_behaviour() -> anyhow::Result<()> { false, CaptureMode::Events, None, + 25 * 1024 * 1024, ); let client = TestClient::new(app);
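
The warehouse changes above move row-limit enforcement from the source level down to the schema level: when a team crosses its monthly synced-row limit, in-flight jobs are cancelled and every running or completed schema is paused together with its parent source; once the team is back under the limit, paused schemas are resumed and their sources flip back to running. A minimal Python sketch of that state machine, using plain classes in place of the Django models and Temporal schedule calls (`Status`, `Source`, `Schema`, and `enforce_row_limit` are illustrative stand-ins, not PostHog APIs):

```python
from enum import Enum


class Status(str, Enum):
    RUNNING = "Running"
    COMPLETED = "Completed"
    PAUSED = "Paused"


class Source:
    """Stand-in for ExternalDataSource."""

    def __init__(self) -> None:
        self.status = Status.RUNNING


class Schema:
    """Stand-in for ExternalDataSchema; each schema belongs to one source."""

    def __init__(self, source: Source) -> None:
        self.source = source
        self.status = Status.RUNNING


def enforce_row_limit(schemas: list[Schema], over_limit: bool) -> None:
    if over_limit:
        # Pause anything active (or eligible to run again) and its parent source.
        for schema in schemas:
            if schema.status in (Status.RUNNING, Status.COMPLETED):
                schema.status = Status.PAUSED
                schema.source.status = Status.PAUSED
    else:
        # Resume previously paused schemas; the parent source goes back to RUNNING.
        for schema in schemas:
            if schema.status == Status.PAUSED:
                schema.status = Status.COMPLETED
                schema.source.status = Status.RUNNING
```

The real task additionally cancels running `ExternalDataJob` rows and pauses or unpauses the external data schedules, but the status transitions are the same.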
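Consistent with that, `is_any_external_data_schema_paused` now drives the API-side guard: create, reload, and resync are rejected while any non-deleted schema for the team is paused. A condensed sketch of the check outside of DRF (the `SyncLimitError` exception and `ensure_under_sync_limit` helper are hypothetical):

```python
from posthog.warehouse.models import ExternalDataSchema


class SyncLimitError(Exception):
    """Stand-in for the HTTP 400 response returned by the viewsets."""


def ensure_under_sync_limit(team_id: int) -> None:
    # A paused (non-deleted) schema means the team is over its monthly limit.
    paused = (
        ExternalDataSchema.objects.exclude(deleted=True)
        .filter(team_id=team_id, status=ExternalDataSchema.Status.PAUSED)
        .exists()
    )
    if paused:
        raise SyncLimitError("Monthly sync limit reached. Please increase your billing limit to resume syncing.")
```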
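On the capture side, `process_replay_events` now takes ownership of the batch (`Vec<RawEvent>` rather than `&[RawEvent]`) and `remove`s values out of each event's property map, so the often-large snapshot payloads are moved into the single combined `$snapshot_items` event instead of being cloned. A rough Python equivalent of that reshaping, with `dict.pop` standing in for `serde_json::Map::remove` (`CaptureError` and `build_snapshot_items` here are illustrative, and the uuid/distinct-id plumbing is omitted):

```python
from typing import Any


class CaptureError(Exception):
    pass


def build_snapshot_items(events: list[dict[str, Any]]) -> dict[str, Any]:
    # Batch-level metadata comes from the first event, popped rather than copied.
    first = events[0]["properties"]
    session_id = first.pop("$session_id", None)
    if session_id is None:
        raise CaptureError("missing_session_id")
    window_id = first.pop("$window_id", session_id)
    snapshot_source = first.pop("$snapshot_source", "web")

    items: list[Any] = []
    for event in events:
        # $snapshot_data is either a list of items or a single item.
        data = event["properties"].pop("$snapshot_data", None)
        if isinstance(data, list):
            items.extend(data)
        elif isinstance(data, dict):
            items.append(data)
        else:
            raise CaptureError("missing_snapshot_data")

    return {
        "event": "$snapshot_items",
        "properties": {
            "$session_id": session_id,
            "$window_id": window_id,
            "$snapshot_source": snapshot_source,
            "$snapshot_items": items,
        },
    }
```

The same motivation explains the `drop(event)` in the Kafka sink: only the small fields (token, data type, partition key, session id) are kept alive across the async produce call.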
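Finally, the new `from_bytes` guard inflates gzip bodies in 1 KiB chunks and bails out with `EventTooBig` as soon as the decompressed size passes the limit plumbed in from `kafka_producer_message_max_bytes`, rather than decompressing the whole body and checking afterwards; uncompressed bodies need only the length check in the `else` arm. A sketch of the same guard using Python's `zlib` (`decompress_gzip_capped` is illustrative, not part of the codebase):

```python
import zlib


def decompress_gzip_capped(data: bytes, limit: int) -> str:
    """Inflate a gzip payload incrementally, refusing to grow past `limit` bytes."""
    decomp = zlib.decompressobj(wbits=16 + zlib.MAX_WBITS)  # 16+ selects gzip framing
    buf = bytearray()
    tail = data
    while tail:
        # Never request more output than the remaining budget (+1 detects overflow).
        buf += decomp.decompress(tail, limit + 1 - len(buf))
        if len(buf) > limit:
            raise ValueError("event_too_big")  # the Rust code returns CaptureError::EventTooBig
        tail = decomp.unconsumed_tail
    buf += decomp.flush()
    if len(buf) > limit:
        raise ValueError("event_too_big")
    return buf.decode("utf-8")  # invalid UTF-8 maps to a RequestDecodingError in Rust
```

Capping output rather than input is the point: a few kilobytes of malicious gzip can inflate to gigabytes, so the check has to run as the stream is consumed.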