diff --git a/frontend/__snapshots__/exporter-exporter--dashboard--light.png b/frontend/__snapshots__/exporter-exporter--dashboard--light.png index ac90639ab4b759..61f6b619057b78 100644 Binary files a/frontend/__snapshots__/exporter-exporter--dashboard--light.png and b/frontend/__snapshots__/exporter-exporter--dashboard--light.png differ diff --git a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--dark.png b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--dark.png index 24c69cdf6e2428..62362066d2fe90 100644 Binary files a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--dark.png and b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--dark.png differ diff --git a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--light.png b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--light.png index 374263b278274b..acfa1466bbf0e1 100644 Binary files a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--light.png and b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--default--light.png differ diff --git a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--dark.png b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--dark.png index 24c69cdf6e2428..62362066d2fe90 100644 Binary files a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--dark.png and b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--dark.png differ diff --git a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--light.png b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--light.png index 374263b278274b..acfa1466bbf0e1 100644 Binary files a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--light.png and b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--full-width--light.png differ diff --git a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--dark.png b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--dark.png index 4cd35e1d356a8e..a5b3229a280124 100644 Binary files a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--dark.png and b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--dark.png differ diff --git a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--light.png b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--light.png index 554190a86b835d..028dc2ea80f763 100644 Binary files a/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--light.png and b/frontend/__snapshots__/lemon-ui-lemon-segmented-button--small--light.png differ diff --git a/frontend/__snapshots__/replay-player-failure--recent-recordings-404--dark.png b/frontend/__snapshots__/replay-player-failure--recent-recordings-404--dark.png index 51657e70ba0bc2..176e35273415bb 100644 Binary files a/frontend/__snapshots__/replay-player-failure--recent-recordings-404--dark.png and b/frontend/__snapshots__/replay-player-failure--recent-recordings-404--dark.png differ diff --git a/frontend/__snapshots__/replay-player-failure--recent-recordings-404--light.png b/frontend/__snapshots__/replay-player-failure--recent-recordings-404--light.png index 43f2a7af67fb66..82b82d8fc63291 100644 Binary files a/frontend/__snapshots__/replay-player-failure--recent-recordings-404--light.png and b/frontend/__snapshots__/replay-player-failure--recent-recordings-404--light.png differ diff --git a/frontend/__snapshots__/scenes-app-feature-flags--new-feature-flag--dark.png 
b/frontend/__snapshots__/scenes-app-feature-flags--new-feature-flag--dark.png index 9063864aa032e2..87a76f1653c404 100644 Binary files a/frontend/__snapshots__/scenes-app-feature-flags--new-feature-flag--dark.png and b/frontend/__snapshots__/scenes-app-feature-flags--new-feature-flag--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png index 2f4388a0828022..e63bfbe8f2cde2 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png index f552b2017da2a6..27f62918859d14 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png differ diff --git a/frontend/src/lib/api.ts b/frontend/src/lib/api.ts index 658073a0865576..16f62fdc9ca8c5 100644 --- a/frontend/src/lib/api.ts +++ b/frontend/src/lib/api.ts @@ -927,6 +927,9 @@ const api = { ? new ApiRequest().notebook(`${activityLogProps.id}`).withAction('activity') : new ApiRequest().notebooks().withAction('activity') }, + [ActivityScope.TEAM]: () => { + return new ApiRequest().projectsDetail().withAction('activity') + }, } const pagingParameters = { page: page || 1, limit: ACTIVITY_PAGE_SIZE } diff --git a/frontend/src/lib/components/ActivityLog/ActivityLog.stories.tsx b/frontend/src/lib/components/ActivityLog/ActivityLog.stories.tsx index c048adb88355e3..72a1551eb43be3 100644 --- a/frontend/src/lib/components/ActivityLog/ActivityLog.stories.tsx +++ b/frontend/src/lib/components/ActivityLog/ActivityLog.stories.tsx @@ -3,6 +3,7 @@ import { featureFlagsActivityResponseJson, insightsActivityResponseJson, personActivityResponseJson, + teamActivityResponseJson, } from 'lib/components/ActivityLog/__mocks__/activityLogMocks' import { ActivityLog } from 'lib/components/ActivityLog/ActivityLog' @@ -37,6 +38,10 @@ const meta: Meta = { ctx.status(200), ctx.json({ results: personActivityResponseJson }), ], + '/api/projects/:id/activity': (_, __, ctx) => [ + ctx.status(200), + ctx.json({ results: teamActivityResponseJson }), + ], }, }), ], @@ -59,6 +64,10 @@ export function PersonsActivity(): JSX.Element { return } +export function TeamActivity(): JSX.Element { + return +} + export function WithCaption(): JSX.Element { return ( defaultDescriber(logActivity, asNotification) } diff --git a/frontend/src/lib/components/BridgePage/BridgePage.scss b/frontend/src/lib/components/BridgePage/BridgePage.scss index a4f2196df895a4..a95676cd869fd7 100644 --- a/frontend/src/lib/components/BridgePage/BridgePage.scss +++ b/frontend/src/lib/components/BridgePage/BridgePage.scss @@ -5,9 +5,8 @@ display: flex; flex: 1; flex-direction: column; - height: 100%; - min-height: 100vh; - overflow: hidden; + height: 100vh; + overflow: scroll; background-color: var(--bg-bridge); -ms-overflow-style: none; diff --git a/frontend/src/lib/components/ExportButton/exporter.tsx b/frontend/src/lib/components/ExportButton/exporter.tsx index e1ccc83cb518fd..7215c3edd1201a 100644 --- 
a/frontend/src/lib/components/ExportButton/exporter.tsx +++ b/frontend/src/lib/components/ExportButton/exporter.tsx @@ -92,14 +92,23 @@ export async function triggerExport(asset: TriggerExportProps): Promise { await delay(POLL_DELAY_MS) - exportedAsset = await api.exports.get(exportedAsset.id) + // Keep polling for pure network errors, but not any HTTP errors + // Example: `NetworkError when attempting to fetch resource` + try { + exportedAsset = await api.exports.get(exportedAsset.id) + } catch (e: any) { + if (e.name === 'NetworkError' || e.message?.message?.startsWith('NetworkError')) { + continue + } + throw e + } } reject('Content not loaded in time...') } catch (e: any) { trackingProperties.total_time_ms = performance.now() - startTime posthog.capture('export failed', trackingProperties) - reject(`Export failed: ${JSON.stringify(e)}`) + reject(new Error(`Export failed: ${JSON.stringify(e.detail ?? e)}`)) } }) await lemonToast.promise( diff --git a/frontend/src/lib/components/PropertyFilters/PropertyFilters.tsx b/frontend/src/lib/components/PropertyFilters/PropertyFilters.tsx index ca2157fd321b4f..34e2cb8f646b64 100644 --- a/frontend/src/lib/components/PropertyFilters/PropertyFilters.tsx +++ b/frontend/src/lib/components/PropertyFilters/PropertyFilters.tsx @@ -28,6 +28,7 @@ interface PropertyFiltersProps { orFiltering?: boolean propertyGroupType?: FilterLogicalOperator | null addText?: string | null + buttonText?: string hasRowOperator?: boolean sendAllKeyUpdates?: boolean allowNew?: boolean @@ -51,6 +52,7 @@ export function PropertyFilters({ logicalRowDivider = false, propertyGroupType = null, addText = null, + buttonText = 'Add filter', hasRowOperator = true, sendAllKeyUpdates = false, allowNew = true, @@ -91,7 +93,7 @@ export function PropertyFilters({ pageKey={pageKey} showConditionBadge={showConditionBadge} disablePopover={disablePopover || orFiltering} - label="Add filter" + label={buttonText} onRemove={remove} orFiltering={orFiltering} filterComponent={(onComplete) => ( diff --git a/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.scss b/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.scss index c8019214081edf..1f37b601f58db5 100644 --- a/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.scss +++ b/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.scss @@ -22,100 +22,7 @@ .LemonButton__content { white-space: nowrap; } - } -} - -body:not(.posthog-3000) { - .LemonSegmentedButton { - background: var(--bg-light); - border: 1px solid var(--border); - } - - .LemonSegmentedButton__slider { - position: absolute; - top: -1px; // 1px of border - left: -1px; // 1px of border - width: calc(var(--lemon-segmented-button-slider-width) + 2px); // 1px of border (left + right) - height: calc(100% + 2px); // 1px of border (top + bottom) - background: var(--primary); - - // This is a real element and not ::after to avoid initial transition from 0 width - transition: width 200ms ease, transform 200ms ease, border-radius 200ms ease; - transform: translateX(var(--lemon-segmented-button-slider-offset)); - will-change: width, transform, border-radius; - - &.LemonSegmentedButton__slider--first { - border-top-left-radius: var(--radius); - border-bottom-left-radius: var(--radius); - } - - &.LemonSegmentedButton__slider--last { - border-top-right-radius: var(--radius); - border-bottom-right-radius: var(--radius); - } - } - .LemonSegmentedButton__option { - .LemonButton { - min-height: calc(var(--lemon-button-height) - 2px); - 
border-radius: 0; - outline: 1px solid transparent; - - // Original transition with outline added - transition: background-color 200ms ease, color 200ms ease, border 200ms ease, opacity 200ms ease, - outline 200ms ease; - - &:hover { - > span { - border-color: none !important; - } - } - } - - &:first-child, - &:first-child .LemonButton { - border-top-left-radius: var(--radius); - border-bottom-left-radius: var(--radius); - } - - &:last-child, - &:last-child .LemonButton { - border-top-right-radius: var(--radius); - border-bottom-right-radius: var(--radius); - } - - &:not(:last-child) { - border-right: 1px solid var(--border); - } - - &:not(.LemonSegmentedButton__option--disabled, .LemonSegmentedButton__option--selected) { - &:hover .LemonButton { - outline-color: var(--primary); - } - - &:active .LemonButton { - outline-color: var(--primary-dark); - } - } - - &.LemonSegmentedButton__option--selected { - .LemonButton, - .LemonButton__icon { - color: #fff; - } - - .LemonButton { - &:hover, - &:active { - background: none; // Disable LemonButton's hover styles for the selected option - } - } - } - } -} - -.posthog-3000 { - .LemonSegmentedButton__option { & .LemonButton, & .LemonButton > .LemonButton__chrome::after, & .LemonButton > .LemonButton__chrome::before { @@ -152,21 +59,19 @@ body:not(.posthog-3000) { border-bottom-right-radius: var(--radius) !important; } - &:not(:last-child) { + &:not(:first-child) { .LemonButton__chrome { - border-right: none; + margin-left: -1px; } } &.LemonSegmentedButton__option--selected { + z-index: 2; + .LemonButton { --lemon-button-icon-opacity: 1; --lemon-button-profile-picture-opacity: 1; } - - .LemonButton__chrome::after { - background-color: var(--bg-light); - } } &--disabled { diff --git a/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.tsx b/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.tsx index 5ccd01b69d8d47..1e0bfbfd2fa9d1 100644 --- a/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.tsx +++ b/frontend/src/lib/lemon-ui/LemonSegmentedButton/LemonSegmentedButton.tsx @@ -85,7 +85,7 @@ export function LemonSegmentedButton({ ref={option.value === value ? selectionRef : undefined} > ([ // @ts-expect-error const eventIndex = new EventIndex(playerData?.snapshots || []) const payload: Partial = { - snapshots_load_time: durations.snapshots?.duration, - metadata_load_time: durations.metadata?.duration, - events_load_time: durations.events?.duration, - first_paint_load_time: durations.firstPaint?.duration, + snapshots_load_time: durations.snapshots, + metadata_load_time: durations.metadata, + events_load_time: durations.events, + first_paint_load_time: durations.firstPaint, duration: eventIndex.getDuration(), start_time: playerData.start?.valueOf() ?? 0, end_time: playerData.end?.valueOf() ?? 0, page_change_events_length: eventIndex.pageChangeEvents().length, recording_width: eventIndex.getRecordingScreenMetadata(0)[0]?.width, - load_time: durations.firstPaint?.duration ?? 0, // TODO: DEPRECATED field. Keep around so dashboards don't break + load_time: durations.firstPaint ?? 0, // TODO: DEPRECATED field. 
Keep around so dashboards don't break } posthog.capture(`recording ${type}`, payload) }, diff --git a/frontend/src/queries/nodes/HogQLQuery/HogQLQueryEditor.tsx b/frontend/src/queries/nodes/HogQLQuery/HogQLQueryEditor.tsx index 20fccfdf535bfd..9bc09e1089cb79 100644 --- a/frontend/src/queries/nodes/HogQLQuery/HogQLQueryEditor.tsx +++ b/frontend/src/queries/nodes/HogQLQuery/HogQLQueryEditor.tsx @@ -82,6 +82,16 @@ const convertCompletionItemKind = (kind: AutocompleteCompletionItem['kind']): la } } +const kindToSortText = (kind: AutocompleteCompletionItem['kind'], label: string): string => { + if (kind === 'Variable') { + return `1-${label}` + } + if (kind === 'Method' || kind === 'Function') { + return `2-${label}` + } + return `3-${label}` +} + export interface HogQLQueryEditorProps { query: HogQLQuery setQuery?: (query: HogQLQuery) => void @@ -224,7 +234,10 @@ export function HogQLQueryEditor(props: HogQLQueryEditorProps): JSX.Element { const completionItems = response.suggestions - const suggestions = completionItems.map((item) => { + const suggestions = completionItems.map((item) => { + const kind = convertCompletionItemKind(item.kind) + const sortText = kindToSortText(item.kind, item.label) + return { label: item.label, documentation: item.documentation, @@ -235,7 +248,15 @@ export function HogQLQueryEditor(props: HogQLQueryEditorProps): JSX.Element { startColumn: word.startColumn, endColumn: word.endColumn, }, - kind: convertCompletionItemKind(item.kind), + kind, + sortText, + command: + kind === languages.CompletionItemKind.Function + ? { + id: 'cursorLeft', + title: 'Move cursor left', + } + : undefined, } }) diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 61209cba2a8b70..4c14f1fd1c36da 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -5073,6 +5073,9 @@ "dateRange": { "$ref": "#/definitions/DateRange" }, + "doPathCleaning": { + "type": "boolean" + }, "includeBounceRate": { "type": "boolean" }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 0e1532814c6755..57f0e90f1a9db0 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -1017,6 +1017,7 @@ export interface WebStatsTableQuery extends WebAnalyticsQueryBase { response?: WebStatsTableQueryResponse includeScrollDepth?: boolean // automatically sets includeBounceRate to true includeBounceRate?: boolean + doPathCleaning?: boolean /** @asType integer */ limit?: number } diff --git a/frontend/src/scenes/onboarding/Onboarding.stories.tsx b/frontend/src/scenes/onboarding/Onboarding.stories.tsx index 0f0638fd9fd857..6231e7b483959f 100644 --- a/frontend/src/scenes/onboarding/Onboarding.stories.tsx +++ b/frontend/src/scenes/onboarding/Onboarding.stories.tsx @@ -88,3 +88,21 @@ export const _OnboardingOtherProducts = (): JSX.Element => { return } _OnboardingOtherProducts.tags = ['test-skip'] // FIXME: For some reason this is captured correctly the first time, but then is written over a second time with SDKs view + +export const _OnboardingProductIntroduction = (): JSX.Element => { + useStorybookMocks({ + get: { + '/api/billing-v2/': { + ...billingJson, + }, + }, + }) + + const { setProduct } = useActions(onboardingLogic) + + useEffect(() => { + setProduct(billingJson.products[0]) + router.actions.push(urls.onboarding(ProductKey.PRODUCT_ANALYTICS, OnboardingStepKey.PRODUCT_INTRO)) + }, []) + return +} diff --git a/frontend/src/scenes/session-recordings/filters/AdvancedSessionRecordingsFilters.tsx 
b/frontend/src/scenes/session-recordings/filters/AdvancedSessionRecordingsFilters.tsx index 449e765af6a93d..0772194653150b 100644 --- a/frontend/src/scenes/session-recordings/filters/AdvancedSessionRecordingsFilters.tsx +++ b/frontend/src/scenes/session-recordings/filters/AdvancedSessionRecordingsFilters.tsx @@ -41,7 +41,6 @@ export const AdvancedSessionRecordingsFilters = ({ }} typeKey="session-recordings" mathAvailability={MathAvailability.None} - buttonCopy="Add filter" hideRename hideDuplicate showNestedArrow={false} @@ -72,6 +71,7 @@ export const AdvancedSessionRecordingsFilters = ({ {showPropertyFilters && ( { diff --git a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.test.ts b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.test.ts index 48068e668c1129..78f0c7d4cb17f1 100644 --- a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.test.ts +++ b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.test.ts @@ -117,7 +117,11 @@ describe('sessionRecordingDataLogic', () => { logic.actions.loadRecordingMeta() logic.actions.loadRecordingSnapshots() }) - .toDispatchActions(['loadRecordingMetaSuccess', 'loadRecordingSnapshotsSuccess']) + .toDispatchActions([ + 'loadRecordingMetaSuccess', + 'loadRecordingSnapshotsSuccess', + 'reportUsageIfFullyLoaded', + ]) .toFinishAllListeners() const actual = logic.values.sessionPlayerData @@ -248,7 +252,7 @@ describe('sessionRecordingDataLogic', () => { }) describe('report usage', () => { - it('send `recording loaded` event only when entire recording has loaded', async () => { + it('sends `recording loaded` event only when entire recording has loaded', async () => { await expectLogic(logic, () => { logic.actions.loadRecordingSnapshots() }) @@ -260,7 +264,7 @@ describe('sessionRecordingDataLogic', () => { ]) .toDispatchActions([eventUsageLogic.actionTypes.reportRecording]) }) - it('send `recording viewed` and `recording analyzed` event on first contentful paint', async () => { + it('sends `recording viewed` and `recording analyzed` event on first contentful paint', async () => { await expectLogic(logic, () => { logic.actions.loadRecordingSnapshots() }) @@ -271,6 +275,20 @@ describe('sessionRecordingDataLogic', () => { eventUsageLogic.actionTypes.reportRecording, // analyzed ]) }) + it('clears the cache after unmounting', async () => { + await expectLogic(logic, () => { + logic.actions.loadRecordingSnapshots() + }) + expect(Object.keys(logic.cache)).toEqual( + expect.arrayContaining(['metaStartTime', 'snapshotsStartTime', 'eventsStartTime']) + ) + expect(typeof logic.cache.metaStartTime).toBe('number') + + logic.unmount() + expect(logic.cache.metaStartTime).toBeNull() + expect(logic.cache.snapshotsStartTime).toBeNull() + expect(logic.cache.eventsStartTime).toBeNull() + }) }) describe('prepareRecordingSnapshots', () => { @@ -345,13 +363,13 @@ describe('sessionRecordingDataLogic', () => { action.type === logic.actionTypes.loadRecordingSnapshots && action.payload.source?.source === 'blob', 'loadRecordingSnapshotsSuccess', - // the response to that triggers loading of the second item which is the realtime source + // and then we report having viewed the recording + 'reportViewed', + // the response to the success action triggers loading of the second item which is the realtime source (action) => action.type === logic.actionTypes.loadRecordingSnapshots && action.payload.source?.source === 'realtime', 'loadRecordingSnapshotsSuccess', - // and then we report having viewed the 
recording - 'reportViewed', // having loaded any real time data we start polling to check for more 'startRealTimePolling', ]) diff --git a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts index f5d6af53dcd176..cd54309a54fdb6 100644 --- a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts +++ b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts @@ -3,6 +3,7 @@ import { EventType, eventWithTime } from '@rrweb/types' import { captureException } from '@sentry/react' import { actions, + beforeUnmount, BreakPointFunction, connect, defaults, @@ -148,28 +149,25 @@ export const prepareRecordingSnapshots = ( .sort((a, b) => a.timestamp - b.timestamp) } -const generateRecordingReportDurations = ( - cache: Record, - values: Record -): RecordingReportLoadTimes => { - // TODO: This any typing is super hard to manage - we should either type it or move it to a selector. +const generateRecordingReportDurations = (cache: Record): RecordingReportLoadTimes => { return { - metadata: { - size: values.segments.length, - duration: Math.round(performance.now() - cache.metaStartTime), - }, - snapshots: { - size: (values.sessionPlayerSnapshotData?.segments ?? []).length, - duration: Math.round(performance.now() - cache.snapshotsStartTime), - }, - events: { - size: values.sessionEventsData?.length ?? 0, - duration: Math.round(performance.now() - cache.eventsStartTime), - }, - firstPaint: cache.firstPaintDurationRow, + metadata: cache.metadataLoadDuration || Math.round(performance.now() - cache.metaStartTime), + snapshots: cache.snapshotsLoadDuration || Math.round(performance.now() - cache.snapshotsStartTime), + events: cache.eventsLoadDuration || Math.round(performance.now() - cache.eventsStartTime), + firstPaint: cache.firstPaintDuration, } } +const resetTimingsCache = (cache: Record): void => { + cache.metaStartTime = null + cache.metadataLoadDuration = null + cache.snapshotsStartTime = null + cache.snapshotsLoadDuration = null + cache.eventsStartTime = null + cache.eventsLoadDuration = null + cache.firstPaintDuration = null +} + export interface SessionRecordingDataLogicProps { sessionRecordingId: SessionRecordingId realTimePollingIntervalMilliseconds?: number @@ -321,35 +319,44 @@ export const sessionRecordingDataLogic = kea([ loadRecordingSnapshots: () => { actions.loadEvents() }, + loadRecordingMetaSuccess: () => { + cache.metadataLoadDuration = Math.round(performance.now() - cache.metaStartTime) + actions.reportUsageIfFullyLoaded() + }, + loadRecordingMetaFailure: () => { + cache.metadataLoadDuration = Math.round(performance.now() - cache.metaStartTime) + }, loadRecordingSnapshotsSuccess: () => { const { snapshots, sources } = values.sessionPlayerSnapshotData ?? {} - if (snapshots && !snapshots.length && sources?.length === 1) { - // We got only a single source to load, loaded it successfully, but it had no snapshots. - posthog.capture('recording_snapshots_v2_empty_response', { - source: sources[0], - }) + if (snapshots) { + if (!snapshots.length && sources?.length === 1) { + // We got only a single source to load, loaded it successfully, but it had no snapshots. 
+ posthog.capture('recording_snapshots_v2_empty_response', { + source: sources[0], + }) - // If we only have a realtime source and its empty, start polling it anyway - if (sources[0].source === SnapshotSourceType.realtime) { - actions.startRealTimePolling() - } + // If we only have a realtime source and its empty, start polling it anyway + if (sources[0].source === SnapshotSourceType.realtime) { + actions.startRealTimePolling() + } - return - } + return + } - cache.firstPaintDurationRow = { - size: (values.sessionPlayerSnapshotData?.snapshots ?? []).length, - duration: Math.round(performance.now() - cache.snapshotsStartTime), + if (!cache.firstPaintDuration) { + cache.firstPaintDuration = Math.round(performance.now() - cache.snapshotsStartTime) + actions.reportViewed() + } } - actions.reportUsageIfFullyLoaded() - const nextSourceToLoad = sources?.find((s) => !s.loaded) if (nextSourceToLoad) { actions.loadRecordingSnapshots(nextSourceToLoad) } else { - actions.reportViewed() + cache.snapshotsLoadDuration = Math.round(performance.now() - cache.snapshotsStartTime) + actions.reportUsageIfFullyLoaded() + // If we have a realtime source, start polling it const realTimeSource = sources?.find((s) => s.source === SnapshotSourceType.realtime) if (realTimeSource) { @@ -357,26 +364,31 @@ export const sessionRecordingDataLogic = kea([ } } }, + loadRecordingSnapshotsFailure: () => { + cache.snapshotsLoadDuration = Math.round(performance.now() - cache.snapshotsStartTime) + }, loadEventsSuccess: () => { + cache.eventsLoadDuration = Math.round(performance.now() - cache.eventsStartTime) actions.reportUsageIfFullyLoaded() }, - reportUsageIfFullyLoaded: () => { + loadEventsFailure: () => { + cache.eventsLoadDuration = Math.round(performance.now() - cache.eventsStartTime) + }, + reportUsageIfFullyLoaded: (_, breakpoint) => { + breakpoint() if (values.fullyLoaded) { eventUsageLogic.actions.reportRecording( values.sessionPlayerData, - generateRecordingReportDurations(cache, values), + generateRecordingReportDurations(cache), SessionRecordingUsageType.LOADED, 0 ) // Reset cache now that final usage report has been sent - cache.metaStartTime = null - cache.snapshotsStartTime = null - cache.eventsStartTime = null - cache.firstPaintDurationRow = null + resetTimingsCache(cache) } }, reportViewed: async (_, breakpoint) => { - const durations = generateRecordingReportDurations(cache, values) + const durations = generateRecordingReportDurations(cache) breakpoint() // Triggered on first paint eventUsageLogic.actions.reportRecording( @@ -407,10 +419,12 @@ export const sessionRecordingDataLogic = kea([ loaders(({ values, props, cache, actions }) => ({ sessionPlayerMetaData: { loadRecordingMeta: async (_, breakpoint) => { - cache.metaStartTime = performance.now() if (!props.sessionRecordingId) { return null } + + cache.metaStartTime = performance.now() + const response = await api.recordings.get(props.sessionRecordingId, { save_view: true, }) @@ -534,6 +548,10 @@ export const sessionRecordingDataLogic = kea([ null as null | RecordingEventType[], { loadEvents: async () => { + if (!cache.eventsStartTime) { + cache.eventsStartTime = performance.now() + } + const { start, end, person } = values.sessionPlayerData if (!person || !start || !end) { @@ -812,4 +830,7 @@ export const sessionRecordingDataLogic = kea([ }, ], }), + beforeUnmount(({ cache }) => { + resetTimingsCache(cache) + }), ]) diff --git a/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx 
b/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx index 34d77bd3aa3d25..dcd3b410c982d9 100644 --- a/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx +++ b/frontend/src/scenes/settings/project/SessionRecordingSettings.tsx @@ -191,7 +191,10 @@ function NetworkCaptureSettings(): JSX.Element {

When network capture is enabled, we always capture network timings. Use these switches to choose - whether to capture headers and payloads of requests + whether to capture headers and payloads of requests.{' '} + 
Learn how to mask header and payload values in our docs
+ 

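The next diff adds a brand-new activity-log describer for the `Team` scope. It follows the same shape as PostHog's existing describers (e.g. the notebook one): a `teamActionsMapping` table from changed field name to a function that turns an `ActivityChange` into a human-readable `ChangeMapping`, with unmapped fields falling through to `defaultDescriber`. A minimal self-contained sketch of that pattern — the `FieldChange` type and `describeChanges` helper below are illustrative stand-ins, not PostHog's actual types:

```ts
// Sketch of the field-to-description lookup pattern used by the describer below.
// FieldChange is a simplified stand-in for PostHog's ActivityChange type.
interface FieldChange {
    field: string
    action: 'created' | 'changed' | 'deleted'
    before?: unknown
    after?: unknown
}

// One describer per field; returning null means "don't narrate this change".
const describers: Record<string, (change: FieldChange) => string | null> = {
    session_recording_opt_in: (change) => `${change.after ? 'enabled' : 'disabled'} session recording`,
    name: (change) =>
        `${change.action === 'created' ? 'set' : 'changed'} the team name to ${String(change.after)}`,
    // Fields with no useful narration map to null and are skipped.
    api_token: () => null,
}

function describeChanges(changes: FieldChange[]): string[] {
    return changes
        .map((change) => describers[change.field]?.(change) ?? null)
        .filter((description): description is string => description !== null)
}

// Example:
console.log(
    describeChanges([
        { field: 'session_recording_opt_in', action: 'changed', before: false, after: true },
        { field: 'name', action: 'changed', before: 'Default project', after: 'Marketing site' },
    ])
)
// -> ['enabled session recording', 'changed the team name to Marketing site']
```

The real implementation returns JSX fragments instead of strings so descriptions can embed links, and stitches the surviving descriptions together with `SentenceList`, as the file below shows.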
diff --git a/frontend/src/scenes/teamActivityDescriber.tsx new file mode 100644 index 00000000000000..a58fd574c3ebdf --- /dev/null +++ b/frontend/src/scenes/teamActivityDescriber.tsx @@ -0,0 +1,236 @@ +import { + ActivityChange, + ActivityLogItem, + ChangeMapping, + defaultDescriber, + Description, + HumanizedChange, + userNameForLogItem, +} from 'lib/components/ActivityLog/humanizeActivity' +import { SentenceList } from 'lib/components/ActivityLog/SentenceList' +import { Link } from 'lib/lemon-ui/Link' +import { pluralize } from 'lib/utils' +import { urls } from 'scenes/urls' + +import { ActivityScope, TeamType } from '~/types' + +const teamActionsMapping: Record< + keyof TeamType, + (change?: ActivityChange, logItem?: ActivityLogItem) => ChangeMapping | null +> = { + // session replay + session_recording_minimum_duration_milliseconds: (change) => { + const after = change?.after + if (after === undefined || typeof after !== 'number') { + return null + } + let prefix = 'changed' + if (change?.action === 'created') { + prefix = 'set' + } + return { + description: [ + <> + {prefix} the minimum session recording duration to {after / 1000} seconds + </>, + ], + } + }, + capture_console_log_opt_in(change: ActivityChange | undefined): ChangeMapping | null { + return { description: [<>{change?.after ? 'enabled' : 'disabled'} console log capture in session replay</>] } + }, + capture_performance_opt_in(change: ActivityChange | undefined): ChangeMapping | null { + return { + description: [ + <>{change?.after ? 'enabled' : 'disabled'} network performance capture in session replay</>, + ], + } + }, + recording_domains(change: ActivityChange | undefined): ChangeMapping | null { + const before: string[] | null = Array.isArray(change?.before) ? change!.before : null + const after: string[] | null = Array.isArray(change?.after) ? change!.after : null + if (after === null && before === null) { + return null + } + + const descriptions = [] + + const adds: string[] = [] + if (after) { + for (const domain of after) { + if ((!before || !before.includes(domain)) && domain.trim().length > 0) { + adds.push(domain) + } + } + } + if (adds.length) { + descriptions.push( + <> + added {adds.join(', ')} to session recording authorised{' '} + {pluralize(adds.length, 'domain', 'domains', false)} + </> + ) + } + + const removes: string[] = [] + if (before) { + for (const domain of before) { + if ((!after || !after.includes(domain)) && domain.trim().length > 0) { + removes.push(domain) + } + } + } + + if (removes.length) { + descriptions.push( + <> + removed {removes.join(', ')} from session recording authorised{' '} + {pluralize(removes.length, 'domain', 'domains', false)} + </> + ) + } + return { description: descriptions } + }, + session_recording_linked_flag(change: ActivityChange | undefined): ChangeMapping | null { + return { + description: [ + <> + {change?.after ? 'linked' : 'unlinked'} session recording to feature flag {change?.after} + </>, + ], + } + }, + session_recording_network_payload_capture_config(change: ActivityChange | undefined): ChangeMapping | null { + return { + description: [<>{change?.after ? 'enabled' : 'disabled'} network payload capture in session replay</>], + } + }, + session_recording_opt_in(change: ActivityChange | undefined): ChangeMapping | null { + return { description: [<>{change?.after ? 
'enabled' : 'disabled'} session recording</>] } + }, + session_recording_sample_rate(change: ActivityChange | undefined): ChangeMapping | null { + return { + description: [ + <> + {change?.action === 'created' ? 'set' : 'changed'} the session recording sample rate to{' '} + {change?.after}% + </>, + ], + } + }, + session_replay_config(_change: ActivityChange | undefined): ChangeMapping | null { + // TODO we'll eventually need a deeper mapping for this nested object + const recordCanvasAfter = typeof _change?.after === 'object' ? _change?.after?.record_canvas : null + + if (recordCanvasAfter === null) { + return null + } + return { description: [<>{recordCanvasAfter ? 'enabled' : 'disabled'} canvas recording in session replay</>] } + }, + // autocapture + autocapture_exceptions_errors_to_ignore: () => null, + autocapture_exceptions_opt_in: () => null, + autocapture_opt_out(change: ActivityChange | undefined): ChangeMapping | null { + return { description: [<>{change?.after ? 'enabled' : 'disabled'} autocapture</>] } + }, + // and... many more + name(change: ActivityChange | undefined): ChangeMapping | null { + return { + description: [ + <> + {change?.action === 'created' ? 'set' : 'changed'} the team name to {change?.after} + </>, + ], + } + }, + // TODO if I had to test and describe every single one of these I'd never release this + // we can add descriptions here as the need arises + access_control: () => null, + anonymize_ips: () => null, + app_urls: () => null, + completed_snippet_onboarding: () => null, + correlation_config: () => null, + data_attributes: () => null, + effective_membership_level: () => null, + groups_on_events_querying_enabled: () => null, + has_group_types: () => null, + ingested_event: () => null, + is_demo: () => null, + live_events_columns: () => null, + organization: () => null, + path_cleaning_filters: () => null, + person_display_name_properties: () => null, + person_on_events_querying_enabled: () => null, + primary_dashboard: () => null, + slack_incoming_webhook: () => null, + test_account_filters: () => null, + test_account_filters_default_checked: () => null, + timezone: () => null, + surveys_opt_in: () => null, + week_start_day: () => null, + extra_settings: () => null, + has_completed_onboarding_for: () => null, + // should never come from the backend + created_at: () => null, + api_token: () => null, + id: () => null, + updated_at: () => null, + uuid: () => null, +} + +function nameAndLink(logItem?: ActivityLogItem): JSX.Element { + return logItem?.detail?.short_id ? ( + {logItem?.detail.name || 'unknown'} + ) : logItem?.detail.name ? 
( + <>{logItem?.detail.name}</> + ) : ( + Untitled + ) +} + +export function teamActivityDescriber(logItem: ActivityLogItem, asNotification?: boolean): HumanizedChange { + if (logItem.scope !== ActivityScope.TEAM) { + console.error('team describer received a non-Team activity') + return { description: null } + } + + if (logItem.activity == 'changed' || logItem.activity == 'updated') { + let changes: Description[] = [] + let changeSuffix: Description = <>on {nameAndLink(logItem)}</> + + for (const change of logItem.detail.changes || []) { + if (!change?.field || !teamActionsMapping[change.field]) { + continue // not all team fields are describable + } + + const actionHandler = teamActionsMapping[change.field] + const processedChange = actionHandler(change, logItem) + if (processedChange === null) { + continue // unexpected log from backend is indescribable + } + + const { description, suffix } = processedChange + if (description) { + changes = changes.concat(description) + } + + if (suffix) { + changeSuffix = suffix + } + } + + if (changes.length) { + return { + description: ( + <SentenceList + listParts={changes} + prefix={<strong>{userNameForLogItem(logItem)}</strong>} + suffix={changeSuffix} + /> + ), + } + } + } + + return defaultDescriber(logItem, asNotification, nameAndLink(logItem)) +} diff --git a/frontend/src/scenes/web-analytics/WebAnalyticsModal.tsx index 438486433e5c7e..422f38acbefd06 100644 --- a/frontend/src/scenes/web-analytics/WebAnalyticsModal.tsx +++ b/frontend/src/scenes/web-analytics/WebAnalyticsModal.tsx @@ -30,7 +30,7 @@ export const WebAnalyticsModal = (): JSX.Element | null => { fullScreen={false} closable={true} > -
+
{ query={modal.query} insightProps={modal.insightProps} showIntervalSelect={modal.showIntervalSelect} + showPathCleaningControls={modal.showPathCleaningControls} />
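The modal above only forwards `showPathCleaningControls` into `WebQuery`; the toggle state itself lives in `webAnalyticsLogic` (further down in this diff) and round-trips through the `path_cleaning` URL parameter. Because that parameter can come back from the query string as a boolean, a string, or a number, the logic coerces it permissively with `[true, 'true', 1, '1'].includes(path_cleaning)`. A standalone sketch of that coercion — the `coerceBooleanParam` helper name is illustrative, not part of the PR:

```ts
// Accept every truthy spelling that can plausibly arrive from a query string,
// mirroring the `[true, 'true', 1, '1'].includes(path_cleaning)` check in the logic.
function coerceBooleanParam(value: unknown): boolean {
    return ([true, 'true', 1, '1'] as unknown[]).includes(value)
}

// Examples:
console.log(coerceBooleanParam('1')) // true
console.log(coerceBooleanParam(true)) // true
console.log(coerceBooleanParam('false')) // false
console.log(coerceBooleanParam(undefined)) // false
```

Anything unrecognised falls back to `false`, which matches the reducer's `setStateFromUrl: (_, { state }) => state.isPathCleaningEnabled || false` default.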
diff --git a/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx b/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx index 542f94a9b32272..c89754e0951d9e 100644 --- a/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx +++ b/frontend/src/scenes/web-analytics/WebAnalyticsTile.tsx @@ -1,8 +1,12 @@ +import { IconGear } from '@posthog/icons' import { useActions, useValues } from 'kea' import { IntervalFilterStandalone } from 'lib/components/IntervalFilter' +import { LemonButton } from 'lib/lemon-ui/LemonButton' +import { LemonSwitch } from 'lib/lemon-ui/LemonSwitch' import { UnexpectedNeverError } from 'lib/utils' import { useCallback, useMemo } from 'react' import { countryCodeToFlag, countryCodeToName } from 'scenes/insights/views/WorldMap' +import { urls } from 'scenes/urls' import { DeviceTab, GeographyTab, webAnalyticsLogic } from 'scenes/web-analytics/webAnalyticsLogic' import { Query } from '~/queries/Query/Query' @@ -307,12 +311,16 @@ export const WebStatsTableTile = ({ query, breakdownBy, insightProps, + showPathCleaningControls, }: { query: DataTableNode breakdownBy: WebStatsBreakdown insightProps: InsightLogicProps + showPathCleaningControls?: boolean }): JSX.Element => { - const { togglePropertyFilter } = useActions(webAnalyticsLogic) + const { togglePropertyFilter, setIsPathCleaningEnabled } = useActions(webAnalyticsLogic) + const { isPathCleaningEnabled } = useValues(webAnalyticsLogic) + const { key, type } = webStatsBreakdownToPropertyName(breakdownBy) || {} const onClick = useCallback( @@ -327,6 +335,15 @@ export const WebStatsTableTile = ({ const context = useMemo((): QueryContext => { const rowProps: QueryContext['rowProps'] = (record: unknown) => { + if ( + (breakdownBy === WebStatsBreakdown.InitialPage || breakdownBy === WebStatsBreakdown.Page) && + isPathCleaningEnabled + ) { + // if the path cleaning is enabled, don't allow toggling a path by clicking a row, as this wouldn't + // work due to the order that the regex and filters are applied + return {} + } + const breakdownValue = getBreakdownValue(record, breakdownBy) if (breakdownValue === undefined) { return {} @@ -342,7 +359,37 @@ export const WebStatsTableTile = ({ } }, [onClick, insightProps]) - return + const pathCleaningSettingsUrl = urls.settings('project-product-analytics', 'path-cleaning') + return ( +
+ {showPathCleaningControls && ( +
+
+ + Enable path cleaning + } + type="tertiary" + status="alt" + size="small" + noPadding={true} + tooltip="Edit path cleaning settings" + to={pathCleaningSettingsUrl} + /> +
+ } + checked={isPathCleaningEnabled} + onChange={setIsPathCleaningEnabled} + className="h-full" + /> +
+
+ )} + +
+ ) } const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): string | undefined => { @@ -383,14 +430,23 @@ const getBreakdownValue = (record: unknown, breakdownBy: WebStatsBreakdown): str export const WebQuery = ({ query, showIntervalSelect, + showPathCleaningControls, insightProps, }: { query: QuerySchema showIntervalSelect?: boolean + showPathCleaningControls?: boolean insightProps: InsightLogicProps }): JSX.Element => { if (query.kind === NodeKind.DataTableNode && query.source.kind === NodeKind.WebStatsTableQuery) { - return + return ( + + ) } if (query.kind === NodeKind.InsightVizNode) { return diff --git a/frontend/src/scenes/web-analytics/WebDashboard.tsx b/frontend/src/scenes/web-analytics/WebDashboard.tsx index e7fc272557d5ef..638c0c73ba4e90 100644 --- a/frontend/src/scenes/web-analytics/WebDashboard.tsx +++ b/frontend/src/scenes/web-analytics/WebDashboard.tsx @@ -66,7 +66,7 @@ const Tiles = (): JSX.Element => { } const QueryTileItem = ({ tile }: { tile: QueryTile }): JSX.Element => { - const { query, title, layout, insightProps } = tile + const { query, title, layout, insightProps, showPathCleaningControls, showIntervalSelect } = tile const { openModal } = useActions(webAnalyticsLogic) const { getNewInsightUrl } = useValues(webAnalyticsLogic) @@ -107,7 +107,12 @@ const QueryTileItem = ({ tile }: { tile: QueryTile }): JSX.Element => { )} > {title &&

{title}

} - + {buttonsRow.length > 0 ?
{buttonsRow}
: null}
) @@ -137,6 +142,7 @@ const TabsTileItem = ({ tile }: { tile: TabsTile }): JSX.Element => { key={tab.id} query={tab.query} showIntervalSelect={tab.showIntervalSelect} + showPathCleaningControls={tab.showPathCleaningControls} insightProps={tab.insightProps} /> ), diff --git a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts index f3a871d4cde4c8..fea26dcf8fa8e1 100644 --- a/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts +++ b/frontend/src/scenes/web-analytics/webAnalyticsLogic.ts @@ -71,6 +71,8 @@ interface BaseTile { export interface QueryTile extends BaseTile { title?: string query: QuerySchema + showIntervalSelect?: boolean + showPathCleaningControls?: boolean insightProps: InsightLogicProps canOpenModal: boolean canOpenInsight?: boolean @@ -85,6 +87,7 @@ export interface TabsTile extends BaseTile { linkText: string query: QuerySchema showIntervalSelect?: boolean + showPathCleaningControls?: boolean insightProps: InsightLogicProps canOpenModal?: boolean canOpenInsight?: boolean @@ -100,6 +103,7 @@ export interface WebDashboardModalQuery { query: QuerySchema insightProps: InsightLogicProps showIntervalSelect?: boolean + showPathCleaningControls?: boolean canOpenInsight?: boolean } @@ -196,6 +200,7 @@ export const webAnalyticsLogic = kea([ setGeographyTab: (tab: string) => ({ tab }), setDates: (dateFrom: string | null, dateTo: string | null) => ({ dateFrom, dateTo }), setInterval: (interval: IntervalType) => ({ interval }), + setIsPathCleaningEnabled: (isPathCleaningEnabled: boolean) => ({ isPathCleaningEnabled }), setStateFromUrl: (state: { filters: WebAnalyticsPropertyFilters dateFrom: string | null @@ -206,6 +211,7 @@ export const webAnalyticsLogic = kea([ deviceTab: string | null pathTab: string | null geographyTab: string | null + isPathCleaningEnabled: boolean | null }) => ({ state, }), @@ -308,6 +314,13 @@ export const webAnalyticsLogic = kea([ togglePropertyFilter: (oldTab, { tabChange }) => tabChange?.geographyTab || oldTab, }, ], + isPathCleaningEnabled: [ + false as boolean, + { + setIsPathCleaningEnabled: (_, { isPathCleaningEnabled }) => isPathCleaningEnabled, + setStateFromUrl: (_, { state }) => state.isPathCleaningEnabled || false, + }, + ], _modalTileAndTab: [ null as { tileId: TileId; tabId?: string } | null, { @@ -367,6 +380,7 @@ export const webAnalyticsLogic = kea([ s.pathTab, s.geographyTab, s.dateFilter, + s.isPathCleaningEnabled, () => values.statusCheck, () => values.isGreaterThanMd, () => values.shouldShowGeographyTile, @@ -379,6 +393,7 @@ export const webAnalyticsLogic = kea([ pathTab, geographyTab, { dateFrom, dateTo, interval }, + isPathCleaningEnabled: boolean, statusCheck, isGreaterThanMd: boolean, shouldShowGeographyTile @@ -552,12 +567,14 @@ export const webAnalyticsLogic = kea([ includeScrollDepth: statusCheck?.isSendingPageLeavesScroll, includeBounceRate: true, sampling, + doPathCleaning: isPathCleaningEnabled, limit: 10, }, embedded: false, }, insightProps: createInsightProps(TileId.PATHS, PathTab.PATH), canOpenModal: true, + showPathCleaningControls: true, }, { id: PathTab.INITIAL_PATH, @@ -573,12 +590,14 @@ export const webAnalyticsLogic = kea([ dateRange, includeScrollDepth: statusCheck?.isSendingPageLeavesScroll, sampling, + doPathCleaning: isPathCleaningEnabled, limit: 10, }, embedded: false, }, insightProps: createInsightProps(TileId.PATHS, PathTab.INITIAL_PATH), canOpenModal: true, + showPathCleaningControls: true, }, ], }, @@ -990,6 +1009,7 @@ export const webAnalyticsLogic = 
kea([ tabId, title: tab.title, showIntervalSelect: tab.showIntervalSelect, + showPathCleaningControls: tab.showPathCleaningControls, insightProps: { dashboardItemId: getDashboardItemId(tileId, tabId, true), loadPriority: 0, @@ -1004,6 +1024,8 @@ export const webAnalyticsLogic = kea([ return { tileId, title: tile.title, + showIntervalSelect: tile.showIntervalSelect, + showPathCleaningControls: tile.showPathCleaningControls, insightProps: { dashboardItemId: getDashboardItemId(tileId, undefined, true), loadPriority: 0, @@ -1188,6 +1210,7 @@ export const webAnalyticsLogic = kea([ pathTab, geographyTab, graphsTab, + isPathCleaningEnabled, } = values const urlParams = new URLSearchParams() @@ -1214,6 +1237,9 @@ export const webAnalyticsLogic = kea([ if (geographyTab) { urlParams.set('geography_tab', geographyTab) } + if (isPathCleaningEnabled) { + urlParams.set('path_cleaning', isPathCleaningEnabled.toString()) + } return `/web?${urlParams.toString()}` } @@ -1233,7 +1259,18 @@ export const webAnalyticsLogic = kea([ urlToAction(({ actions }) => ({ '/web': ( _, - { filters, date_from, date_to, interval, device_tab, source_tab, graphs_tab, path_tab, geography_tab } + { + filters, + date_from, + date_to, + interval, + device_tab, + source_tab, + graphs_tab, + path_tab, + geography_tab, + path_cleaning, + } ) => { const parsedFilters = isWebAnalyticsPropertyFilters(filters) ? filters : initialWebAnalyticsFilter @@ -1247,6 +1284,7 @@ export const webAnalyticsLogic = kea([ graphsTab: graphs_tab || null, pathTab: path_tab || null, geographyTab: geography_tab || null, + isPathCleaningEnabled: [true, 'true', 1, '1'].includes(path_cleaning), }) }, })), diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 81f7c6b7cb1043..5b4a051c99edcb 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -3316,16 +3316,11 @@ export interface OrganizationResourcePermissionType { created_by: UserBaseType | null } -export interface RecordingReportLoadTimeRow { - size?: number - duration: number -} - export interface RecordingReportLoadTimes { - metadata: RecordingReportLoadTimeRow - snapshots: RecordingReportLoadTimeRow - events: RecordingReportLoadTimeRow - firstPaint: RecordingReportLoadTimeRow + metadata: number + snapshots: number + events: number + firstPaint: number } export type JsonType = string | number | boolean | null | { [key: string]: JsonType } | Array @@ -3369,6 +3364,7 @@ export enum ActivityScope { SURVEY = 'Survey', EARLY_ACCESS_FEATURE = 'EarlyAccessFeature', COMMENT = 'Comment', + TEAM = 'Team', } export type CommentType = { diff --git a/posthog/api/exports.py b/posthog/api/exports.py index 1317065da65200..e5fbc85fbb2190 100644 --- a/posthog/api/exports.py +++ b/posthog/api/exports.py @@ -1,8 +1,6 @@ from datetime import timedelta from typing import Any, Dict -import celery -import requests.exceptions import structlog from django.http import HttpResponse from django.utils.timezone import now @@ -67,24 +65,29 @@ def validate(self, data: Dict) -> Dict: return data def synthetic_create(self, reason: str, *args: Any, **kwargs: Any) -> ExportedAsset: - return self._create_asset(self.validated_data, user=None, asset_generation_timeout=0.01, reason=reason) + return self._create_asset(self.validated_data, user=None, reason=reason) def create(self, validated_data: Dict, *args: Any, **kwargs: Any) -> ExportedAsset: request = self.context["request"] - return self._create_asset(validated_data, user=request.user, asset_generation_timeout=10, reason=None) + return 
self._create_asset(validated_data, user=request.user, reason=None) def _create_asset( self, validated_data: Dict, user: User | None, - asset_generation_timeout: float, reason: str | None, ) -> ExportedAsset: if user is not None: validated_data["created_by"] = user instance: ExportedAsset = super().create(validated_data) - self.generate_export_sync(instance, timeout=asset_generation_timeout) + + if instance.export_format not in ExportedAsset.SUPPORTED_FORMATS: + raise serializers.ValidationError( + {"export_format": [f"Export format {instance.export_format} is not supported."]} + ) + + exporter.export_asset.delay(instance.id) if user is not None: report_user_action(user, "export created", instance.get_analytics_metadata()) @@ -127,24 +130,6 @@ def _create_asset( pass return instance - @staticmethod - def generate_export_sync(instance: ExportedAsset, timeout: float = 10) -> None: - task = exporter.export_asset.delay(instance.id) - try: - task.get(timeout=timeout) - instance.refresh_from_db() - except celery.exceptions.TimeoutError: - # If the rendering times out - fine, the frontend will poll instead for the response - pass - except requests.exceptions.MissingSchema: - # regression test see https://github.com/PostHog/posthog/issues/11204 - pass - except NotImplementedError as ex: - logger.error("exporters.unsupported_export_type", exception=ex, exc_info=True) - raise serializers.ValidationError( - {"export_format": ["This type of export is not supported for this resource."]} - ) - class ExportedAssetViewSet( mixins.RetrieveModelMixin, diff --git a/posthog/api/organization.py b/posthog/api/organization.py index 38e387e1eee2fc..8ee366c906014e 100644 --- a/posthog/api/organization.py +++ b/posthog/api/organization.py @@ -137,7 +137,7 @@ class OrganizationViewSet(viewsets.ModelViewSet): def get_permissions(self): if self.request.method == "POST": # Cannot use `OrganizationMemberPermissions` or `OrganizationAdminWritePermissions` - # because they require an existing org, unneded anyways because permissions are organization-based + # because they require an existing org, unneeded anyway because permissions are organization-based return [ permission() for permission in [ diff --git a/posthog/api/team.py b/posthog/api/team.py index 79529c23052c4c..2ca4a00ce575d0 100644 --- a/posthog/api/team.py +++ b/posthog/api/team.py @@ -4,6 +4,7 @@ from django.core.cache import cache from django.shortcuts import get_object_or_404 +from loginas.utils import is_impersonated_session from rest_framework import ( exceptions, permissions, @@ -19,6 +20,14 @@ from posthog.constants import AvailableFeature from posthog.event_usage import report_user_action from posthog.models import InsightCachingState, Organization, Team, User +from posthog.models.activity_logging.activity_log import ( + log_activity, + Detail, + Change, + load_activity, + dict_changes_between, +) +from posthog.models.activity_logging.activity_page import activity_page_response from posthog.models.async_deletion import AsyncDeletion, DeletionType from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.organization import OrganizationMembership @@ -28,7 +37,7 @@ set_team_in_cache, ) from posthog.models.team.util import delete_batch_exports, delete_bulky_postgres_data -from posthog.models.utils import generate_random_token_project +from posthog.models.utils import generate_random_token_project, UUIDT from posthog.permissions import ( CREATE_METHODS, OrganizationAdminWritePermissions, @@ -280,6 +289,18 @@ def create(self, 
validated_data: Dict[str, Any], **kwargs) -> Team: request.user.current_team = team request.user.team = request.user.current_team # Update cached property request.user.save() + + log_activity( + organization_id=organization.id, + team_id=team.pk, + user=request.user, + was_impersonated=is_impersonated_session(request), + scope="Team", + item_id=team.pk, + activity="created", + detail=Detail(name=str(team.name)), + ) + return team def _handle_timezone_update(self, team: Team) -> None: @@ -288,10 +309,28 @@ def _handle_timezone_update(self, team: Team) -> None: cache.delete_many(hashes) def update(self, instance: Team, validated_data: Dict[str, Any]) -> Team: + before_update = instance.__dict__.copy() + if "timezone" in validated_data and validated_data["timezone"] != instance.timezone: self._handle_timezone_update(instance) updated_team = super().update(instance, validated_data) + changes = dict_changes_between("Team", before_update, updated_team.__dict__, use_field_exclusions=True) + + log_activity( + organization_id=cast(UUIDT, instance.organization_id), + team_id=instance.pk, + user=cast(User, self.context["request"].user), + was_impersonated=is_impersonated_session(self.context["request"]), + scope="Team", + item_id=instance.pk, + activity="updated", + detail=Detail( + name=str(instance.name), + changes=changes, + ), + ) + return updated_team @@ -370,6 +409,8 @@ def team(self): def perform_destroy(self, team: Team): team_id = team.pk + organization_id = team.organization_id + team_name = team.name user = cast(User, self.request.user) @@ -392,6 +433,16 @@ def perform_destroy(self, team: Team): ignore_conflicts=True, ) + log_activity( + organization_id=cast(UUIDT, organization_id), + team_id=team_id, + user=user, + was_impersonated=is_impersonated_session(self.request), + scope="Team", + item_id=team_id, + activity="deleted", + detail=Detail(name=str(team_name)), + ) # TRICKY: We pass in Team here as otherwise the access to "current_team" can fail if it was deleted report_user_action(user, f"team deleted", team=team) @@ -409,6 +460,27 @@ def reset_token(self, request: request.Request, id: str, **kwargs) -> response.R old_token = team.api_token team.api_token = generate_random_token_project() team.save() + + log_activity( + organization_id=team.organization_id, + team_id=team.pk, + user=cast(User, request.user), + was_impersonated=is_impersonated_session(request), + scope="Team", + item_id=team.pk, + activity="updated", + detail=Detail( + name=str(team.name), + changes=[ + Change( + type="Team", + action="changed", + field="api_token", + ) + ], + ), + ) + set_team_in_cache(old_token, None) return response.Response(TeamSerializer(team, context=self.get_serializer_context()).data) @@ -422,6 +494,22 @@ def is_generating_demo_data(self, request: request.Request, id: str, **kwargs) - cache_key = f"is_generating_demo_data_{team.pk}" return response.Response({"is_generating_demo_data": cache.get(cache_key) == "True"}) + @action(methods=["GET"], detail=True) + def activity(self, request: request.Request, **kwargs): + limit = int(request.query_params.get("limit", "10")) + page = int(request.query_params.get("page", "1")) + + team = self.get_object() + + activity_page = load_activity( + scope="Team", + team_id=team.pk, + item_ids=[str(team.pk)], + limit=limit, + page=page, + ) + return activity_page_response(activity_page, limit, page, request) + @cached_property def user_permissions(self): team = self.get_object() if self.action == "reset_token" else None diff --git 
a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr index 197132c69be545..9456ed8ed07eef 100644 --- a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr +++ b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr @@ -1235,33 +1235,6 @@ 5 /* ... */)) /*controller='project_dashboards-detail',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/%3F%24'*/ ''' # --- -# name: TestDashboard.test_adding_insights_is_not_nplus1_for_gets.32 - ''' - SELECT "posthog_dashboard"."id", - "posthog_dashboard"."name", - "posthog_dashboard"."description", - "posthog_dashboard"."team_id", - "posthog_dashboard"."pinned", - "posthog_dashboard"."created_at", - "posthog_dashboard"."created_by_id", - "posthog_dashboard"."deleted", - "posthog_dashboard"."last_accessed_at", - "posthog_dashboard"."filters", - "posthog_dashboard"."creation_mode", - "posthog_dashboard"."restriction_level", - "posthog_dashboard"."deprecated_tags", - "posthog_dashboard"."tags", - "posthog_dashboard"."share_token", - "posthog_dashboard"."is_shared" - FROM "posthog_dashboard" - WHERE (NOT ("posthog_dashboard"."deleted") - AND "posthog_dashboard"."id" IN (1, - 2, - 3, - 4, - 5 /* ... */)) /*controller='project_dashboards-detail',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/%3F%24'*/ - ''' -# --- # name: TestDashboard.test_adding_insights_is_not_nplus1_for_gets.4 ''' SELECT "posthog_dashboard"."id", @@ -2721,24 +2694,6 @@ 5 /* ... */) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/ ''' # --- -# name: TestDashboard.test_listing_dashboards_is_not_nplus1.57 - ''' - SELECT "posthog_sharingconfiguration"."id", - "posthog_sharingconfiguration"."team_id", - "posthog_sharingconfiguration"."dashboard_id", - "posthog_sharingconfiguration"."insight_id", - "posthog_sharingconfiguration"."recording_id", - "posthog_sharingconfiguration"."created_at", - "posthog_sharingconfiguration"."enabled", - "posthog_sharingconfiguration"."access_token" - FROM "posthog_sharingconfiguration" - WHERE "posthog_sharingconfiguration"."dashboard_id" IN (1, - 2, - 3, - 4, - 5 /* ... */) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/ - ''' -# --- # name: TestDashboard.test_listing_dashboards_is_not_nplus1.6 ''' SELECT "posthog_team"."id", @@ -8395,17 +8350,6 @@ LIMIT 1 /*controller='project_dashboards-detail',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/%3F%24'*/ ''' # --- -# name: TestDashboard.test_loading_individual_dashboard_does_not_prefetch_all_possible_tiles.315 - ''' - SELECT "posthog_instancesetting"."id", - "posthog_instancesetting"."key", - "posthog_instancesetting"."raw_value" - FROM "posthog_instancesetting" - WHERE "posthog_instancesetting"."key" = 'constance:posthog:PERSON_ON_EVENTS_ENABLED' - ORDER BY "posthog_instancesetting"."id" ASC - LIMIT 1 /*controller='project_dashboards-detail',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%28%3FP%3Cpk%3E%5B%5E/.%5D%2B%29/%3F%24'*/ - ''' -# --- # name: TestDashboard.test_loading_individual_dashboard_does_not_prefetch_all_possible_tiles.32 ''' SELECT "posthog_team"."id", @@ -11655,24 +11599,6 @@ 5 /* ... 
*/) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/ ''' # --- -# name: TestDashboard.test_retrieve_dashboard_list.33 - ''' - SELECT "posthog_sharingconfiguration"."id", - "posthog_sharingconfiguration"."team_id", - "posthog_sharingconfiguration"."dashboard_id", - "posthog_sharingconfiguration"."insight_id", - "posthog_sharingconfiguration"."recording_id", - "posthog_sharingconfiguration"."created_at", - "posthog_sharingconfiguration"."enabled", - "posthog_sharingconfiguration"."access_token" - FROM "posthog_sharingconfiguration" - WHERE "posthog_sharingconfiguration"."dashboard_id" IN (1, - 2, - 3, - 4, - 5 /* ... */) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/ - ''' -# --- # name: TestDashboard.test_retrieve_dashboard_list.4 ''' SELECT "posthog_dashboardtile"."id" diff --git a/posthog/api/test/dashboards/test_dashboard.py b/posthog/api/test/dashboards/test_dashboard.py index 74216d22804532..e1cf928728073f 100644 --- a/posthog/api/test/dashboards/test_dashboard.py +++ b/posthog/api/test/dashboards/test_dashboard.py @@ -874,7 +874,16 @@ def test_dashboard_duplication_can_duplicate_tiles_without_editing_name_if_there } ) - assert duplicate_response["tiles"][0]["insight"]["name"] is None + # this test only needs to check that insight name is still None, + # but it flaps in CI. + # my guess is that the order of the response is not guaranteed + assert duplicate_response is not None + assert len(duplicate_response.get("tiles", [])) == 2 + + insight_tile = next(tile for tile in duplicate_response["tiles"] if "insight" in tile) + text_tile = next(tile for tile in duplicate_response["tiles"] if "text" in tile) + assert insight_tile["insight"]["name"] is None + assert text_tile is not None def test_dashboard_duplication(self): existing_dashboard = Dashboard.objects.create(team=self.team, name="existing dashboard", created_by=self.user) diff --git a/posthog/api/test/test_cohort.py b/posthog/api/test/test_cohort.py index 019ee47b9fe741..a1322f0f4784ab 100644 --- a/posthog/api/test/test_cohort.py +++ b/posthog/api/test/test_cohort.py @@ -295,7 +295,7 @@ def test_csv_export_new(self): lines = self._get_export_output(f"/api/cohort/{cohort.pk}/persons") headers = lines[0].split(",") self.assertEqual(len(lines), 3) - self.assertEqual(lines[1].split(",")[headers.index("email")], "test@test.com") + self.assertEqual(lines[1].split(",")[headers.index("properties.email")], "test@test.com") self.assertEqual(lines[0].count("distinct_id"), 10) def test_filter_by_cohort(self): diff --git a/posthog/api/test/test_exports.py b/posthog/api/test/test_exports.py index e96c192a43c9d0..68c1da84f30fe2 100644 --- a/posthog/api/test/test_exports.py +++ b/posthog/api/test/test_exports.py @@ -145,7 +145,7 @@ def test_swallow_missing_schema_and_allow_front_end_to_poll(self, mock_exporter_ def test_can_create_new_valid_export_insight(self, mock_exporter_task) -> None: response = self.client.post( f"/api/projects/{self.team.id}/exports", - {"export_format": "application/pdf", "insight": self.insight.id}, + {"export_format": "image/png", "insight": self.insight.id}, ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) data = response.json() @@ -155,8 +155,8 @@ def test_can_create_new_valid_export_insight(self, mock_exporter_task) -> None: "id": data["id"], "created_at": data["created_at"], "insight": self.insight.id, - "export_format": 
"application/pdf", - "filename": "export-example-insight.pdf", + "export_format": "image/png", + "filename": "export-example-insight.png", "has_content": False, "dashboard": None, "export_context": None, @@ -183,7 +183,7 @@ def test_can_create_new_valid_export_insight(self, mock_exporter_task) -> None: "changes": [ { "action": "exported", - "after": "application/pdf", + "after": "image/png", "before": None, "field": "export_format", "type": "Insight", @@ -231,7 +231,7 @@ def test_will_respond_even_if_task_timesout(self, mock_exporter_task) -> None: mock_exporter_task.export_asset.delay.return_value.get.side_effect = celery.exceptions.TimeoutError("timed out") response = self.client.post( f"/api/projects/{self.team.id}/exports", - {"export_format": "application/pdf", "insight": self.insight.id}, + {"export_format": "image/png", "insight": self.insight.id}, ) self.assertEqual(response.status_code, status.HTTP_201_CREATED) @@ -248,7 +248,7 @@ def test_will_error_if_export_unsupported(self, mock_exporter_task) -> None: { "attr": "export_format", "code": "invalid_input", - "detail": "This type of export is not supported for this resource.", + "detail": "Export format application/pdf is not supported.", "type": "validation_error", }, ) @@ -367,7 +367,14 @@ def test_can_download_a_csv(self, patched_request) -> None: after = (datetime.now() - timedelta(minutes=10)).isoformat() def requests_side_effect(*args, **kwargs): - return self.client.get(kwargs["url"], kwargs["json"], **kwargs["headers"]) + response = self.client.get(kwargs["url"], kwargs["json"], **kwargs["headers"]) + + def raise_for_status(): + if 400 <= response.status_code < 600: + raise requests.exceptions.HTTPError(response=response) + + response.raise_for_status = raise_for_status # type: ignore[attr-defined] + return response patched_request.side_effect = requests_side_effect @@ -446,7 +453,14 @@ def _get_export_output(self, path: str) -> List[str]: with patch("posthog.tasks.exports.csv_exporter.requests.request") as patched_request: def requests_side_effect(*args, **kwargs): - return self.client.get(kwargs["url"], kwargs["json"], **kwargs["headers"]) + response = self.client.get(kwargs["url"], kwargs["json"], **kwargs["headers"]) + + def raise_for_status(): + if 400 <= response.status_code < 600: + raise requests.exceptions.HTTPError(response=response) + + response.raise_for_status = raise_for_status # type: ignore[attr-defined] + return response patched_request.side_effect = requests_side_effect diff --git a/posthog/api/test/test_team.py b/posthog/api/test/test_team.py index 7f1df628558e44..b172a588639796 100644 --- a/posthog/api/test/test_team.py +++ b/posthog/api/test/test_team.py @@ -1,10 +1,12 @@ import json -from typing import List, cast +import uuid +from typing import List, cast, Dict, Optional from unittest import mock -from unittest.mock import MagicMock, call, patch +from unittest.mock import MagicMock, call, patch, ANY from asgiref.sync import sync_to_async from django.core.cache import cache +from freezegun import freeze_time from parameterized import parameterized from rest_framework import status from temporalio.service import RPCError @@ -12,7 +14,7 @@ from posthog.api.test.batch_exports.conftest import start_test_worker from posthog.temporal.common.schedule import describe_schedule from posthog.constants import AvailableFeature -from posthog.models import EarlyAccessFeature +from posthog.models import EarlyAccessFeature, ActivityLog from posthog.models.async_deletion.async_deletion import AsyncDeletion, 
DeletionType from posthog.models.dashboard import Dashboard from posthog.models.instance_setting import get_instance_setting @@ -24,6 +26,22 @@ class TestTeamAPI(APIBaseTest): + def _assert_activity_log(self, expected: List[Dict], team_id: Optional[int] = None) -> None: + if not team_id: + team_id = self.team.pk + + starting_log_response = self.client.get(f"/api/projects/{team_id}/activity") + assert starting_log_response.status_code == 200 + assert starting_log_response.json()["results"] == expected + + def _assert_organization_activity_log(self, expected: List[Dict]) -> None: + starting_log_response = self.client.get(f"/api/organizations/{self.organization.pk}/activity") + assert starting_log_response.status_code == 200 + assert starting_log_response.json()["results"] == expected + + def _assert_activity_log_is_empty(self) -> None: + self._assert_activity_log([]) + def test_list_projects(self): response = self.client.get("/api/projects/") self.assertEqual(response.status_code, status.HTTP_200_OK) @@ -140,16 +158,49 @@ def test_cant_create_a_second_project_without_license(self): response_data, ) + @freeze_time("2022-02-08") def test_update_project_timezone(self): - response = self.client.patch("/api/projects/@current/", {"timezone": "Europe/Istanbul"}) + self._assert_activity_log_is_empty() + + response = self.client.patch("/api/projects/@current/", {"timezone": "Europe/Lisbon"}) self.assertEqual(response.status_code, status.HTTP_200_OK) response_data = response.json() self.assertEqual(response_data["name"], self.team.name) - self.assertEqual(response_data["timezone"], "Europe/Istanbul") + self.assertEqual(response_data["timezone"], "Europe/Lisbon") self.team.refresh_from_db() - self.assertEqual(self.team.timezone, "Europe/Istanbul") + self.assertEqual(self.team.timezone, "Europe/Lisbon") + + self._assert_activity_log( + [ + { + "activity": "updated", + "created_at": "2022-02-08T00:00:00Z", + "detail": { + "changes": [ + { + "action": "changed", + "after": "Europe/Lisbon", + "before": "UTC", + "field": "timezone", + "type": "Team", + }, + ], + "name": "Default Project", + "short_id": None, + "trigger": None, + "type": None, + }, + "item_id": str(self.team.pk), + "scope": "Team", + "user": { + "email": "user1@posthog.com", + "first_name": "", + }, + }, + ] + ) def test_update_test_filter_default_checked(self): response = self.client.patch("/api/projects/@current/", {"test_account_filters_default_checked": "true"}) @@ -202,6 +253,50 @@ def test_filter_permission(self): [{"key": "$current_url", "value": "test"}], ) + @freeze_time("2022-02-08") + @patch("posthog.api.team.delete_bulky_postgres_data") + @patch("posthoganalytics.capture") + def test_delete_team_activity_log(self, mock_capture: MagicMock, mock_delete_bulky_postgres_data: MagicMock): + self.organization_membership.level = OrganizationMembership.Level.ADMIN + self.organization_membership.save() + + team: Team = Team.objects.create_with_data(organization=self.organization) + + response = self.client.delete(f"/api/projects/{team.id}") + assert response.status_code == 204 + + # activity log is queried in the context of the team + # and the team was deleted, so we can't (for now) view a deleted team activity via the API + # even though the activity log is recorded + + deleted_team_activity_response = self.client.get(f"/api/projects/{team.id}/activity") + assert deleted_team_activity_response.status_code == status.HTTP_404_NOT_FOUND + + # we can't query by API but can prove the log was recorded + activity = [a.__dict__ for a in 
ActivityLog.objects.filter(team_id=team.pk).all()] + assert activity == [ + { + "_state": ANY, + "activity": "deleted", + "created_at": ANY, + "detail": { + "changes": None, + "name": "Default Project", + "short_id": None, + "trigger": None, + "type": None, + }, + "id": ANY, + "is_system": False, + "organization_id": ANY, + "team_id": team.pk, + "item_id": str(team.pk), + "scope": "Team", + "user_id": self.user.pk, + "was_impersonated": False, + }, + ] + @patch("posthog.api.team.delete_bulky_postgres_data") @patch("posthoganalytics.capture") def test_delete_team_own_second(self, mock_capture: MagicMock, mock_delete_bulky_postgres_data: MagicMock): @@ -328,10 +423,13 @@ def test_delete_batch_exports(self): with self.assertRaises(RPCError): describe_schedule(temporal, batch_export_id) + @freeze_time("2022-02-08") def test_reset_token(self): self.organization_membership.level = OrganizationMembership.Level.ADMIN self.organization_membership.save() + self._assert_activity_log_is_empty() + self.team.api_token = "xyz" self.team.save() @@ -344,6 +442,36 @@ def test_reset_token(self): self.assertEqual(response_data["api_token"], self.team.api_token) self.assertTrue(response_data["api_token"].startswith("phc_")) + self._assert_activity_log( + [ + { + "activity": "updated", + "created_at": "2022-02-08T00:00:00Z", + "detail": { + "changes": [ + { + "action": "changed", + "after": None, + "before": None, + "field": "api_token", + "type": "Team", + }, + ], + "name": "Default Project", + "short_id": None, + "trigger": None, + "type": None, + }, + "item_id": str(self.team.pk), + "scope": "Team", + "user": { + "email": "user1@posthog.com", + "first_name": "", + }, + }, + ] + ) + def test_reset_token_insufficient_priviledges(self): self.team.api_token = "xyz" self.team.save() @@ -416,6 +544,76 @@ def test_org_member_can_create_demo_project(self, mock_create_data_for_demo_team mock_create_data_for_demo_team.assert_called_once() self.assertEqual(response.status_code, status.HTTP_201_CREATED) + @freeze_time("2022-02-08") + def test_team_float_config_can_be_serialized_to_activity_log(self): + # regression test since this isn't true by default + response = self.client.patch(f"/api/projects/@current/", {"session_recording_sample_rate": 0.4}) + assert response.status_code == status.HTTP_200_OK + self._assert_activity_log( + [ + { + "activity": "updated", + "created_at": "2022-02-08T00:00:00Z", + "detail": { + "changes": [ + { + "action": "created", + "after": "0.4", + "before": None, + "field": "session_recording_sample_rate", + "type": "Team", + }, + ], + "name": "Default Project", + "short_id": None, + "trigger": None, + "type": None, + }, + "item_id": str(self.team.pk), + "scope": "Team", + "user": { + "email": "user1@posthog.com", + "first_name": "", + }, + }, + ] + ) + + @freeze_time("2022-02-08") + def test_team_creation_is_in_activity_log(self): + Team.objects.all().delete() + + self.organization_membership.level = OrganizationMembership.Level.ADMIN + self.organization_membership.save() + + team_name = str(uuid.uuid4()) + response = self.client.post("/api/projects/", {"name": team_name, "is_demo": False}) + self.assertEqual(response.status_code, status.HTTP_201_CREATED) + + team_id = response.json()["id"] + self._assert_activity_log( + [ + { + "activity": "created", + "created_at": "2022-02-08T00:00:00Z", + "detail": { + "changes": None, + "name": team_name, + "short_id": None, + "trigger": None, + "type": None, + }, + "item_id": str(team_id), + "scope": "Team", + "user": { + "email": "user1@posthog.com", 
+ "first_name": "", + }, + }, + ], + team_id=team_id, + ) + def test_team_is_cached_on_create_and_update(self): Team.objects.all().delete() self.organization_membership.level = OrganizationMembership.Level.ADMIN diff --git a/posthog/hogql/autocomplete.py b/posthog/hogql/autocomplete.py index d1d835f890fa72..8aba7802b256e9 100644 --- a/posthog/hogql/autocomplete.py +++ b/posthog/hogql/autocomplete.py @@ -1,7 +1,30 @@ -from typing import cast +from copy import copy +from typing import Callable, Dict, List, Optional, cast +from posthog.hogql.context import HogQLContext +from posthog.hogql.database.database import create_hogql_database +from posthog.hogql.database.models import ( + BooleanDatabaseField, + DatabaseField, + DateDatabaseField, + DateTimeDatabaseField, + FieldOrTable, + FloatDatabaseField, + IntegerDatabaseField, + LazyJoin, + LazyTable, + StringDatabaseField, + StringJSONDatabaseField, + Table, + VirtualTable, +) from posthog.hogql.filters import replace_filters +from posthog.hogql.functions.mapping import ALL_EXPOSED_FUNCTION_NAMES from posthog.hogql.parser import parse_select from posthog.hogql import ast +from posthog.hogql.base import AST, CTE, ConstantType +from posthog.hogql.resolver import resolve_types +from posthog.hogql.visitor import TraversingVisitor +from posthog.models.property_definition import PropertyDefinition from posthog.models.team.team import Team from posthog.schema import ( HogQLAutocomplete, @@ -11,22 +34,286 @@ ) +class GetNodeAtPositionTraverser(TraversingVisitor): + start: int + end: int + selects: List[ast.SelectQuery] = [] + node: Optional[AST] = None + parent_node: Optional[AST] = None + last_node: Optional[AST] = None + nearest_select_query: Optional[ast.SelectQuery] = None + + def __init__(self, expr: ast.Expr, start: int, end: int): + super().__init__() + self.start = start + self.end = end + super().visit(expr) + + def visit(self, node: AST): + if node is not None and node.start is not None and node.end is not None: + if self.start >= node.start and self.end <= node.end: + self.node = node + self.parent_node = self.last_node + self.nearest_select_query = self.selects[-1] + + self.last_node = node + super().visit(node) + + def visit_select_query(self, node): + self.selects.append(node) + node = super().visit_select_query(node) + self.selects.pop() + + +def constant_type_to_database_field(constant_type: ConstantType, name: str) -> DatabaseField: + if isinstance(constant_type, ast.BooleanType): + return BooleanDatabaseField(name=name) + if isinstance(constant_type, ast.IntegerType): + return IntegerDatabaseField(name=name) + if isinstance(constant_type, ast.FloatType): + return FloatDatabaseField(name=name) + if isinstance(constant_type, ast.StringType): + return StringDatabaseField(name=name) + if isinstance(constant_type, ast.DateTimeType): + return DateTimeDatabaseField(name=name) + if isinstance(constant_type, ast.DateType): + return DateDatabaseField(name=name) + + return DatabaseField(name=name) + + +def get_table(context: HogQLContext, join_expr: ast.JoinExpr, ctes: Optional[Dict[str, CTE]]) -> None | Table: + assert context.database is not None + + def resolve_fields_on_table(table: Table | None, table_query: ast.SelectQuery) -> Table | None: + # Resolve types and only return selected fields + if table is None: + return None + + try: + node = cast(ast.SelectQuery, resolve_types(node=table_query, dialect="hogql", context=context)) + if node.type is None: + return None + + selected_columns = node.type.columns + new_fields: Dict[str, 
FieldOrTable] = {} + for name, field in selected_columns.items(): + if isinstance(field, ast.FieldAliasType): + underlying_field_name = field.alias + if isinstance(field.type, ast.FieldAliasType): + underlying_field_name = field.type.alias + elif isinstance(field.type, ast.ConstantType): + constant_field = constant_type_to_database_field(field.type, name) + new_fields[name] = constant_field + continue + elif isinstance(field, ast.FieldType): + underlying_field_name = field.name + else: + underlying_field_name = name + elif isinstance(field, ast.FieldType): + underlying_field_name = field.name + else: + underlying_field_name = name + + new_fields[name] = table.fields[underlying_field_name] + + table_name = table.to_printed_hogql() + + # Return a new table with a reduced field set + class AnonTable(Table): + fields: Dict[str, FieldOrTable] = new_fields + + def to_printed_hogql(self): + # Use the base table name for resolving property definitions later + return table_name + + return AnonTable() + except Exception: + return None + + if isinstance(join_expr.table, ast.Field): + table_name = str(join_expr.table.chain[0]) + if ctes is not None: + # Handle CTEs + cte = ctes.get(table_name) + if cte is not None: + if cte.cte_type == "subquery" and isinstance(cte.expr, ast.SelectQuery): + query = cast(ast.SelectQuery, cte.expr) + if query.select_from is not None: + table = get_table(context, query.select_from, ctes) + return resolve_fields_on_table(table, query) + + # Handle a base table + if context.database.has_table(table_name): + return context.database.get_table(table_name) + elif isinstance(join_expr.table, ast.SelectQuery): + if join_expr.table.select_from is None: + return None + + # Recursively get the base table + underlying_table = get_table(context, join_expr.table.select_from, ctes) + + if underlying_table is None: + return None + + return resolve_fields_on_table(underlying_table, join_expr.table) + return None + + +def extend_responses( + keys: List[str], + suggestions: List[AutocompleteCompletionItem], + kind: Kind = Kind.Variable, + insert_text: Optional[Callable[[str], str]] = None, +) -> None: + suggestions.extend( + [ + AutocompleteCompletionItem( + insertText=insert_text(key) if insert_text is not None else key, + label=key, + kind=kind, + ) + for key in keys + ] + ) + + +MATCH_ANY_CHARACTER = "$$_POSTHOG_ANY_$$" +PROPERTY_DEFINITION_LIMIT = 220 + + +# TODO: Support ast.SelectUnionQuery nodes def get_hogql_autocomplete(query: HogQLAutocomplete, team: Team) -> HogQLAutocompleteResponse: response = HogQLAutocompleteResponse(suggestions=[]) - try: - select_ast = parse_select(query.select) - if query.filters: - select_ast = cast(ast.SelectQuery, replace_filters(select_ast, query.filters, team)) - except Exception: - pass - - response.suggestions.append( - AutocompleteCompletionItem( - insertText="some_field", - label="some_field", - kind=Kind.Field, - ) - ) + database = create_hogql_database(team_id=team.pk, team_arg=team) + context = HogQLContext(team_id=team.pk, team=team, database=database) + + original_query_select = copy(query.select) + original_end_position = copy(query.endPosition) + + for extra_characters, length_to_add in [ + ("", 0), + (MATCH_ANY_CHARACTER, len(MATCH_ANY_CHARACTER)), + (" FROM events", 0), + (f"{MATCH_ANY_CHARACTER} FROM events", len(MATCH_ANY_CHARACTER)), + ]: + try: + query.select = ( + original_query_select[:original_end_position] + + extra_characters + + original_query_select[original_end_position:] + ) + query.endPosition = original_end_position + 
length_to_add + + select_ast = parse_select(query.select) + if query.filters: + select_ast = cast(ast.SelectQuery, replace_filters(select_ast, query.filters, team)) + + if isinstance(select_ast, ast.SelectQuery): + ctes = select_ast.ctes + else: + ctes = select_ast.select_queries[0].ctes + + find_node = GetNodeAtPositionTraverser(select_ast, query.startPosition, query.endPosition) + node = find_node.node + parent_node = find_node.parent_node + nearest_select = find_node.nearest_select_query or select_ast + + table_has_alias = ( + nearest_select is not None + and isinstance(nearest_select, ast.SelectQuery) + and nearest_select.select_from is not None + and nearest_select.select_from.alias is not None + ) + + if ( + isinstance(node, ast.Field) + and isinstance(nearest_select, ast.SelectQuery) + and nearest_select.select_from is not None + and not isinstance(parent_node, ast.JoinExpr) + ): + # TODO: add logic for FieldTraverser field types + + # Handle fields + table = get_table(context, nearest_select.select_from, ctes) + if table is None: + continue + + chain_len = len(node.chain) + last_table: Table = table + for index, chain_part in enumerate(node.chain): + # Return just the table alias + if table_has_alias and index == 0 and chain_len == 1: + extend_responses([str(chain_part)], response.suggestions, Kind.Folder) + break + + if table_has_alias and index == 0: + continue + + # Ignore the last chain part; it's likely an incomplete word or added characters + is_last_part = index >= (chain_len - 2) + + if is_last_part: + if last_table.fields.get(str(chain_part)) is None: + fields = list(table.fields.keys()) + extend_responses(fields, response.suggestions) + + available_functions = ALL_EXPOSED_FUNCTION_NAMES + extend_responses( + available_functions, + response.suggestions, + Kind.Function, + insert_text=lambda key: f"{key}()", + ) + break + + field = last_table.fields[str(chain_part)] + + if isinstance(field, StringJSONDatabaseField): + if last_table.to_printed_hogql() == "events": + property_type = PropertyDefinition.Type.EVENT + elif last_table.to_printed_hogql() == "persons": + property_type = PropertyDefinition.Type.PERSON + elif last_table.to_printed_hogql() == "groups": + property_type = PropertyDefinition.Type.GROUP + else: + property_type = None + + if property_type is not None: + match_term = query.select[query.startPosition : query.endPosition] + if match_term == MATCH_ANY_CHARACTER: + match_term = "" + + properties = PropertyDefinition.objects.filter( + name__contains=match_term, + team_id=team.pk, + type=property_type, + )[:PROPERTY_DEFINITION_LIMIT].values("name") + + extend_responses([prop["name"] for prop in properties], response.suggestions) + elif isinstance(field, VirtualTable) or isinstance(field, LazyTable): + fields = list(last_table.fields.keys()) + extend_responses(fields, response.suggestions) + elif isinstance(field, LazyJoin): + fields = list(field.join_table.fields.keys()) + extend_responses(fields, response.suggestions) + break + else: + field = last_table.fields[str(chain_part)] + if isinstance(field, Table): + last_table = field + elif isinstance(field, LazyJoin): + last_table = field.join_table + elif isinstance(node, ast.Field) and isinstance(parent_node, ast.JoinExpr): + # Handle table names + if len(node.chain) == 1: + table_names = database.get_all_tables() + extend_responses(table_names, response.suggestions, Kind.Folder) + except Exception: + pass + + if len(response.suggestions) != 0: + break return response
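A note on the loop above: because users autocomplete mid-keystroke, the raw `select` often fails to parse, so the code splices a sentinel (`MATCH_ANY_CHARACTER`) and, as a further fallback, a `FROM events` clause in at the cursor before retrying `parse_select`; `length_to_add` then shifts `endPosition` so the sentinel is covered by the match term. A self-contained sketch of just the splicing step (the constant is the one defined above; the helper itself is illustrative only):

```python
MATCH_ANY_CHARACTER = "$$_POSTHOG_ANY_$$"

def candidate_queries(select: str, cursor: int) -> list[str]:
    # Try the query as written, then with a sentinel and/or a fallback FROM
    # clause spliced in at the cursor, so an incomplete query can still parse.
    variants = ["", MATCH_ANY_CHARACTER, " FROM events", MATCH_ANY_CHARACTER + " FROM events"]
    return [select[:cursor] + extra + select[cursor:] for extra in variants]

# "select " with the cursor at offset 7 becomes parseable once the sentinel
# and FROM clause are spliced in: "select $$_POSTHOG_ANY_$$ FROM events"
print(candidate_queries("select ", 7)[3])
```

diff --git a/posthog/hogql/database/database.py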
b/posthog/hogql/database/database.py index b25c7c95c4ca35..d76d4a1e99c3af 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -86,7 +86,7 @@ class Database(BaseModel): _table_names: ClassVar[List[str]] = [ "events", "groups", - "person", + "persons", "person_distinct_id2", "person_overrides", "session_replay_events", @@ -95,6 +95,8 @@ class Database(BaseModel): "log_entries", ] + _warehouse_table_names: List[str] = [] + _timezone: Optional[str] _week_start_day: Optional[WeekStartDay] @@ -120,9 +122,13 @@ def get_table(self, table_name: str) -> Table: return getattr(self, table_name) raise HogQLException(f'Table "{table_name}" not found in database') + def get_all_tables(self) -> List[str]: + return self._table_names + self._warehouse_table_names + def add_warehouse_tables(self, **field_definitions: Any): for f_name, f_def in field_definitions.items(): setattr(self, f_name, f_def) + self._warehouse_table_names.append(f_name) def create_hogql_database( diff --git a/posthog/hogql/test/test_autocomplete.py b/posthog/hogql/test/test_autocomplete.py new file mode 100644 index 00000000000000..f644ae7337e4da --- /dev/null +++ b/posthog/hogql/test/test_autocomplete.py @@ -0,0 +1,150 @@ +from posthog.hogql.autocomplete import get_hogql_autocomplete +from posthog.hogql.database.schema.events import EventsTable +from posthog.hogql.database.schema.persons import PERSONS_FIELDS +from posthog.models.property_definition import PropertyDefinition +from posthog.schema import HogQLAutocomplete, HogQLAutocompleteResponse +from posthog.test.base import APIBaseTest, ClickhouseTestMixin + + +class TestAutocomplete(ClickhouseTestMixin, APIBaseTest): + def _create_properties(self): + PropertyDefinition.objects.create( + team=self.team, + name="some_event_value", + property_type="String", + type=PropertyDefinition.Type.EVENT, + ) + PropertyDefinition.objects.create( + team=self.team, + name="some_person_value", + property_type="String", + type=PropertyDefinition.Type.PERSON, + ) + + def _query_response(self, query: str, start: int, end: int) -> HogQLAutocompleteResponse: + autocomplete = HogQLAutocomplete(kind="HogQLAutocomplete", select=query, startPosition=start, endPosition=end) + return get_hogql_autocomplete(query=autocomplete, team=self.team) + + def test_autocomplete(self): + query = "select * from events" + results = self._query_response(query=query, start=0, end=0) + assert len(results.suggestions) == 0 + + def test_autocomplete_events_suggestions(self): + query = "select from events" + results = self._query_response(query=query, start=7, end=7) + assert len(results.suggestions) != 0 + + def test_autocomplete_functions(self): + query = "select from events" + results = self._query_response(query=query, start=7, end=7) + assert "toDateTime" in [suggestion.label for suggestion in results.suggestions] + assert "toDateTime()" in [suggestion.insertText for suggestion in results.suggestions] + + def test_autocomplete_persons_suggestions(self): + query = "select from persons" + results = self._query_response(query=query, start=7, end=7) + assert len(results.suggestions) != 0 + + def test_autocomplete_assume_events_table(self): + query = "select " + results = self._query_response(query=query, start=7, end=7) + assert len(results.suggestions) != 0 + assert "event" in [suggestion.label for suggestion in results.suggestions] + + def test_autocomplete_events_properties(self): + self._create_properties() + + query = "select properties. 
from events" + results = self._query_response(query=query, start=18, end=18) + assert len(results.suggestions) == 1 + assert results.suggestions[0].label == "some_event_value" + + def test_autocomplete_persons_properties(self): + self._create_properties() + + query = "select properties. from persons" + results = self._query_response(query=query, start=18, end=18) + assert len(results.suggestions) == 1 + assert results.suggestions[0].label == "some_person_value" + + def test_autocomplete_lazy_join(self): + query = "select pdi. from events" + results = self._query_response(query=query, start=11, end=11) + assert len(results.suggestions) == 4 + + def test_autocomplete_virtual_table(self): + query = "select poe. from events" + results = self._query_response(query=query, start=11, end=11) + assert len(results.suggestions) != 0 + + def test_autocomplete_events_properties_partial_matching(self): + self._create_properties() + + query = "select properties.some_ from events" + results = self._query_response(query=query, start=18, end=23) + assert len(results.suggestions) == 1 + assert results.suggestions[0].label == "some_event_value" + + def test_autocomplete_nested_tables(self): + # Inner table + query = "select event, (select from persons) as blah from events" + results = self._query_response(query=query, start=22, end=22) + + keys = list(PERSONS_FIELDS.keys()) + + for index, key in enumerate(keys): + assert results.suggestions[index].label == key + + # Outer table + query = "select , (select id from persons) as blah from events" + results = self._query_response(query=query, start=7, end=7) + + keys = list(EventsTable().fields.keys()) + + for index, key in enumerate(keys): + assert results.suggestions[index].label == key + + def test_autocomplete_table_name(self): + query = "select event from " + results = self._query_response(query=query, start=18, end=18) + assert len(results.suggestions) != 0 + + def test_autocomplete_table_name_dot_notation(self): + query = "select event from events." + results = self._query_response(query=query, start=25, end=25) + assert len(results.suggestions) == 0 + + def test_autocomplete_recursive_fields(self): + self._create_properties() + + query = "select pdi.person.properties. 
from events" + results = self._query_response(query=query, start=29, end=29) + assert len(results.suggestions) == 1 + assert results.suggestions[0].label == "some_person_value" + + def test_autocomplete_subquery_cte(self): + query = "select e from (select event from events)" + results = self._query_response(query=query, start=7, end=8) + assert results.suggestions[0].label == "event" + assert "properties" not in [suggestion.label for suggestion in results.suggestions] + + def test_autocomplete_with_cte(self): + query = "with blah as (select event from events) select e from blah" + results = self._query_response(query=query, start=47, end=48) + assert results.suggestions[0].label == "event" + assert "properties" not in [suggestion.label for suggestion in results.suggestions] + + def test_autocomplete_cte_alias(self): + query = "select p from (select event as potato from events)" + results = self._query_response(query=query, start=7, end=8) + assert results.suggestions[0].label == "potato" + assert "event" not in [suggestion.label for suggestion in results.suggestions] + assert "properties" not in [suggestion.label for suggestion in results.suggestions] + + def test_autocomplete_cte_constant_type(self): + query = "select p from (select 'hello' as potato from events)" + results = self._query_response(query=query, start=7, end=8) + assert results.suggestions[0].label == "potato" + assert "event" not in [suggestion.label for suggestion in results.suggestions] + assert "properties" not in [suggestion.label for suggestion in results.suggestions] diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index d45052c06889ab..98abb1ceb6030f 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -31,7 +31,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [16]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [2]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -42,7 +42,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [16])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [2])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -55,7 +55,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [17]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [3]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, 
ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -66,7 +66,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [17])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [3])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' diff --git a/posthog/hogql_queries/insights/funnels/__init__.py b/posthog/hogql_queries/insights/funnels/__init__.py index 35dbf5b0045c13..d6cddab2ba2935 100644 --- a/posthog/hogql_queries/insights/funnels/__init__.py +++ b/posthog/hogql_queries/insights/funnels/__init__.py @@ -1,2 +1,3 @@ from .base import FunnelBase from .funnel import Funnel +from .funnel_strict import FunnelStrict diff --git a/posthog/hogql_queries/insights/funnels/funnel_strict.py b/posthog/hogql_queries/insights/funnels/funnel_strict.py new file mode 100644 index 00000000000000..f7bc5b0d34de84 --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/funnel_strict.py @@ -0,0 +1,129 @@ +from typing import List + +from posthog.hogql import ast +from posthog.hogql.parser import parse_expr +from posthog.hogql_queries.insights.funnels.base import FunnelBase + + +class FunnelStrict(FunnelBase): + def get_query(self): + max_steps = self.context.max_steps + + # breakdown_exprs = self._get_breakdown_prop_expr() + + select: List[ast.Expr] = [ + *self._get_count_columns(max_steps), + *self._get_step_time_avgs(max_steps), + *self._get_step_time_median(max_steps), + # *breakdown_exprs, + ] + + return ast.SelectQuery( + select=select, + select_from=ast.JoinExpr(table=self.get_step_counts_query()), + # group_by=[ast.Field(chain=["prop"])] if len(breakdown_exprs) > 0 else None, + ) + + def get_step_counts_query(self): + max_steps = self.context.max_steps + # breakdown_exprs = self._get_breakdown_prop_expr() + inner_timestamps, outer_timestamps = self._get_timestamp_selects() + person_and_group_properties = self._get_person_and_group_properties() + + group_by_columns: List[ast.Expr] = [ + ast.Field(chain=["aggregation_target"]), + ast.Field(chain=["steps"]), + # *breakdown_exprs, + ] + + outer_select: List[ast.Expr] = [ + *group_by_columns, + *self._get_step_time_avgs(max_steps, inner_query=True), + *self._get_step_time_median(max_steps, inner_query=True), + *self._get_matching_event_arrays(max_steps), + # *breakdown_exprs, + *outer_timestamps, + *person_and_group_properties, + ] + + max_steps_expr = parse_expr( + f"max(steps) over (PARTITION BY aggregation_target {self._get_breakdown_prop()}) as max_steps" + ) + + inner_select: List[ast.Expr] = [ + *group_by_columns, + max_steps_expr, + *self._get_step_time_names(max_steps), + *self._get_matching_events(max_steps), + # *breakdown_exprs, + *inner_timestamps, + *person_and_group_properties, + ] + + return ast.SelectQuery( + select=outer_select, + select_from=ast.JoinExpr( + table=ast.SelectQuery( + select=inner_select, + select_from=ast.JoinExpr(table=self.get_step_counts_without_aggregation_query()), + ) + ), + group_by=group_by_columns, + having=ast.CompareOperation( + left=ast.Field(chain=["steps"]), right=ast.Field(chain=["max_steps"]), op=ast.CompareOperationOp.Eq + ), + ) + + def get_step_counts_without_aggregation_query(self): + max_steps = self.context.max_steps + + select_inner: List[ast.Expr] = [ + ast.Field(chain=["aggregation_target"]), + ast.Field(chain=["timestamp"]), + 
*self._get_partition_cols(1, max_steps), + # *self._get_breakdown_prop_expr(group_remaining=True), + *self._get_person_and_group_properties(), + ] + select_from_inner = self._get_inner_event_query(skip_entity_filter=True, skip_step_filter=True) + inner_query = ast.SelectQuery(select=select_inner, select_from=ast.JoinExpr(table=select_from_inner)) + + select: List[ast.Expr] = [ + ast.Field(chain=["*"]), + ast.Alias(alias="steps", expr=self._get_sorting_condition(max_steps, max_steps)), + *self._get_step_times(max_steps), + *self._get_matching_events(max_steps), + *self._get_person_and_group_properties(), + ] + select_from = ast.JoinExpr(table=inner_query) + where = ast.CompareOperation( + left=ast.Field(chain=["step_0"]), right=ast.Constant(value=1), op=ast.CompareOperationOp.Eq + ) + return ast.SelectQuery(select=select, select_from=select_from, where=where) + + def _get_partition_cols(self, level_index: int, max_steps: int): + exprs: List[ast.Expr] = [] + + for i in range(0, max_steps): + exprs.append(ast.Field(chain=[f"step_{i}"])) + + if i < level_index: + exprs.append(ast.Field(chain=[f"latest_{i}"])) + + # for field in self.extra_event_fields_and_properties: + # exprs.append(ast.Field(chain=[f'"{field}_{i}"'])) + + else: + exprs.append( + parse_expr( + f"min(latest_{i}) over (PARTITION by aggregation_target {self._get_breakdown_prop()} ORDER BY timestamp DESC ROWS BETWEEN {i} PRECEDING AND {i} PRECEDING) latest_{i}" + ) + ) + + # for field in self.extra_event_fields_and_properties: + # exprs.append( + # parse_expr( + # f'min("{field}_{i}") over (PARTITION by aggregation_target {self._get_breakdown_prop()} ORDER BY timestamp DESC ROWS BETWEEN {i} PRECEDING AND {i} PRECEDING) "{field}_{i}"' + # ) + # ) + + return exprs diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index 33bc454ab2c283..4a8a0293237736 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -350,7 +350,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 4)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 8)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, @@ -871,7 +871,7 @@ if(and(equals(e.event, 'user signed up'), ifNull(in(e__pdi.person_id, (SELECT person_static_cohort.person_id AS person_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 5)))), 0)), 1, 0) AS step_0, + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, 9)))), 0)), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, 'paid'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 diff --git a/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py b/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py index 59b62cc686ffbd..63cf914e84cc8f 100644 --- a/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py +++ 
b/posthog/hogql_queries/insights/funnels/test/conversion_time_cases.py @@ -1,7 +1,7 @@ from datetime import datetime from typing import cast -from posthog.constants import INSIGHT_FUNNELS +from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.models.filters import Filter @@ -10,7 +10,7 @@ from posthog.test.test_journeys import journeys_for -def funnel_conversion_time_test_factory(Funnel, FunnelPerson, _create_event, _create_person): +def funnel_conversion_time_test_factory(funnel_order_type: FunnelOrderType, FunnelPerson): class TestFunnelConversionTime(APIBaseTest): def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): filter = Filter(data=filter, team=self.team) @@ -21,6 +21,8 @@ def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): def test_funnel_with_multiple_incomplete_tries(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "user signed up", "type": "events", "order": 0}, {"id": "$pageview", "type": "events", "order": 1}, @@ -29,7 +31,6 @@ def test_funnel_with_multiple_incomplete_tries(self): "funnel_window_days": 1, "date_from": "2021-05-01 00:00:00", "date_to": "2021-05-14 00:00:00", - "insight": INSIGHT_FUNNELS, } people = journeys_for( @@ -76,12 +77,13 @@ def test_funnel_with_multiple_incomplete_tries(self): def test_funnel_step_conversion_times(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}, {"id": "buy", "order": 2}, ], - "insight": INSIGHT_FUNNELS, "date_from": "2020-01-01", "date_to": "2020-01-08", "funnel_window_days": 7, @@ -120,11 +122,12 @@ def test_funnel_step_conversion_times(self): def test_funnel_times_with_different_conversion_windows(self): filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": funnel_order_type, "events": [ {"id": "user signed up", "type": "events", "order": 0}, {"id": "pageview", "type": "events", "order": 1}, ], - "insight": INSIGHT_FUNNELS, "funnel_window_interval": 14, "funnel_window_interval_unit": "day", "date_from": "2020-01-01", diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index d100cc4e399e9b..30b0cfd0df0fb1 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -5,7 +5,7 @@ from freezegun import freeze_time from posthog.api.instance_settings import get_instance_setting from posthog.clickhouse.client.execute import sync_execute -from posthog.constants import INSIGHT_FUNNELS +from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner @@ -62,7 +62,7 @@ def _create_action(**kwargs): class TestFunnelConversionTime( ClickhouseTestMixin, - funnel_conversion_time_test_factory(Funnel, ClickhouseFunnelActors, _create_event, _create_person), # type: ignore + funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelActors), # type: ignore ): maxDiff = None pass
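Before the strict-funnel tests below, a recap of what `FunnelStrict` computes: a user advances from step k to step k+1 only when the step k+1 event is the very next event in their timestamp-ordered stream, which is what the `ROWS BETWEEN i PRECEDING AND i PRECEDING` window in `_get_partition_cols` enforces. A toy model of those semantics (illustration only, not PostHog code):

```python
from typing import List

def strict_steps_completed(events: List[str], steps: List[str]) -> int:
    # Each occurrence of the first step starts a fresh attempt; an attempt
    # advances only while the following events match the next steps exactly.
    best = 0
    for start, event in enumerate(events):
        if event != steps[0]:
            continue
        k = 1
        while k < len(steps) and start + k < len(events) and events[start + k] == steps[k]:
            k += 1
        best = max(best, k)
    return best

# Mirrors person6 in the tests below: the "blaah blaa" event between
# "user signed up" and "$pageview" keeps them at step 1.
assert strict_steps_completed(
    ["blaah blaa", "user signed up", "blaah blaa", "$pageview"],
    ["user signed up", "$pageview", "insight viewed"],
) == 1
```

diff --git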
a/posthog/hogql_queries/insights/funnels/test/test_funnel_strict.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_strict.py new file mode 100644 index 00000000000000..b101fe718be619 --- /dev/null +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_strict.py @@ -0,0 +1,617 @@ +# from datetime import datetime +from typing import cast + +from posthog.constants import INSIGHT_FUNNELS, FunnelOrderType +from posthog.hogql_queries.insights.funnels.funnels_query_runner import FunnelsQueryRunner +from posthog.hogql_queries.insights.funnels.test.conversion_time_cases import ( + funnel_conversion_time_test_factory, +) + +# from posthog.hogql_queries.insights.funnels.test.breakdown_cases import ( +# assert_funnel_results_equal, +# funnel_breakdown_test_factory, +# ) +from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query +from posthog.models.action import Action +from posthog.models.action_step import ActionStep +from posthog.models.filters import Filter +from posthog.models.instance_setting import override_instance_config +from posthog.queries.funnels.funnel_strict_persons import ClickhouseFunnelStrictActors +from posthog.schema import FunnelsQuery +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_event, + _create_person, +) + +# from posthog.test.test_journeys import journeys_for + +FORMAT_TIME = "%Y-%m-%d 00:00:00" + + +def _create_action(**kwargs): + team = kwargs.pop("team") + name = kwargs.pop("name") + properties = kwargs.pop("properties", {}) + action = Action.objects.create(team=team, name=name) + ActionStep.objects.create(action=action, event=name, properties=properties) + return action + + +# class TestFunnelStrictStepsBreakdown( +# ClickhouseTestMixin, +# funnel_breakdown_test_factory( # type: ignore +# FunnelStrict, +# ClickhouseFunnelStrictActors, +# _create_event, +# _create_action, +# _create_person, +# ), +# ): +# maxDiff = None + +# def test_basic_funnel_default_funnel_days_breakdown_event(self): +# # TODO: This test and the one below it fail, only for strict funnels. 
Figure out why and how to fix +# pass + +# def test_basic_funnel_default_funnel_days_breakdown_action(self): +# pass + +# def test_basic_funnel_default_funnel_days_breakdown_action_materialized(self): +# pass + +# def test_strict_breakdown_events_with_multiple_properties(self): +# filters = { +# "events": [{"id": "sign up", "order": 0}, {"id": "play movie", "order": 1}], +# "insight": INSIGHT_FUNNELS, +# "date_from": "2020-01-01", +# "date_to": "2020-01-08", +# "funnel_window_days": 7, +# "breakdown_type": "event", +# "breakdown": "$browser", +# } + +# people = journeys_for( +# { +# "person1": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 1, 12), +# "properties": {"$browser": "Chrome"}, +# }, +# { +# "event": "blah", +# "timestamp": datetime(2020, 1, 1, 13), +# "properties": {"$browser": "Chrome"}, +# }, +# { +# "event": "play movie", +# "timestamp": datetime(2020, 1, 1, 14), +# "properties": {"$browser": "Chrome"}, +# }, +# ], +# "person2": [ +# { +# "event": "sign up", +# "timestamp": datetime(2020, 1, 2, 13), +# "properties": {"$browser": "Safari"}, +# }, +# { +# "event": "play movie", +# "timestamp": datetime(2020, 1, 2, 14), +# "properties": {"$browser": "Safari"}, +# }, +# ], +# }, +# self.team, +# ) + +# query = cast(FunnelsQuery, filter_to_query(filters)) +# results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + +# assert_funnel_results_equal( +# results[0], +# [ +# { +# "action_id": "sign up", +# "name": "sign up", +# "custom_name": None, +# "order": 0, +# "people": [], +# "count": 1, +# "type": "events", +# "average_conversion_time": None, +# "median_conversion_time": None, +# "breakdown": ["Chrome"], +# "breakdown_value": ["Chrome"], +# }, +# { +# "action_id": "play movie", +# "name": "play movie", +# "custom_name": None, +# "order": 1, +# "people": [], +# "count": 0, +# "type": "events", +# "average_conversion_time": None, +# "median_conversion_time": None, +# "breakdown": ["Chrome"], +# "breakdown_value": ["Chrome"], +# }, +# ], +# ) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Chrome"]), [people["person1"].uuid]) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Chrome"]), []) + +# assert_funnel_results_equal( +# results[1], +# [ +# { +# "action_id": "sign up", +# "name": "sign up", +# "custom_name": None, +# "order": 0, +# "people": [], +# "count": 1, +# "type": "events", +# "average_conversion_time": None, +# "median_conversion_time": None, +# "breakdown": ["Safari"], +# "breakdown_value": ["Safari"], +# }, +# { +# "action_id": "play movie", +# "name": "play movie", +# "custom_name": None, +# "order": 1, +# "people": [], +# "count": 1, +# "type": "events", +# "average_conversion_time": 3600, +# "median_conversion_time": 3600, +# "breakdown": ["Safari"], +# "breakdown_value": ["Safari"], +# }, +# ], +# ) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 1, ["Safari"]), [people["person2"].uuid]) +# self.assertCountEqual(self._get_actor_ids_at_step(filters, 2, ["Safari"]), [people["person2"].uuid]) + + +class TestFunnelStrictStepsConversionTime( + ClickhouseTestMixin, + funnel_conversion_time_test_factory(FunnelOrderType.ORDERED, ClickhouseFunnelStrictActors), # type: ignore +): + maxDiff = None + pass + + +class TestFunnelStrictSteps(ClickhouseTestMixin, APIBaseTest): + maxDiff = None + + def _get_actor_ids_at_step(self, filter, funnel_step, breakdown_value=None): + filter = Filter(data=filter, team=self.team) + person_filter = filter.shallow_clone({"funnel_step": funnel_step, 
"funnel_step_breakdown": breakdown_value}) + _, serialized_result, _ = ClickhouseFunnelStrictActors(person_filter, self.team).get_actors() + + return [val["id"] for val in serialized_result] + + def test_basic_strict_funnel(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "strict", + "events": [ + {"id": "user signed up", "order": 0}, + {"id": "$pageview", "order": 1}, + {"id": "insight viewed", "order": 2}, + ], + } + + person1_stopped_after_signup = _create_person( + distinct_ids=["stopped_after_signup1"], + team_id=self.team.pk, + properties={"test": "okay"}, + ) + _create_event(team=self.team, event="user signed up", distinct_id="stopped_after_signup1") + + person2_stopped_after_one_pageview = _create_person( + distinct_ids=["stopped_after_pageview1"], team_id=self.team.pk + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_pageview1") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_pageview1", + ) + + person3_stopped_after_insight_view = _create_person( + distinct_ids=["stopped_after_insightview"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview") + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview") + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview", + ) + + person4_stopped_after_insight_view_not_strict_order = _create_person( + distinct_ids=["stopped_after_insightview2"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview2", + ) + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview2") + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview2") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview2", + ) + + person5_stopped_after_insight_view_random = _create_person( + distinct_ids=["stopped_after_insightview3"], team_id=self.team.pk + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview3") + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview3", + ) + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview3") + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview3") + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview3", + ) + + person6 = _create_person(distinct_ids=["person6"], team_id=self.team.pk) + _create_event(team=self.team, event="blaah blaa", distinct_id="person6") + _create_event(team=self.team, event="user signed up", distinct_id="person6") + _create_event(team=self.team, event="blaah blaa", distinct_id="person6") + _create_event(team=self.team, event="$pageview", distinct_id="person6") + + person7 = _create_person(distinct_ids=["person7"], team_id=self.team.pk) + _create_event(team=self.team, event="blaah blaa", distinct_id="person7") + _create_event(team=self.team, event="user signed up", distinct_id="person7") + _create_event(team=self.team, event="$pageview", distinct_id="person7") + _create_event(team=self.team, event="insight viewed", distinct_id="person7") + _create_event(team=self.team, event="blaah blaa", 
distinct_id="person7") + + _create_person(distinct_ids=["stopped_after_insightview6"], team_id=self.team.pk) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview6", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview6") + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "user signed up") + self.assertEqual(results[1]["name"], "$pageview") + self.assertEqual(results[2]["name"], "insight viewed") + self.assertEqual(results[0]["count"], 7) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + person4_stopped_after_insight_view_not_strict_order.uuid, + person5_stopped_after_insight_view_random.uuid, + person6.uuid, + person7.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2), + [person3_stopped_after_insight_view.uuid, person7.uuid], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 3), [person7.uuid]) + + with override_instance_config("AGGREGATE_BY_DISTINCT_IDS_TEAMS", f"{self.team.pk}"): + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + self.assertEqual(results[0]["name"], "user signed up") + self.assertEqual(results[1]["name"], "$pageview") + self.assertEqual(results[2]["name"], "insight viewed") + self.assertEqual(results[0]["count"], 7) + + def test_advanced_strict_funnel(self): + sign_up_action = _create_action( + name="sign up", + team=self.team, + properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}], + ) + + view_action = _create_action( + name="pageview", + team=self.team, + properties=[{"key": "key", "type": "event", "value": ["val"], "operator": "exact"}], + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "strict", + "events": [ + {"id": "user signed up", "type": "events", "order": 0}, + {"id": "$pageview", "type": "events", "order": 2}, + ], + "actions": [ + {"id": sign_up_action.id, "math": "dau", "order": 1}, + {"id": view_action.id, "math": "weekly_active", "order": 3}, + ], + } + + person1_stopped_after_signup = _create_person(distinct_ids=["stopped_after_signup1"], team_id=self.team.pk) + _create_event(team=self.team, event="user signed up", distinct_id="stopped_after_signup1") + + person2_stopped_after_one_pageview = _create_person( + distinct_ids=["stopped_after_pageview1"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_pageview1", + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_pageview1") + + person3_stopped_after_insight_view = _create_person( + distinct_ids=["stopped_after_insightview"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview", + ) + _create_event( + team=self.team, + event="sign up", + distinct_id="stopped_after_insightview", + properties={"key": "val"}, + ) + _create_event( + team=self.team, + event="sign up", + distinct_id="stopped_after_insightview", + properties={"key": "val2"}, + ) + _create_event(team=self.team, event="$pageview", distinct_id="stopped_after_insightview") + _create_event(team=self.team, event="blaah blaa", distinct_id="stopped_after_insightview") + _create_event( + 
team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview", + ) + + person4 = _create_person(distinct_ids=["person4"], team_id=self.team.pk) + _create_event(team=self.team, event="blaah blaa", distinct_id="person4") + _create_event(team=self.team, event="user signed up", distinct_id="person4") + _create_event( + team=self.team, + event="sign up", + distinct_id="person4", + properties={"key": "val"}, + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="person4", + properties={"key": "val"}, + ) + _create_event(team=self.team, event="blaah blaa", distinct_id="person4") + + person5 = _create_person(distinct_ids=["person5"], team_id=self.team.pk) + _create_event(team=self.team, event="blaah blaa", distinct_id="person5") + _create_event(team=self.team, event="user signed up", distinct_id="person5") + _create_event( + team=self.team, + event="sign up", + distinct_id="person5", + properties={"key": "val"}, + ) + _create_event(team=self.team, event="$pageview", distinct_id="person5") + _create_event(team=self.team, event="blaah blaa", distinct_id="person5") + + person6 = _create_person(distinct_ids=["person6"], team_id=self.team.pk) + _create_event(team=self.team, event="blaah blaa", distinct_id="person6") + _create_event(team=self.team, event="user signed up", distinct_id="person6") + _create_event( + team=self.team, + event="sign up", + distinct_id="person6", + properties={"key": "val"}, + ) + _create_event(team=self.team, event="$pageview", distinct_id="person6") + _create_event( + team=self.team, + event="pageview", + distinct_id="person6", + properties={"key": "val1"}, + ) + + person7 = _create_person(distinct_ids=["person7"], team_id=self.team.pk) + _create_event(team=self.team, event="blaah blaa", distinct_id="person7") + _create_event(team=self.team, event="user signed up", distinct_id="person7") + _create_event( + team=self.team, + event="sign up", + distinct_id="person7", + properties={"key": "val"}, + ) + _create_event(team=self.team, event="$pageview", distinct_id="person7") + _create_event(team=self.team, event="user signed up", distinct_id="person7") + _create_event( + team=self.team, + event="pageview", + distinct_id="person7", + properties={"key": "val"}, + ) + + person8 = _create_person(distinct_ids=["person8"], team_id=self.team.pk) + _create_event(team=self.team, event="blaah blaa", distinct_id="person8") + _create_event(team=self.team, event="user signed up", distinct_id="person8") + _create_event(team=self.team, event="user signed up", distinct_id="person8") + _create_event( + team=self.team, + event="sign up", + distinct_id="person8", + properties={"key": "val"}, + ) + _create_event(team=self.team, event="$pageview", distinct_id="person8") + _create_event( + team=self.team, + event="pageview", + distinct_id="person8", + properties={"key": "val"}, + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "user signed up") + self.assertEqual(results[1]["name"], "sign up") + self.assertEqual(results[2]["name"], "$pageview") + self.assertEqual(results[3]["name"], "pageview") + self.assertEqual(results[0]["count"], 8) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + person4.uuid, + person5.uuid, + person6.uuid, + person7.uuid, + person8.uuid, + ], + ) + + 
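# Reviewer note (illustrative sketch, not part of the patch): "strict" ordering only
# counts a step when its event is the very next matching event after the previous
# step, so any interleaved noise event ("blaah blaa") breaks the sequence. Ignoring
# timestamps and conversion windows, the rule these assertions exercise is roughly:

def max_strict_step(events: list[str], steps: list[str]) -> int:
    # Highest step reached by a run of strictly consecutive step events.
    best = 0
    for start, event in enumerate(events):
        if event != steps[0]:
            continue
        run = 1
        for nxt in events[start + 1 :]:
            if run == len(steps) or nxt != steps[run]:
                break
            run += 1
        best = max(best, run)
    return best

# e.g. person6 in test_basic_strict_funnel, with events
# ["blaah blaa", "user signed up", "blaah blaa", "$pageview"], reaches only
# step 1 of ["user signed up", "$pageview", "insight viewed"].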
self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2), + [ + person3_stopped_after_insight_view.uuid, + person4.uuid, + person5.uuid, + person6.uuid, + person7.uuid, + person8.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 3), + [person4.uuid, person5.uuid, person6.uuid, person7.uuid, person8.uuid], + ) + + self.assertCountEqual(self._get_actor_ids_at_step(filters, 4), [person8.uuid]) + + def test_basic_strict_funnel_conversion_times(self): + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_order_type": "strict", + "events": [ + {"id": "user signed up", "order": 0}, + {"id": "$pageview", "order": 1}, + {"id": "insight viewed", "order": 2}, + ], + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-07 23:59:59", + } + + person1_stopped_after_signup = _create_person(distinct_ids=["stopped_after_signup1"], team_id=self.team.pk) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_signup1", + timestamp="2021-05-02 00:00:00", + ) + + person2_stopped_after_one_pageview = _create_person( + distinct_ids=["stopped_after_pageview1"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_pageview1", + timestamp="2021-05-02 00:00:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="stopped_after_pageview1", + timestamp="2021-05-02 01:00:00", + ) + + person3_stopped_after_insight_view = _create_person( + distinct_ids=["stopped_after_insightview"], team_id=self.team.pk + ) + _create_event( + team=self.team, + event="user signed up", + distinct_id="stopped_after_insightview", + timestamp="2021-05-02 00:00:00", + ) + _create_event( + team=self.team, + event="$pageview", + distinct_id="stopped_after_insightview", + timestamp="2021-05-02 02:00:00", + ) + _create_event( + team=self.team, + event="insight viewed", + distinct_id="stopped_after_insightview", + timestamp="2021-05-02 04:00:00", + ) + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["name"], "user signed up") + self.assertEqual(results[1]["name"], "$pageview") + self.assertEqual(results[2]["name"], "insight viewed") + self.assertEqual(results[0]["count"], 3) + + self.assertEqual(results[1]["average_conversion_time"], 5400) + # 1 hour for Person 2, 2 hours for Person 3, average = 1.5 hours + + self.assertEqual(results[2]["average_conversion_time"], 7200) + # 2 hours for Person 3 + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 1), + [ + person1_stopped_after_signup.uuid, + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 2), + [ + person2_stopped_after_one_pageview.uuid, + person3_stopped_after_insight_view.uuid, + ], + ) + + self.assertCountEqual( + self._get_actor_ids_at_step(filters, 3), + [person3_stopped_after_insight_view.uuid], + ) diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index bd69ce6aefa8a9..d5e21e219309fc 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -6,7 +6,7 @@ def get_funnel_order_class(funnelsFilter: FunnelsFilter): from posthog.hogql_queries.insights.funnels import ( Funnel, - # FunnelStrict, + FunnelStrict, # FunnelUnordered, FunnelBase, ) @@ -15,8 +15,7 @@ def 
get_funnel_order_class(funnelsFilter: FunnelsFilter): return FunnelBase # return FunnelUnordered elif funnelsFilter.funnelOrderType == StepOrderValue.strict: - return FunnelBase - # return FunnelStrict + return FunnelStrict return Funnel diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr index bbca1ba255e1b6..b55778fa0d1fad 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_lifecycle_query_runner.ambr @@ -79,7 +79,7 @@ WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'UTC'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 00:00:00', 6, 'UTC'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'UTC'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'UTC'))), toIntervalDay(1))), ifNull(in(person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 6)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 10)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0), equals(events.event, '$pageview')) GROUP BY person_id) diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr index e0e2725b168949..64d6dcbc02e7b7 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends.ambr @@ -85,7 +85,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(e__pdi__person.`properties___$bool_prop`, 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 7)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 11)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -172,7 +172,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-07 23:59:59', 6, 'UTC'))), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.person_properties, '$bool_prop'), ''), 'null'), '^"|"$', ''), 'x'), 0), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 8)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 12)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, 
cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY day_start) @@ -688,7 +688,7 @@ WHERE and(equals(e.team_id, 2), and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 31)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)))) GROUP BY value @@ -757,7 +757,7 @@ WHERE and(equals(e.team_id, 2), and(and(equals(e.event, '$pageview'), and(or(ifNull(equals(e__pdi__person.properties___name, 'p1'), 0), ifNull(equals(e__pdi__person.properties___name, 'p2'), 0), ifNull(equals(e__pdi__person.properties___name, 'p3'), 0)), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 27)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 31)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'val'], ['$$_posthog_breakdown_other_$$', 'val'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, 'key'), ''), 'null'), '^"|"$', ''), 'val'), 0))), ifNull(greaterOrEquals(timestamp, minus(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-01 00:00:00', 6, 'UTC')), toIntervalDay(7))), 0), ifNull(lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-12 23:59:59', 6, 'UTC'))), 0)) GROUP BY timestamp, actor_id, @@ -1592,7 +1592,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 44)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1640,7 +1640,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(e__pdi.person_id, (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 40)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 44)) 
GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, @@ -1691,7 +1691,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 45)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0))) GROUP BY value @@ -1738,7 +1738,7 @@ WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2019-12-28 00:00:00', 6, 'UTC')))), lessOrEquals(toTimeZone(e.timestamp, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-04 23:59:59', 6, 'UTC'))), and(equals(e.event, 'sign up'), ifNull(in(ifNull(nullIf(e__override.override_person_id, '00000000-0000-0000-0000-000000000000'), e.person_id), (SELECT cohortpeople.person_id AS person_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 41)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, 45)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0))), 0)), or(ifNull(equals(transform(ifNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), '$$_posthog_breakdown_null_$$'), ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], ['$$_posthog_breakdown_other_$$', 'value', 'other_value'], '$$_posthog_breakdown_other_$$'), '$$_posthog_breakdown_other_$$'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'value'), 0), ifNull(equals(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(e.properties, '$some_property'), ''), 'null'), '^"|"$', ''), 'other_value'), 0))) GROUP BY day_start, diff --git a/posthog/hogql_queries/web_analytics/stats_table.py b/posthog/hogql_queries/web_analytics/stats_table.py index bede9accc6d6d2..a7ced15c87b33a 100644 --- a/posthog/hogql_queries/web_analytics/stats_table.py +++ b/posthog/hogql_queries/web_analytics/stats_table.py @@ -9,6 +9,7 @@ ) from posthog.hogql_queries.web_analytics.web_analytics_query_runner import ( WebAnalyticsQueryRunner, + map_columns, ) from 
posthog.schema import ( WebStatsTableQuery, @@ -169,19 +170,13 @@ def calculate(self): assert results is not None - def to_data(col_val, col_idx): - if col_idx == 0: # breakdown_value - return col_val - elif col_idx == 1: # views - return self._unsample(col_val) - elif col_idx == 2: # visitors - return self._unsample(col_val) - elif col_idx == 3: # bounce_rate - return col_val - else: - return col_val - - results_mapped = [[to_data(c, i) for (i, c) in enumerate(r)] for r in results] + results_mapped = map_columns( + results, + { + 1: self._unsample, # views + 2: self._unsample, # visitors + }, + ) return WebStatsTableQueryResponse( columns=response.columns, @@ -195,11 +190,11 @@ def to_data(col_val, col_idx): def counts_breakdown(self): match self.query.breakdownBy: case WebStatsBreakdown.Page: - return ast.Field(chain=["properties", "$pathname"]) + return self._apply_path_cleaning(ast.Field(chain=["properties", "$pathname"])) case WebStatsBreakdown.InitialChannelType: raise NotImplementedError("Breakdown InitialChannelType not implemented") case WebStatsBreakdown.InitialPage: - return ast.Field(chain=["person", "properties", "$initial_pathname"]) + return self._apply_path_cleaning(ast.Field(chain=["person", "properties", "$initial_pathname"])) case WebStatsBreakdown.InitialReferringDomain: return ast.Field(chain=["person", "properties", "$initial_referring_domain"]) case WebStatsBreakdown.InitialUTMSource: @@ -233,9 +228,15 @@ def bounce_breakdown(self): match self.query.breakdownBy: case WebStatsBreakdown.Page: # use initial pathname for bounce rate - return ast.Call(name="any", args=[ast.Field(chain=["person", "properties", "$initial_pathname"])]) + return self._apply_path_cleaning( + ast.Call(name="any", args=[ast.Field(chain=["person", "properties", "$initial_pathname"])]) + ) case WebStatsBreakdown.InitialChannelType: raise NotImplementedError("Breakdown InitialChannelType not implemented") + case WebStatsBreakdown.InitialPage: + return self._apply_path_cleaning( + ast.Call(name="any", args=[ast.Field(chain=["person", "properties", "$initial_pathname"])]) + ) case _: return ast.Call(name="any", args=[self.counts_breakdown()]) @@ -364,3 +365,19 @@ def to_channel_query(self): ) return top_sources_query + + def _apply_path_cleaning(self, path_expr: ast.Expr) -> ast.Expr: + if not self.query.doPathCleaning or not self.team.path_cleaning_filters: + return path_expr + + for replacement in self.team.path_cleaning_filter_models(): + path_expr = ast.Call( + name="replaceRegexpAll", + args=[ + path_expr, + ast.Constant(value=replacement.regex), + ast.Constant(value=replacement.alias), + ], + ) + + return path_expr diff --git a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py index 041121b4055a68..8705bc7a9796dc 100644 --- a/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py +++ b/posthog/hogql_queries/web_analytics/test/test_web_stats_table.py @@ -35,13 +35,17 @@ def _create_events(self, data, event="$pageview"): ) return person_result - def _run_web_stats_table_query(self, date_from, date_to, breakdown_by=WebStatsBreakdown.Page, limit=None): + def _run_web_stats_table_query( + self, date_from, date_to, breakdown_by=WebStatsBreakdown.Page, limit=None, path_cleaning_filters=None + ): query = WebStatsTableQuery( dateRange=DateRange(date_from=date_from, date_to=date_to), properties=[], breakdownBy=breakdown_by, limit=limit, + doPathCleaning=bool(path_cleaning_filters), ) + 
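# Reviewer note (illustrative, not part of the patch): _apply_path_cleaning wraps the
# breakdown expression once per filter, innermost first, so filters apply in
# declaration order. With the first two filters from test_path_filters below, the
# generated expression is roughly:
#
#   replaceRegexpAll(
#       replaceRegexpAll(properties.$pathname, '\/cleaned\/\d+', '/cleaned/:id'),
#       '\/path\/\d+', '/path/:id')
#
# which is also why "thing_a" -> "thing_b" -> "thing_c" chains in that test: the
# second filter sees the output of the first.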
self.team.path_cleaning_filters = path_cleaning_filters or [] runner = WebStatsTableQueryRunner(team=self.team, query=query) return runner.calculate() @@ -141,3 +145,35 @@ def test_limit(self): response_2.results, ) self.assertEqual(False, response_2.hasMore) + + def test_path_filters(self): + self._create_events( + [ + ("p1", [("2023-12-02", "s1", "/cleaned/123/path/456")]), + ("p2", [("2023-12-10", "s2", "/cleaned/123")]), + ("p3", [("2023-12-10", "s3", "/cleaned/456")]), + ("p4", [("2023-12-11", "s4", "/not-cleaned")]), + ("p5", [("2023-12-11", "s5", "/thing_a")]), + ] + ) + + results = self._run_web_stats_table_query( + "all", + "2023-12-15", + path_cleaning_filters=[ + {"regex": "\\/cleaned\\/\\d+", "alias": "/cleaned/:id"}, + {"regex": "\\/path\\/\\d+", "alias": "/path/:id"}, + {"regex": "thing_a", "alias": "thing_b"}, + {"regex": "thing_b", "alias": "thing_c"}, + ], + ).results + + self.assertEqual( + [ + ["/cleaned/:id", 2, 2], + ["/thing_c", 1, 1], + ["/not-cleaned", 1, 1], + ["/cleaned/:id/path/:id", 1, 1], + ], + results, + ) diff --git a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py index ad9d568d6c1077..e20a2810274a91 100644 --- a/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py +++ b/posthog/hogql_queries/web_analytics/web_analytics_query_runner.py @@ -1,3 +1,4 @@ +import typing from abc import ABC from datetime import timedelta from math import ceil @@ -228,6 +229,10 @@ def _unsample(self, n: Optional[int | float]): else n / self._sample_rate.numerator ) + def _cache_key(self) -> str: + original = super()._cache_key() + return f"{original}_{self.team.path_cleaning_filters}" + def _sample_rate_from_count(count: int) -> SamplingRate: # Change the sample rate so that the query will sample about 100_000 to 1_000_000 events, but use defined steps of @@ -239,3 +244,7 @@ def _sample_rate_from_count(count: int) -> SamplingRate: if count / sample_target >= step: return SamplingRate(numerator=1, denominator=step) return SamplingRate(numerator=1) + + +def map_columns(results, mapper: dict[int, typing.Callable]): + return [[mapper[i](data) if i in mapper else data for i, data in enumerate(row)] for row in results] diff --git a/posthog/models/activity_logging/activity_log.py b/posthog/models/activity_logging/activity_log.py index a02889b17ce55b..074b53b2dd55b7 100644 --- a/posthog/models/activity_logging/activity_log.py +++ b/posthog/models/activity_logging/activity_log.py @@ -1,6 +1,7 @@ import dataclasses import json from datetime import datetime +from decimal import Decimal from typing import Any, Dict, List, Literal, Optional, Union import structlog @@ -8,6 +9,7 @@ from django.db import models from django.utils import timezone from django.conf import settings + from posthog.models.dashboard import Dashboard from posthog.models.dashboard_tile import DashboardTile from posthog.models.user import User @@ -32,6 +34,7 @@ "EarlyAccessFeature", "SessionRecordingPlaylist", "Comment", + "Team", ] ChangeAction = Literal["changed", "created", "deleted", "merged", "split", "exported"] @@ -73,6 +76,12 @@ def default(self, obj): return str(obj) if isinstance(obj, User): return {"first_name": obj.first_name, "email": obj.email} + if isinstance(obj, float): + # more precision than we'll need, but avoids rounding unnecessarily + return format(obj, ".6f").rstrip("0").rstrip(".") + if isinstance(obj, Decimal): + # more precision than we'll need, but avoids rounding unnecessarily + return
format(obj, ".6f").rstrip("0").rstrip(".") return json.JSONEncoder.default(self, obj) @@ -186,6 +195,7 @@ class Meta: "post_to_slack", "property_type_format", ], + "Team": ["uuid", "updated_at", "api_token", "created_at", "id"], } diff --git a/posthog/models/exported_asset.py b/posthog/models/exported_asset.py index 675245e867634a..81cd9d024a6bc6 100644 --- a/posthog/models/exported_asset.py +++ b/posthog/models/exported_asset.py @@ -45,6 +45,8 @@ class ExportFormat(models.TextChoices): PDF = "application/pdf", "application/pdf" CSV = "text/csv", "text/csv" + SUPPORTED_FORMATS = [ExportFormat.PNG, ExportFormat.CSV] + # Relations team: models.ForeignKey = models.ForeignKey("Team", on_delete=models.CASCADE) dashboard = models.ForeignKey("posthog.Dashboard", on_delete=models.CASCADE, null=True) @@ -136,10 +138,7 @@ def get_content_response(asset: ExportedAsset, download: bool = False): content = object_storage.read_bytes(asset.content_location) if not content: - # if we don't have content, the asset is invalid, so, expire it - asset.expires_after = now() - asset.save() - + # Don't modify the asset here as the task might still be running concurrently raise NotFound() res = HttpResponse(content, content_type=asset.export_format) diff --git a/posthog/models/test/test_exported_asset_model.py b/posthog/models/test/test_exported_asset_model.py index f17808caadd548..f2a493cc7cc449 100644 --- a/posthog/models/test/test_exported_asset_model.py +++ b/posthog/models/test/test_exported_asset_model.py @@ -1,11 +1,8 @@ from datetime import datetime, timedelta -import pytest -from dateutil.parser import isoparse from freezegun import freeze_time -from rest_framework.exceptions import NotFound -from posthog.models.exported_asset import ExportedAsset, get_content_response +from posthog.models.exported_asset import ExportedAsset from posthog.test.base import APIBaseTest @@ -76,21 +73,3 @@ def test_delete_expired_assets(self) -> None: asset_that_is_not_expired, asset_that_has_no_expiry, ] - - @freeze_time("2021-01-01T12:00:00Z") - def test_invalid_exported_asset_is_expired_on_access(self) -> None: - asset: ExportedAsset = ExportedAsset.objects.create( - team=self.team, - created_by=self.user, - expires_after=datetime.now() + timedelta(seconds=100), - # an invalid asset is one that has no local or remote content - content=None, - content_location=None, - ) - - assert asset.expires_after != isoparse("2021-01-01T12:00:00Z") - - with pytest.raises(NotFound): - get_content_response(asset, False) - - assert asset.expires_after == isoparse("2021-01-01T12:00:00Z") diff --git a/posthog/schema.py b/posthog/schema.py index 324f601e1142f2..328f61346b2b38 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -1547,6 +1547,7 @@ class WebStatsTableQuery(BaseModel): ) breakdownBy: WebStatsBreakdown dateRange: Optional[DateRange] = None + doPathCleaning: Optional[bool] = None includeBounceRate: Optional[bool] = None includeScrollDepth: Optional[bool] = None kind: Literal["WebStatsTableQuery"] = "WebStatsTableQuery" diff --git a/posthog/tasks/exporter.py b/posthog/tasks/exporter.py index 097010027717f5..044e27556b5d22 100644 --- a/posthog/tasks/exporter.py +++ b/posthog/tasks/exporter.py @@ -3,6 +3,8 @@ from celery import shared_task from prometheus_client import Counter, Histogram +from django.db import transaction + from posthog import settings from posthog.models import ExportedAsset from posthog.tasks.utils import CeleryQueue @@ -37,23 +39,21 @@ # export_asset is used in chords/groups and so must not ignore its results 
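# Reviewer note (assumption, not part of the patch): because export_asset now runs
# inside transaction.atomic and fetches the row with select_for_update(), two workers
# that pick up the same asset serialize on the row lock instead of exporting twice:
#
#   with transaction.atomic():
#       asset = ExportedAsset.objects_including_ttl_deleted.select_for_update().get(pk=pk)
#       ...  # a second worker blocks here until this transaction commits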
@shared_task( - autoretry_for=(Exception,), - max_retries=5, - retry_backoff=True, acks_late=True, ignore_result=False, time_limit=settings.ASSET_GENERATION_MAX_TIMEOUT_SECONDS, queue=CeleryQueue.EXPORTS.value, ) +@transaction.atomic def export_asset(exported_asset_id: int, limit: Optional[int] = None) -> None: from posthog.tasks.exports import csv_exporter, image_exporter # if Celery is lagging then you can end up with an exported asset that has had a TTL added # and that TTL has passed, in the exporter we don't care about that. # the TTL is for later cleanup. - exported_asset: ExportedAsset = ExportedAsset.objects_including_ttl_deleted.select_related( - "insight", "dashboard" - ).get(pk=exported_asset_id) + exported_asset: ExportedAsset = ExportedAsset.objects_including_ttl_deleted.select_for_update().get( + pk=exported_asset_id + ) is_csv_export = exported_asset.export_format == ExportedAsset.ExportFormat.CSV if is_csv_export: diff --git a/posthog/tasks/exports/csv_exporter.py b/posthog/tasks/exports/csv_exporter.py index 83cf709e17bd1c..e9dce8800cb14d 100644 --- a/posthog/tasks/exports/csv_exporter.py +++ b/posthog/tasks/exports/csv_exporter.py @@ -170,6 +170,16 @@ def _convert_response_to_csv_data(data: Any) -> List[Any]: return csv_rows else: return items + elif data.get("result") and isinstance(data.get("result"), dict): + result = data["result"] + + if "bins" not in result: + return [] + + csv_rows = [] + for key, value in result["bins"]: + csv_rows.append({"bin": key, "value": value}) + return csv_rows return [] @@ -178,7 +188,7 @@ class UnexpectedEmptyJsonResponse(Exception): pass -def _export_to_csv(exported_asset: ExportedAsset, limit: int = 1000) -> None: +def _export_to_csv(exported_asset: ExportedAsset, limit: int) -> None: resource = exported_asset.export_context columns: List[str] = resource.get("columns", []) @@ -203,14 +213,6 @@ def _export_to_csv(exported_asset: ExportedAsset, limit: int = 1000) -> None: while len(all_csv_rows) < CSV_EXPORT_LIMIT: response = make_api_call(access_token, body, limit, method, next_url, path) - if response.status_code != 200: - # noinspection PyBroadException - try: - response_json = response.json() - except Exception: - response_json = "no response json to parse" - raise Exception(f"export API call failed with status_code: {response.status_code}. {response_json}") - # Figure out how to handle funnel polling.... 
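# Reviewer note (not part of the patch): the hand-rolled status check above is gone
# because make_api_call (rewritten below) now ends with response.raise_for_status(),
# so a non-2xx response surfaces as requests.HTTPError and still aborts the export:
#
#   try:
#       response = make_api_call(access_token, body, limit, method, next_url, path)
#   except requests.HTTPError:
#       raise  # propagates to export_csv's except block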
data = response.json() @@ -247,6 +249,10 @@ def _export_to_csv(exported_asset: ExportedAsset, limit: int = 1000) -> None: if columns: render_context["header"] = columns + # Fall back to an empty row so we still produce a CSV, distinguishing an empty result from a failed export + if not all_csv_rows: + all_csv_rows = [{}] + rendered_csv_content = renderer.render(all_csv_rows, renderer_context=render_context) save_content(exported_asset, rendered_csv_content) @@ -266,43 +272,34 @@ def make_api_call( access_token: str, body: Any, limit: int, method: str, next_url: Optional[str], path: str, ) -> requests.models.Response: request_url: str = absolute_uri(next_url or path) - try: - url = add_query_params( - request_url, - {get_limit_param_key(request_url): str(limit), "is_csv_export": "1"}, - ) - response = requests.request( - method=method.lower(), - url=url, - json=body, - headers={"Authorization": f"Bearer {access_token}"}, - ) - return response - except Exception as ex: - logger.error( - "csv_exporter.error_making_api_call", - exc=ex, - exc_info=True, - next_url=next_url, - path=path, - request_url=request_url, - limit=limit, - ) - raise ex + params: dict[str, str | int] = { + get_limit_param_key(request_url): limit, + "is_csv_export": "1", + } + response = requests.request( + method=method.lower(), + url=request_url, + params=params, + json=body, + headers={"Authorization": f"Bearer {access_token}"}, + timeout=60, + ) + response.raise_for_status() + return response def export_csv(exported_asset: ExportedAsset, limit: Optional[int] = None) -> None: if not limit: - limit = 1000 + limit = 200 # A limit that is too high makes e.g. queries with long breakdown values too long, causing them to fail try: - if exported_asset.export_format == "text/csv": - with EXPORT_TIMER.labels(type="csv").time(): - _export_to_csv(exported_asset, limit) - EXPORT_SUCCEEDED_COUNTER.labels(type="csv").inc() - else: + if exported_asset.export_format != "text/csv": EXPORT_ASSET_UNKNOWN_COUNTER.labels(type="csv").inc() raise NotImplementedError(f"Export to format {exported_asset.export_format} is not supported") + + with EXPORT_TIMER.labels(type="csv").time(): + _export_to_csv(exported_asset, limit) + EXPORT_SUCCEEDED_COUNTER.labels(type="csv").inc() except Exception as e: if exported_asset: team_id = str(exported_asset.team.id) diff --git a/posthog/tasks/exports/test/test_csv_exporter.py b/posthog/tasks/exports/test/test_csv_exporter.py index 3570d194400218..dd547531b55729 100644 --- a/posthog/tasks/exports/test/test_csv_exporter.py +++ b/posthog/tasks/exports/test/test_csv_exporter.py @@ -239,7 +239,9 @@ def test_csv_exporter_limits_breakdown_insights_correctly( mocked_request.assert_called_with( method="get", - url="http://testserver/" + path + "&breakdown_limit=1000&is_csv_export=1", + url="http://testserver/" + path, + params={"breakdown_limit": 200, "is_csv_export": "1"}, + timeout=60, json=None, headers=ANY, ) @@ -250,10 +252,11 @@ def test_failing_export_api_is_reported(self, _mock_logger: MagicMock) -> None: exported_asset = self._create_asset() mock_response = MagicMock() mock_response.status_code = 403 + mock_response.raise_for_status.side_effect = Exception("HTTP 403 Forbidden") mock_response.ok = False patched_request.return_value = mock_response - with pytest.raises(Exception, match="export API call failed with status_code: 403"): + with pytest.raises(Exception, match="HTTP 403 Forbidden"): csv_exporter.export_csv(exported_asset) def test_limiting_query_as_expected(self) -> None: diff --git a/posthog/temporal/data_imports/pipelines/hubspot/__init__.py b/posthog/temporal/data_imports/pipelines/hubspot/__init__.py index
1ca0d9972ce5b1..f23b305d00a138 100644 --- a/posthog/temporal/data_imports/pipelines/hubspot/__init__.py +++ b/posthog/temporal/data_imports/pipelines/hubspot/__init__.py @@ -112,11 +112,11 @@ def crm_objects( props = ",".join(sorted(list(set(props)))) - if len(props) > 5000: + if len(props) > 10000: raise ValueError( ( "Your request to Hubspot is too long to process. " - "Maximum allowed query length is 2000 symbols, while " + "Maximum allowed query length is 10000 symbols, while " f"your list of properties `{props[:200]}`... is {len(props)} " "symbols long. Use the `props` argument of the resource to " "set the list of properties to extract from the endpoint."
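Reviewer note (illustrative, not part of the patch; the property names below are hypothetical): the guard and the error message now agree on a 10000-symbol threshold, where the old code checked against 5000 but reported 2000. A quick sanity check of when crm_objects would raise:

props = ",".join(sorted({f"custom_property_{i}" for i in range(600)}))
len(props)  # 11889 characters, comfortably over the 10000-symbol guard above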