diff --git a/.github/workflows/ci-backend-depot.yml b/.github/workflows/ci-backend-depot.yml index 928886d44cf52..29cf80ddab67c 100644 --- a/.github/workflows/ci-backend-depot.yml +++ b/.github/workflows/ci-backend-depot.yml @@ -33,7 +33,7 @@ jobs: changes: runs-on: depot-ubuntu-latest-4 timeout-minutes: 5 - if: ${{ contains(github.event.pull_request.labels.*.name, 'test-depot') }} + if: github.repository == 'PostHog/posthog' name: Determine need to run backend checks # Set job outputs to values from filter step outputs: diff --git a/.github/workflows/ci-e2e-depot.yml b/.github/workflows/ci-e2e-depot.yml index 697d42e97f945..4985dac9d746a 100644 --- a/.github/workflows/ci-e2e-depot.yml +++ b/.github/workflows/ci-e2e-depot.yml @@ -16,7 +16,7 @@ jobs: changes: runs-on: depot-ubuntu-latest-4 timeout-minutes: 5 - if: ${{ contains(github.event.pull_request.labels.*.name, 'test-depot') }} + if: github.repository == 'PostHog/posthog' name: Determine need to run E2E checks # Set job outputs to values from filter step outputs: diff --git a/cypress/e2e/featureFlags.cy.ts b/cypress/e2e/featureFlags.cy.ts index a60f391f93325..78f0bcd0ab8bd 100644 --- a/cypress/e2e/featureFlags.cy.ts +++ b/cypress/e2e/featureFlags.cy.ts @@ -4,13 +4,7 @@ describe('Feature Flags', () => { let name beforeEach(() => { - cy.intercept('**/decide/*', (req) => - req.reply( - decideResponse({ - 'new-feature-flag-operators': true, - }) - ) - ) + cy.intercept('**/decide/*', (req) => req.reply(decideResponse({}))) cy.intercept('/api/projects/*/property_definitions?type=person*', { fixture: 'api/feature-flags/property_definition', @@ -116,7 +110,7 @@ describe('Feature Flags', () => { cy.get('.Toastify').contains('Undo').should('be.visible') }) - it.only('Move between property types smoothly, and support relative dates', () => { + it('Move between property types smoothly, and support relative dates', () => { // ensure unique names to avoid clashes cy.get('[data-attr=top-bar-name]').should('contain', 'Feature flags') cy.get('[data-attr=new-feature-flag]').click() diff --git a/cypress/support/e2e.ts b/cypress/support/e2e.ts index 6785e5bd69f0b..87f1dfd127b1f 100644 --- a/cypress/support/e2e.ts +++ b/cypress/support/e2e.ts @@ -32,7 +32,6 @@ beforeEach(() => { // set feature flags here e.g. 
// 'toolbar-launch-side-action': true, 'surveys-new-creation-flow': true, - 'surveys-results-visualizations': true, 'auto-redirect': true, hogql: true, 'data-exploration-insights': true, diff --git a/ee/frontend/mobile-replay/__mocks__/increment-with-child-duplication.json b/ee/frontend/mobile-replay/__mocks__/increment-with-child-duplication.json index c17efc6d9e246..7ffc2e5f38e5c 100644 --- a/ee/frontend/mobile-replay/__mocks__/increment-with-child-duplication.json +++ b/ee/frontend/mobile-replay/__mocks__/increment-with-child-duplication.json @@ -191,6 +191,13 @@ "x": 66, "y": 556 } + }, + { + "parentId": 209272202, + "wireframe": { + "id": 52129787123, + "type": "text" + } } ], "removes": [ diff --git a/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap b/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap index a421f7ff220bf..bbde91f8defc6 100644 --- a/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap +++ b/ee/frontend/mobile-replay/__snapshots__/transform.test.ts.snap @@ -160,6 +160,147 @@ exports[`replay/transform transform can convert images 1`] = ` ] `; +exports[`replay/transform transform can convert invalid text wireframe 1`] = ` +[ + { + "data": { + "height": 600, + "href": "", + "width": 300, + }, + "timestamp": 1, + "type": 4, + }, + { + "data": { + "initialOffset": { + "left": 0, + "top": 0, + }, + "node": { + "childNodes": [ + { + "id": 2, + "name": "html", + "publicId": "", + "systemId": "", + "type": 1, + }, + { + "attributes": { + "data-rrweb-id": 3, + "style": "height: 100vh; width: 100vw;", + }, + "childNodes": [ + { + "attributes": { + "data-rrweb-id": 4, + }, + "childNodes": [ + { + "attributes": { + "type": "text/css", + }, + "childNodes": [ + { + "id": 102, + "textContent": " + body { + margin: unset; + } + input, button, select, textarea { + font: inherit; + margin: 0; + padding: 0; + border: 0; + outline: 0; + background: transparent; + padding-block: 0 !important; + } + .input:focus { + outline: none; + } + img { + border-style: none; + } + ", + "type": 3, + }, + ], + "id": 101, + "tagName": "style", + "type": 2, + }, + ], + "id": 4, + "tagName": "head", + "type": 2, + }, + { + "attributes": { + "data-rrweb-id": 5, + "style": "height: 100vh; width: 100vw;", + }, + "childNodes": [ + { + "attributes": { + "data-rrweb-id": 12345, + "style": "border-width: 4px;border-radius: 10px;border-color: #ee3ee4;border-style: solid;color: #ee3ee4;width: 100px;height: 30px;position: fixed;left: 11px;top: 12px;overflow:hidden;white-space:normal;", + }, + "childNodes": [], + "id": 12345, + "tagName": "div", + "type": 2, + }, + { + "attributes": { + "data-render-reason": "a fixed placeholder to contain the keyboard in the correct stacking position", + "data-rrweb-id": 9, + }, + "childNodes": [], + "id": 9, + "tagName": "div", + "type": 2, + }, + { + "attributes": { + "data-rrweb-id": 7, + }, + "childNodes": [], + "id": 7, + "tagName": "div", + "type": 2, + }, + { + "attributes": { + "data-rrweb-id": 11, + }, + "childNodes": [], + "id": 11, + "tagName": "div", + "type": 2, + }, + ], + "id": 5, + "tagName": "body", + "type": 2, + }, + ], + "id": 3, + "tagName": "html", + "type": 2, + }, + ], + "id": 1, + "type": 0, + }, + }, + "timestamp": 1, + "type": 2, + }, +] +`; + exports[`replay/transform transform can convert navigation bar 1`] = ` [ { @@ -1453,6 +1594,20 @@ exports[`replay/transform transform incremental mutations de-duplicate the tree }, "parentId": 52129787, }, + { + "nextId": null, + "node": { + "attributes": { + "data-rrweb-id": 
52129787123, + "style": "position: fixed;left: 0px;top: 0px;overflow:hidden;white-space:normal;", + }, + "childNodes": [], + "id": 52129787123, + "tagName": "div", + "type": 2, + }, + "parentId": 209272202, + }, ], "attributes": [], "removes": [ @@ -1689,6 +1844,13 @@ AAAAAAAAAAAAAAAAAAAAAAAAgCN/AW0xMqHnNQceAAAAAElFTkSuQmCC "y": 556, }, }, + { + "parentId": 209272202, + "wireframe": { + "id": 52129787123, + "type": "text", + }, + }, ], "removes": [ { diff --git a/ee/frontend/mobile-replay/transform.test.ts b/ee/frontend/mobile-replay/transform.test.ts index 788bb65655d3d..92384e48b2986 100644 --- a/ee/frontend/mobile-replay/transform.test.ts +++ b/ee/frontend/mobile-replay/transform.test.ts @@ -480,6 +480,43 @@ describe('replay/transform', () => { expect(converted).toMatchSnapshot() }) + test('can convert invalid text wireframe', () => { + const converted = posthogEEModule.mobileReplay?.transformToWeb([ + { + data: { + width: 300, + height: 600, + }, + timestamp: 1, + type: 4, + }, + { + type: 2, + data: { + wireframes: [ + { + id: 12345, + type: 'text', + x: 11, + y: 12, + width: 100, + height: 30, + style: { + color: '#ee3ee4', + borderColor: '#ee3ee4', + borderWidth: '4', + borderRadius: '10px', + }, + // text property is missing + }, + ], + }, + timestamp: 1, + }, + ]) + expect(converted).toMatchSnapshot() + }) + test('can set background image to base64 png', () => { const converted = posthogEEModule.mobileReplay?.transformToWeb([ { diff --git a/ee/frontend/mobile-replay/transformer/transformers.ts b/ee/frontend/mobile-replay/transformer/transformers.ts index 1527a24d7dbeb..f2b7324a475fa 100644 --- a/ee/frontend/mobile-replay/transformer/transformers.ts +++ b/ee/frontend/mobile-replay/transformer/transformers.ts @@ -105,6 +105,10 @@ export function _isPositiveInteger(id: unknown): id is number { return typeof id === 'number' && id > 0 && id % 1 === 0 } +function _isNullish(x: unknown): x is null | undefined { + return x === null || x === undefined +} + function isRemovedNodeMutation(x: addedNodeMutation | removedNodeMutation): x is removedNodeMutation { return isObject(x) && 'id' in x } @@ -218,6 +222,17 @@ function makeTextElement( // because we might have to style the text, we always wrap it in a div // and apply styles to that const id = context.idSequence.next().value + + const childNodes = [...children] + if (!_isNullish(wireframe.text)) { + childNodes.unshift({ + type: NodeType.Text, + textContent: wireframe.text, + // since the text node is wrapped, we assign it a synthetic id + id, + }) + } + return { result: { type: NodeType.Element, @@ -227,15 +242,7 @@ function makeTextElement( 'data-rrweb-id': wireframe.id, }, id: wireframe.id, - childNodes: [ - { - type: NodeType.Text, - textContent: wireframe.text, - // since the text node is wrapped, we assign it a synthetic id - id: id, - }, - ...children, - ], + childNodes, }, context, } @@ -983,6 +990,7 @@ function isMobileIncrementalSnapshotEvent(x: unknown): x is MobileIncrementalSna function makeIncrementalAdd(add: MobileNodeMutation, context: ConversionContext): addedNodeMutation[] | null { const converted = convertWireframe(add.wireframe, context) + if (!converted) { return null } diff --git a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png index a3170412155a9..c94024858f014 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png and 
b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png index e8a50e37eebb7..27fe79337869e 100644 Binary files a/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png and b/frontend/__snapshots__/scenes-app-experiments--complete-funnel-experiment--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark--webkit.png index 076bbf95dd5f2..342604e30b9da 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark.png index ac37058ebc358..6fc5f450fcf0c 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light--webkit.png index 9f03b27692951..6d92b76f76832 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light.png index 6b2954224253c..b9399d595b1e6 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark--webkit.png index 4f5951d4022dc..1c8d0e37c4c3b 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark.png index a213a4a4dbe41..beb778892dc98 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light--webkit.png index e5b86ed13abf0..55ca04bf91a1e 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light--webkit.png differ diff --git 
a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light.png index ef84a29eea45f..8ebee9031d744 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark--webkit.png index c895df395644b..88c2b14c8b0b9 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark.png index f286b0fbe282d..f759b15b33464 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light--webkit.png index 5f06ddcb4e2a9..3a69b9e83707d 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png index 4bbcbba2ecc43..663607b53e8ea 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-breakdown-edit--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark--webkit.png index 5b8cec66360e4..716ea6837904d 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark.png index 0ed6f0abedd6e..6bc9e246d68f8 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light--webkit.png index f207448633c95..5f1f1d92437b1 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light--webkit.png differ 
diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light.png index 7cf5ae0c0fa08..d2181932c7f05 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-left-to-right-edit--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark--webkit.png index 1bd2d3cb65439..59c4fad04bb91 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark.png index f7b3ed2806926..07b63f63955a0 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light--webkit.png index 841d652c1b668..221765aaf0f35 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light.png index 15864cbd8c7c6..2eef62fcbf80c 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark--webkit.png index 734e97aa02c7f..cfacc6abdac7d 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark.png index ac6911fadb1af..32d5c490f4f9d 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light--webkit.png index eeb591feab5e9..d14b3f0256a8d 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light.png 
index 7067293aa1a1e..2ed59b8746a73 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png index 5f62b18cf66e2..902a8106b2ecf 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png index f08d761957ebd..535a804029e3b 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-breakdown-edit--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark--webkit.png index 2bbeb51f8515f..1d78dabd80855 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png index 43a8dd18ccad5..44cc3fcc624c7 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light--webkit.png index 3cae496371812..75a57df723c0c 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light.png b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light.png index ba97a5b9298bc..ba38c8ca035a7 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light.png and b/frontend/__snapshots__/scenes-app-insights--funnel-top-to-bottom-edit--light.png differ diff --git a/frontend/__snapshots__/scenes-app-insights--lifecycle-edit--light--webkit.png b/frontend/__snapshots__/scenes-app-insights--lifecycle-edit--light--webkit.png index ba877420fbab8..513177e17b02e 100644 Binary files a/frontend/__snapshots__/scenes-app-insights--lifecycle-edit--light--webkit.png and b/frontend/__snapshots__/scenes-app-insights--lifecycle-edit--light--webkit.png differ diff --git a/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--dark.png b/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--dark.png index 6933b1ab628b3..b24a61098927e 100644 Binary files 
a/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--dark.png and b/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--dark.png differ diff --git a/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--light.png b/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--light.png index f5102f58516f8..4fab34860029c 100644 Binary files a/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--light.png and b/frontend/__snapshots__/scenes-app-surveys--new-survey-customisation-section--light.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--cloud--dark.png b/frontend/__snapshots__/scenes-other-invitesignup--cloud--dark.png index 10c71817bc470..4946aedfeb38a 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--cloud--dark.png and b/frontend/__snapshots__/scenes-other-invitesignup--cloud--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--cloud--light.png b/frontend/__snapshots__/scenes-other-invitesignup--cloud--light.png index 0952b5a96532a..b7eef2e8b826c 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--cloud--light.png and b/frontend/__snapshots__/scenes-other-invitesignup--cloud--light.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--dark.png b/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--dark.png index dd4c5ed9fd9a8..6767462f173e0 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--dark.png and b/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--light.png b/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--light.png index 0f3cdaf9bdd5e..18f965fbc6bf6 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--light.png and b/frontend/__snapshots__/scenes-other-invitesignup--cloud-eu--light.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--dark.png b/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--dark.png index 4a6a270e80ade..605ac3e495e9c 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--dark.png and b/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--light.png b/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--light.png index 62d41c8a92f04..5108cb2db25da 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--light.png and b/frontend/__snapshots__/scenes-other-invitesignup--invalid-link--light.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--logged-in--dark.png b/frontend/__snapshots__/scenes-other-invitesignup--logged-in--dark.png index 1dc24c67f58cf..fc126241a2102 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--logged-in--dark.png and b/frontend/__snapshots__/scenes-other-invitesignup--logged-in--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--logged-in--light.png b/frontend/__snapshots__/scenes-other-invitesignup--logged-in--light.png index 74bd780e5f791..8783f29981d49 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--logged-in--light.png and b/frontend/__snapshots__/scenes-other-invitesignup--logged-in--light.png differ diff --git 
a/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--dark.png b/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--dark.png index cea25c91e4463..1aa1af55b6ba0 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--dark.png and b/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--light.png b/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--light.png index f2f87bcabf0e4..e11aec2c721fe 100644 Binary files a/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--light.png and b/frontend/__snapshots__/scenes-other-invitesignup--self-hosted--light.png differ diff --git a/frontend/__snapshots__/scenes-other-login--cloud--dark.png b/frontend/__snapshots__/scenes-other-login--cloud--dark.png index 79be2f6da084b..7a14791959cef 100644 Binary files a/frontend/__snapshots__/scenes-other-login--cloud--dark.png and b/frontend/__snapshots__/scenes-other-login--cloud--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-login--cloud--light.png b/frontend/__snapshots__/scenes-other-login--cloud--light.png index 089113c318132..187cb08b520a1 100644 Binary files a/frontend/__snapshots__/scenes-other-login--cloud--light.png and b/frontend/__snapshots__/scenes-other-login--cloud--light.png differ diff --git a/frontend/__snapshots__/scenes-other-login--cloud-eu--dark.png b/frontend/__snapshots__/scenes-other-login--cloud-eu--dark.png index 3759b88e7ce6c..1708f185a9db4 100644 Binary files a/frontend/__snapshots__/scenes-other-login--cloud-eu--dark.png and b/frontend/__snapshots__/scenes-other-login--cloud-eu--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-login--cloud-eu--light.png b/frontend/__snapshots__/scenes-other-login--cloud-eu--light.png index 34ff210eb5352..2f675dc967660 100644 Binary files a/frontend/__snapshots__/scenes-other-login--cloud-eu--light.png and b/frontend/__snapshots__/scenes-other-login--cloud-eu--light.png differ diff --git a/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--dark.png b/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--dark.png index 5e3dc60309789..d0e0904bb1796 100644 Binary files a/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--dark.png and b/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--light.png b/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--light.png index e67a6a0e2ff33..0f51547c0ad9b 100644 Binary files a/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--light.png and b/frontend/__snapshots__/scenes-other-login--cloud-with-google-login-enforcement--light.png differ diff --git a/frontend/__snapshots__/scenes-other-login--second-factor--dark.png b/frontend/__snapshots__/scenes-other-login--second-factor--dark.png index 3be47d83ee98e..442c6d925ac8f 100644 Binary files a/frontend/__snapshots__/scenes-other-login--second-factor--dark.png and b/frontend/__snapshots__/scenes-other-login--second-factor--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-login--second-factor--light.png b/frontend/__snapshots__/scenes-other-login--second-factor--light.png index 661e5be1063bb..7f2ee5e3636e9 100644 Binary files 
a/frontend/__snapshots__/scenes-other-login--second-factor--light.png and b/frontend/__snapshots__/scenes-other-login--second-factor--light.png differ diff --git a/frontend/__snapshots__/scenes-other-login--self-hosted--dark.png b/frontend/__snapshots__/scenes-other-login--self-hosted--dark.png index 4d54c69d31e1b..00219a94898ad 100644 Binary files a/frontend/__snapshots__/scenes-other-login--self-hosted--dark.png and b/frontend/__snapshots__/scenes-other-login--self-hosted--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-login--self-hosted--light.png b/frontend/__snapshots__/scenes-other-login--self-hosted--light.png index f859af95307bb..12d1a15ba4fdb 100644 Binary files a/frontend/__snapshots__/scenes-other-login--self-hosted--light.png and b/frontend/__snapshots__/scenes-other-login--self-hosted--light.png differ diff --git a/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--dark.png b/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--dark.png index f83fd6633b093..752d4e4ee3c7d 100644 Binary files a/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--dark.png and b/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--light.png b/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--light.png index bc653908edc34..02c129a707f6d 100644 Binary files a/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--light.png and b/frontend/__snapshots__/scenes-other-login--self-hosted-with-saml--light.png differ diff --git a/frontend/__snapshots__/scenes-other-login--sso-error--dark.png b/frontend/__snapshots__/scenes-other-login--sso-error--dark.png index 2d4ed894b8f09..d3e245bf489c0 100644 Binary files a/frontend/__snapshots__/scenes-other-login--sso-error--dark.png and b/frontend/__snapshots__/scenes-other-login--sso-error--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-login--sso-error--light.png b/frontend/__snapshots__/scenes-other-login--sso-error--light.png index 9702eef8a92dd..a6286b20801f7 100644 Binary files a/frontend/__snapshots__/scenes-other-login--sso-error--light.png and b/frontend/__snapshots__/scenes-other-login--sso-error--light.png differ diff --git a/frontend/__snapshots__/scenes-other-password-reset-complete--default--dark.png b/frontend/__snapshots__/scenes-other-password-reset-complete--default--dark.png index b3969f7948c77..81780bef94ddb 100644 Binary files a/frontend/__snapshots__/scenes-other-password-reset-complete--default--dark.png and b/frontend/__snapshots__/scenes-other-password-reset-complete--default--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-password-reset-complete--default--light.png b/frontend/__snapshots__/scenes-other-password-reset-complete--default--light.png index cf50642150875..c721ccfce7107 100644 Binary files a/frontend/__snapshots__/scenes-other-password-reset-complete--default--light.png and b/frontend/__snapshots__/scenes-other-password-reset-complete--default--light.png differ diff --git a/frontend/__snapshots__/scenes-other-password-reset-complete--invalid-link--light.png b/frontend/__snapshots__/scenes-other-password-reset-complete--invalid-link--light.png index d94a85300a4bd..4e8728bcfada1 100644 Binary files a/frontend/__snapshots__/scenes-other-password-reset-complete--invalid-link--light.png and b/frontend/__snapshots__/scenes-other-password-reset-complete--invalid-link--light.png differ diff --git 
a/frontend/__snapshots__/scenes-other-preflight--preflight--dark.png b/frontend/__snapshots__/scenes-other-preflight--preflight--dark.png index 0f038280670e4..da76826c30718 100644 Binary files a/frontend/__snapshots__/scenes-other-preflight--preflight--dark.png and b/frontend/__snapshots__/scenes-other-preflight--preflight--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-preflight--preflight--light.png b/frontend/__snapshots__/scenes-other-preflight--preflight--light.png index 1fb61449ce120..e312576737901 100644 Binary files a/frontend/__snapshots__/scenes-other-preflight--preflight--light.png and b/frontend/__snapshots__/scenes-other-preflight--preflight--light.png differ diff --git a/frontend/__snapshots__/scenes-other-settings--settings-user--dark.png b/frontend/__snapshots__/scenes-other-settings--settings-user--dark.png index fe51119bcbbf4..f68ff84645860 100644 Binary files a/frontend/__snapshots__/scenes-other-settings--settings-user--dark.png and b/frontend/__snapshots__/scenes-other-settings--settings-user--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-settings--settings-user--light.png b/frontend/__snapshots__/scenes-other-settings--settings-user--light.png index 25bda7486cff0..b4a80fca60fc9 100644 Binary files a/frontend/__snapshots__/scenes-other-settings--settings-user--light.png and b/frontend/__snapshots__/scenes-other-settings--settings-user--light.png differ diff --git a/frontend/__snapshots__/scenes-other-signup--cloud--dark.png b/frontend/__snapshots__/scenes-other-signup--cloud--dark.png index 22483eefa81c9..3812da12f2360 100644 Binary files a/frontend/__snapshots__/scenes-other-signup--cloud--dark.png and b/frontend/__snapshots__/scenes-other-signup--cloud--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-signup--cloud--light.png b/frontend/__snapshots__/scenes-other-signup--cloud--light.png index c527248055ffa..06af20259734b 100644 Binary files a/frontend/__snapshots__/scenes-other-signup--cloud--light.png and b/frontend/__snapshots__/scenes-other-signup--cloud--light.png differ diff --git a/frontend/__snapshots__/scenes-other-signup--self-hosted--dark.png b/frontend/__snapshots__/scenes-other-signup--self-hosted--dark.png index d5e0428cc0cef..4070b44e89d74 100644 Binary files a/frontend/__snapshots__/scenes-other-signup--self-hosted--dark.png and b/frontend/__snapshots__/scenes-other-signup--self-hosted--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-signup--self-hosted--light.png b/frontend/__snapshots__/scenes-other-signup--self-hosted--light.png index a46e014a8fb4e..cf918675e9bfe 100644 Binary files a/frontend/__snapshots__/scenes-other-signup--self-hosted--light.png and b/frontend/__snapshots__/scenes-other-signup--self-hosted--light.png differ diff --git a/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--dark.png b/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--dark.png index 7744a05479fe6..cdb034b88aba3 100644 Binary files a/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--dark.png and b/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--light.png b/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--light.png index 3ce828b31ab1a..51591a33cb059 100644 Binary files a/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--light.png and b/frontend/__snapshots__/scenes-other-signup--self-hosted-sso--light.png differ diff --git 
a/frontend/__snapshots__/scenes-other-toolbar--heatmap--dark.png b/frontend/__snapshots__/scenes-other-toolbar--heatmap--dark.png index cc32a507690ea..0724cce928ed6 100644 Binary files a/frontend/__snapshots__/scenes-other-toolbar--heatmap--dark.png and b/frontend/__snapshots__/scenes-other-toolbar--heatmap--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-toolbar--heatmap--light.png b/frontend/__snapshots__/scenes-other-toolbar--heatmap--light.png index 9d5b574363ee9..f19ddfa738b17 100644 Binary files a/frontend/__snapshots__/scenes-other-toolbar--heatmap--light.png and b/frontend/__snapshots__/scenes-other-toolbar--heatmap--light.png differ diff --git a/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--dark.png b/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--dark.png index d9a8ef34ae661..53dd61dfec0b7 100644 Binary files a/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--dark.png and b/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--light.png b/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--light.png index 9cf7721957e98..1480c76289b9d 100644 Binary files a/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--light.png and b/frontend/__snapshots__/scenes-other-toolbar--heatmap-dark--light.png differ diff --git a/frontend/__snapshots__/scenes-other-unsubscribe--unsubscribe-scene--light.png b/frontend/__snapshots__/scenes-other-unsubscribe--unsubscribe-scene--light.png index 88a84bc3e21f3..2f60dada396b9 100644 Binary files a/frontend/__snapshots__/scenes-other-unsubscribe--unsubscribe-scene--light.png and b/frontend/__snapshots__/scenes-other-unsubscribe--unsubscribe-scene--light.png differ diff --git a/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--dark.png b/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--dark.png index a996167a4d6f2..dd975c4acc401 100644 Binary files a/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--dark.png and b/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--light.png b/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--light.png index fee2278660097..c45bfc650efc8 100644 Binary files a/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--light.png and b/frontend/__snapshots__/scenes-other-verify-email--verify-email-invalid--light.png differ diff --git a/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--dark.png b/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--dark.png index fa760226c666b..0d23b6e09b819 100644 Binary files a/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--dark.png and b/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--light.png b/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--light.png index fe0bff82ad081..a867c9f022e0a 100644 Binary files a/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--light.png and b/frontend/__snapshots__/scenes-other-verify-email--verify-email-pending--light.png differ diff --git a/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--dark.png 
b/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--dark.png index 2df3dabc00d20..2bf2567a3e6db 100644 Binary files a/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--dark.png and b/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--dark.png differ diff --git a/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--light.png b/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--light.png index 3cfa63c456086..f76014d8ffa51 100644 Binary files a/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--light.png and b/frontend/__snapshots__/scenes-other-verify-email--verify-email-success--light.png differ diff --git a/frontend/src/layout/navigation-3000/sidepanel/panels/activation/activationLogic.ts b/frontend/src/layout/navigation-3000/sidepanel/panels/activation/activationLogic.ts index e8afd00aea1ed..b0be99d68f438 100644 --- a/frontend/src/layout/navigation-3000/sidepanel/panels/activation/activationLogic.ts +++ b/frontend/src/layout/navigation-3000/sidepanel/panels/activation/activationLogic.ts @@ -2,6 +2,7 @@ import { actions, connect, events, kea, listeners, path, reducers, selectors } f import { loaders } from 'kea-loaders' import { router } from 'kea-router' import api from 'lib/api' +import { reverseProxyCheckerLogic } from 'lib/components/ReverseProxyChecker/reverseProxyCheckerLogic' import { permanentlyMount } from 'lib/utils/kea-logic-builders' import posthog from 'posthog-js' import { membersLogic } from 'scenes/organization/membersLogic' @@ -58,6 +59,8 @@ export const activationLogic = kea([ ['insights'], dashboardsModel, ['rawDashboards'], + reverseProxyCheckerLogic, + ['hasReverseProxy'], ], actions: [ inviteLogic, @@ -193,6 +196,7 @@ export const activationLogic = kea([ s.customEventsCount, s.installedPlugins, s.currentTeamSkippedTasks, + s.hasReverseProxy, ], ( currentTeam, @@ -202,7 +206,8 @@ export const activationLogic = kea([ dashboards, customEventsCount, installedPlugins, - skippedTasks + skippedTasks, + hasReverseProxy ) => { const tasks: ActivationTaskType[] = [] for (const task of Object.values(ActivationTasks)) { @@ -286,7 +291,7 @@ export const activationLogic = kea([ id: ActivationTasks.SetUpReverseProxy, name: 'Set up a reverse proxy', description: 'Send your events from your own domain to avoid tracking blockers', - completed: false, + completed: hasReverseProxy || false, canSkip: true, skipped: skippedTasks.includes(ActivationTasks.SetUpReverseProxy), url: 'https://posthog.com/docs/advanced/proxy', diff --git a/frontend/src/lib/components/BridgePage/BridgePage.scss b/frontend/src/lib/components/BridgePage/BridgePage.scss index a95676cd869fd..cbaa3daa9631c 100644 --- a/frontend/src/lib/components/BridgePage/BridgePage.scss +++ b/frontend/src/lib/components/BridgePage/BridgePage.scss @@ -23,7 +23,13 @@ } .BridgePage__content-wrapper { - max-width: 100%; + width: 100%; + max-width: 380px; + + @include screen($md) { + width: auto; + max-width: 100%; + } } .BridgePage__left-wrapper { diff --git a/frontend/src/lib/components/CustomerLogo.tsx b/frontend/src/lib/components/CustomerLogo.tsx new file mode 100644 index 0000000000000..659f739d1d7dc --- /dev/null +++ b/frontend/src/lib/components/CustomerLogo.tsx @@ -0,0 +1,23 @@ +interface CustomerProps { + image: string + alt: string + className?: string +} + +interface LogoProps { + src: string + alt: string + className?: string +} + +const Logo = ({ src, alt, className = '' }: LogoProps): JSX.Element => 
( +    <img className={className} src={src} alt={alt} /> +) + +export const CustomerLogo = ({ image, alt, className = '' }: CustomerProps): JSX.Element => { +    return ( +
<li className={className}> +            <Logo src={image} alt={alt} /> +
</li> +    ) +}
diff --git a/frontend/src/lib/components/MemberSelect.tsx b/frontend/src/lib/components/MemberSelect.tsx index 09e13a2626eae..c1a919dbf6b68 100644 --- a/frontend/src/lib/components/MemberSelect.tsx +++ b/frontend/src/lib/components/MemberSelect.tsx @@ -44,7 +44,7 @@ export function MemberSelect({ defaultLabel = 'Any user', value, onChange }: Mem <LemonDropdown -            sameWidth +            matchWidth onVisibilityChange={(visible) => setShowPopover(visible)} overlay={
diff --git a/frontend/src/lib/components/ReverseProxyChecker/reverseProxyCheckerLogic.test.ts b/frontend/src/lib/components/ReverseProxyChecker/reverseProxyCheckerLogic.test.ts new file mode 100644 index 0000000000000..5ea635b7e4f90 --- /dev/null +++ b/frontend/src/lib/components/ReverseProxyChecker/reverseProxyCheckerLogic.test.ts @@ -0,0 +1,63 @@ +import { expectLogic } from 'kea-test-utils' + +import { useMocks } from '~/mocks/jest' +import { initKeaTests } from '~/test/init' + +import { reverseProxyCheckerLogic } from './reverseProxyCheckerLogic' + +const hasReverseProxyValues = [['https://proxy.example.com'], [null]] +const doesNotHaveReverseProxyValues = [[null], [null]] + +const useMockedValues = (results: (string | null)[][]): void => { + useMocks({ + post: { + '/api/projects/:team/query': () => [ + 200, + { + results, + }, + ], + }, + }) +} + +describe('reverseProxyCheckerLogic', () => { + let logic: ReturnType<typeof reverseProxyCheckerLogic.build> + + beforeEach(() => { + initKeaTests() + localStorage.clear() + logic = reverseProxyCheckerLogic() + }) + + afterEach(() => { + logic.unmount() + }) + + it('should not have a reverse proxy set - when no data', async () => { + useMockedValues([]) + + logic.mount() + await expectLogic(logic).toFinishAllListeners().toMatchValues({ + hasReverseProxy: false, + }) + }) + + it('should not have a reverse proxy set - when data with no lib_custom_api_host values', async () => { + useMockedValues(doesNotHaveReverseProxyValues) + + logic.mount() + await expectLogic(logic).toFinishAllListeners().toMatchValues({ + hasReverseProxy: false, + }) + }) + + it('should have a reverse proxy set', async () => { + useMockedValues(hasReverseProxyValues) + + logic.mount() + await expectLogic(logic).toFinishAllListeners().toMatchValues({ + hasReverseProxy: true, + }) + }) +})
diff --git a/frontend/src/lib/components/ReverseProxyChecker/reverseProxyCheckerLogic.ts b/frontend/src/lib/components/ReverseProxyChecker/reverseProxyCheckerLogic.ts new file mode 100644 index 0000000000000..6b945e5c94c48 --- /dev/null +++ b/frontend/src/lib/components/ReverseProxyChecker/reverseProxyCheckerLogic.ts @@ -0,0 +1,49 @@ +import { afterMount, kea, path, reducers } from 'kea' +import { loaders } from 'kea-loaders' +import api from 'lib/api' + +import { HogQLQuery, NodeKind } from '~/queries/schema' +import { hogql } from '~/queries/utils' + +import type { reverseProxyCheckerLogicType } from './reverseProxyCheckerLogicType' + +const CHECK_INTERVAL_MS = 1000 * 60 * 60 // 1 hour + +export const reverseProxyCheckerLogic = kea<reverseProxyCheckerLogicType>([ + path(['components', 'ReverseProxyChecker', 'reverseProxyCheckerLogic']), + loaders({ + hasReverseProxy: [ + false as boolean | null, + { + loadHasReverseProxy: async () => { + const query: HogQLQuery = { + kind: NodeKind.HogQLQuery, + query: hogql`SELECT properties.$lib_custom_api_host AS lib_custom_api_host + FROM events + WHERE timestamp >= now() - INTERVAL 1 DAY + AND timestamp <= now() + ORDER BY timestamp DESC + limit 10`, + } + + const res = await api.query(query) + return !!res.results?.find((x) => !!x[0]) + }, + }, + ], + }), + reducers({ + lastCheckedTimestamp: [ + 0, + { persist: true }, + { +
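// Stamped on success and persisted, so afterMount re-runs the query at most once per CHECK_INTERVAL_MS +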
loadHasReverseProxySuccess: () => Date.now(), + }, + ], + }), + afterMount(({ actions, values }) => { + if (values.lastCheckedTimestamp < Date.now() - CHECK_INTERVAL_MS) { + actions.loadHasReverseProxy() + } + }), +]) diff --git a/frontend/src/lib/components/SeriesGlyph.tsx b/frontend/src/lib/components/SeriesGlyph.tsx index ad4c25429f0da..d34a88de6de34 100644 --- a/frontend/src/lib/components/SeriesGlyph.tsx +++ b/frontend/src/lib/components/SeriesGlyph.tsx @@ -58,7 +58,7 @@ interface ExperimentVariantNumberProps { index: number } export function ExperimentVariantNumber({ className, index }: ExperimentVariantNumberProps): JSX.Element { - const color = getSeriesColor(index) + const color = getSeriesColor(index + 1) const { isDarkModeOn } = useValues(themeLogic) return ( diff --git a/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx b/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx index b0e6cfc85a508..3a7e7e33d0218 100644 --- a/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx +++ b/frontend/src/lib/components/TaxonomicPopover/TaxonomicPopover.tsx @@ -96,7 +96,7 @@ export function TaxonomicPopover } - sameWidth={false} + matchWidth={false} actionable visible={visible} onClickOutside={() => { diff --git a/frontend/src/lib/components/VersionChecker/versionCheckerLogic.ts b/frontend/src/lib/components/VersionChecker/versionCheckerLogic.ts index cc26d0eff45fc..ce53ba46d5db8 100644 --- a/frontend/src/lib/components/VersionChecker/versionCheckerLogic.ts +++ b/frontend/src/lib/components/VersionChecker/versionCheckerLogic.ts @@ -7,7 +7,7 @@ import { hogql } from '~/queries/utils' import type { versionCheckerLogicType } from './versionCheckerLogicType' -const CHECK_INTERVAL_MS = 1000 * 60 * 60 // 6 hour +const CHECK_INTERVAL_MS = 1000 * 60 * 60 * 6 // 6 hour export type SDKVersion = { version: string diff --git a/frontend/src/lib/constants.tsx b/frontend/src/lib/constants.tsx index 08dcb9b875e47..07ae25a326be8 100644 --- a/frontend/src/lib/constants.tsx +++ b/frontend/src/lib/constants.tsx @@ -99,6 +99,7 @@ export const INSTANTLY_AVAILABLE_PROPERTIES = [ 'distinct_id', ] export const MAX_EXPERIMENT_VARIANTS = 10 +export const EXPERIMENT_DEFAULT_DURATION = 14 // days // Event constants export const ACTION_TYPE = 'action_type' @@ -142,14 +143,13 @@ export const FEATURE_FLAGS = { DEBUG_REACT_RENDERS: 'debug-react-renders', // owner: @benjackwhite AUTO_ROLLBACK_FEATURE_FLAGS: 'auto-rollback-feature-flags', // owner: @EDsCODE ONBOARDING_V2_DEMO: 'onboarding-v2-demo', // owner: #team-growth - ROLE_BASED_ACCESS: 'role-based-access', // owner: #team-experiments, @liyiy QUERY_RUNNING_TIME: 'query_running_time', // owner: @mariusandra QUERY_TIMINGS: 'query-timings', // owner: @mariusandra QUERY_ASYNC: 'query-async', // owner: @webjunkie POSTHOG_3000_NAV: 'posthog-3000-nav', // owner: @Twixes HEDGEHOG_MODE: 'hedgehog-mode', // owner: @benjackwhite HEDGEHOG_MODE_DEBUG: 'hedgehog-mode-debug', // owner: @benjackwhite - GENERIC_SIGNUP_BENEFITS: 'generic-signup-benefits', // experiment, owner: @raquelmsmith + SIGNUP_BENEFITS: 'signup-benefits', // experiment, owner: @zlwaterfield WEB_ANALYTICS: 'web-analytics', // owner @robbie-c #team-web-analytics WEB_ANALYTICS_SAMPLING: 'web-analytics-sampling', // owner @robbie-c #team-web-analytics HIGH_FREQUENCY_BATCH_EXPORTS: 'high-frequency-batch-exports', // owner: @tomasfarias @@ -174,25 +174,19 @@ export const FEATURE_FLAGS = { BI_VIZ: 'bi_viz', // owner: @Gilbert09 HOGQL_AUTOCOMPLETE: 'hogql-autocomplete', // owner: 
@Gilbert09 WEBHOOKS_DENYLIST: 'webhooks-denylist', // owner: #team-pipeline - SURVEYS_RESULTS_VISUALIZATIONS: 'surveys-results-visualizations', // owner: @jurajmajerik - SURVEYS_PAYGATES: 'surveys-paygates', PERSONS_HOGQL_QUERY: 'persons-hogql-query', // owner: @mariusandra PIPELINE_UI: 'pipeline-ui', // owner: #team-pipeline SESSION_RECORDING_SAMPLING: 'session-recording-sampling', // owner: #team-replay PERSON_FEED_CANVAS: 'person-feed-canvas', // owner: #project-canvas - MULTI_PROJECT_FEATURE_FLAGS: 'multi-project-feature-flags', // owner: @jurajmajerik #team-feature-success FEATURE_FLAG_COHORT_CREATION: 'feature-flag-cohort-creation', // owner: @neilkakkar #team-feature-success INSIGHT_HORIZONTAL_CONTROLS: 'insight-horizontal-controls', // owner: @benjackwhite SURVEYS_WIDGETS: 'surveys-widgets', // owner: @liyiy - SCHEDULED_CHANGES_FEATURE_FLAGS: 'scheduled-changes-feature-flags', // owner: @jurajmajerik #team-feature-success - SESSION_REPLAY_MOBILE: 'session-replay-mobile', // owner: #team-replay INVITE_TEAM_MEMBER_ONBOARDING: 'invite-team-member-onboarding', // owner: @biancayang YEAR_IN_HOG: 'year-in-hog', // owner: #team-replay SESSION_REPLAY_EXPORT_MOBILE_DATA: 'session-replay-export-mobile-data', // owner: #team-replay DISCUSSIONS: 'discussions', // owner: #team-replay REDIRECT_INSIGHT_CREATION_PRODUCT_ANALYTICS_ONBOARDING: 'redirect-insight-creation-product-analytics-onboarding', // owner: @biancayang SIDEPANEL_STATUS: 'sidepanel-status', // owner: @benjackwhite - NEW_FEATURE_FLAG_OPERATORS: 'new-feature-flag-operators', // owner: @neilkakkar AI_SESSION_SUMMARY: 'ai-session-summary', // owner: #team-replay AI_SESSION_PERMISSIONS: 'ai-session-permissions', // owner: #team-replay PRODUCT_INTRO_PAGES: 'product-intro-pages', // owner: @raquelmsmith diff --git a/frontend/src/lib/customers/airbus.svg b/frontend/src/lib/customers/airbus.svg new file mode 100644 index 0000000000000..ff18cae1c8c0f --- /dev/null +++ b/frontend/src/lib/customers/airbus.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/frontend/src/lib/customers/hasura.svg b/frontend/src/lib/customers/hasura.svg new file mode 100644 index 0000000000000..1eb0373ecf1f4 --- /dev/null +++ b/frontend/src/lib/customers/hasura.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/frontend/src/lib/customers/staples.svg b/frontend/src/lib/customers/staples.svg new file mode 100644 index 0000000000000..0e1ff76715798 --- /dev/null +++ b/frontend/src/lib/customers/staples.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/frontend/src/lib/customers/y-combinator.svg b/frontend/src/lib/customers/y-combinator.svg new file mode 100644 index 0000000000000..1d19c5ff15d4a --- /dev/null +++ b/frontend/src/lib/customers/y-combinator.svg @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/frontend/src/lib/lemon-ui/LemonButton/LemonButton.stories.tsx b/frontend/src/lib/lemon-ui/LemonButton/LemonButton.stories.tsx index 6664aac5c5fc6..245754ad1a090 100644 --- a/frontend/src/lib/lemon-ui/LemonButton/LemonButton.stories.tsx +++ b/frontend/src/lib/lemon-ui/LemonButton/LemonButton.stories.tsx @@ -386,7 +386,7 @@ WithDropdownToTheBottom.args = { ), placement: 'bottom', - sameWidth: true, + matchWidth: true, }, } @@ -404,7 +404,7 @@ WithVeryLongPopoverToTheBottom.args = { ), placement: 'bottom', - sameWidth: true, + matchWidth: true, }, } diff --git a/frontend/src/lib/lemon-ui/LemonInputSelect/LemonInputSelect.tsx b/frontend/src/lib/lemon-ui/LemonInputSelect/LemonInputSelect.tsx index 9e5240a275a68..86869bbf9bb4d 100644 
--- a/frontend/src/lib/lemon-ui/LemonInputSelect/LemonInputSelect.tsx +++ b/frontend/src/lib/lemon-ui/LemonInputSelect/LemonInputSelect.tsx @@ -244,7 +244,7 @@ export function LemonInputSelect({ return ( > = forwardRef(function LemonProgress( - { size = 'medium', percent, strokeColor = 'var(--brand-blue)', children, className }, + { + size = 'medium', + percent, + bgColor = 'var(--bg-3000)', + strokeColor = 'var(--brand-blue)', + children, + className, + }, ref ): JSX.Element { const width = isNaN(percent) ? 0 : Math.max(Math.min(percent, 100), 0) @@ -20,10 +28,12 @@ export const LemonProgress: React.FunctionComponent ({ (function P className, padded = true, middleware, - sameWidth = false, + matchWidth = false, maxContentWidth = false, additionalRefs = [], closeParentPopoverOnClickInside = false, @@ -131,7 +131,10 @@ export const Popover = React.forwardRef(function P apply({ availableWidth, availableHeight, rects, elements: { floating } }) { floating.style.maxHeight = `${availableHeight}px` floating.style.maxWidth = `${availableWidth}px` - floating.style.width = sameWidth ? `${rects.reference.width}px` : 'initial' + floating.style.width = 'initial' + if (matchWidth) { + floating.style.minWidth = `${rects.reference.width}px` + } }, }), ...(showArrow ? [arrow({ element: arrowRef, padding: 8 })] : []), diff --git a/frontend/src/queries/nodes/DataTable/SavedQueries.tsx b/frontend/src/queries/nodes/DataTable/SavedQueries.tsx index 5f10bef86273c..bec890d838bee 100644 --- a/frontend/src/queries/nodes/DataTable/SavedQueries.tsx +++ b/frontend/src/queries/nodes/DataTable/SavedQueries.tsx @@ -34,7 +34,7 @@ export function SavedQueries({ query, setQuery }: SavedQueriesProps): JSX.Elemen return ( ( +
    {footerHighlights[preflight?.cloud ? 'cloud' : 'selfHosted'].map((val, idx) => ( - +

    {val} - +

    ))} - +
    } sideLogo leftContainerContent={} @@ -50,51 +52,82 @@ export function SignupContainer(): JSX.Element | null { ) : null } +type ProductBenefit = { + benefit: string + description: string | ReactNode +} + +const getProductBenefits = (featureFlags: FeatureFlagsSet): ProductBenefit[] => { + const signupBenefitsFlag = featureFlags[FEATURE_FLAGS.SIGNUP_BENEFITS] + switch (signupBenefitsFlag) { + case 'generic-language': + return [ + { + benefit: 'Free usage every month - even on paid plans', + description: '1M free events, 5K free session recordings, and more. Every month. Forever.', + }, + { + benefit: 'Start collecting data immediately', + description: 'Integrate with developer-friendly APIs or a low-code web snippet.', + }, + { + benefit: 'Join industry leaders that run on PostHog', + description: + 'Airbus, Hasura, Y Combinator, Staples, and thousands more trust PostHog as their Product OS.', + }, + ] + case 'logos': + return [ + { + benefit: '1M events free every month', + description: 'Product analytics, feature flags, experiments, and more.', + }, + { + benefit: 'Start collecting events immediately', + description: 'Integrate with developer-friendly APIs or use our easy autocapture script.', + }, + { + benefit: 'Join industry leaders that run on PostHog', + description: ( +
    + {[airbus, hasura, yCombinator, staples].map((company, i) => ( + + + + ))} +
    + ), + }, + ] + default: + return [ + { + benefit: 'Free for 1M events every month', + description: 'Product analytics, feature flags, experiments, and more.', + }, + { + benefit: 'Start collecting events immediately', + description: 'Integrate with developer-friendly APIs or use our easy autocapture script.', + }, + { + benefit: 'Join industry leaders that run on PostHog', + description: + 'Airbus, Hasura, Y Combinator, Staples, and thousands more trust PostHog as their Product OS.', + }, + ] + } +} + export function SignupLeftContainer(): JSX.Element { const { preflight } = useValues(preflightLogic) const { featureFlags } = useValues(featureFlagLogic) - const showGenericSignupBenefits: boolean = featureFlags[FEATURE_FLAGS.GENERIC_SIGNUP_BENEFITS] === 'test' - const getRegionUrl = (region: string): string => { const { pathname, search, hash } = router.values.currentLocation return `https://${CLOUD_HOSTNAMES[region]}${pathname}${search}${hash}` } - const productBenefits: { - benefit: string - description: string - }[] = showGenericSignupBenefits - ? [ - { - benefit: 'Free usage every month - even on paid plans', - description: '1M free events, 5K free session recordings, and more. Every month. Forever.', - }, - { - benefit: 'Start collecting data immediately', - description: 'Integrate with developer-friendly APIs or low-code web snippet.', - }, - { - benefit: 'Join industry leaders that run on PostHog', - description: - 'ClickHouse, Airbus, Hasura, Y Combinator, and thousands more trust PostHog as their Product OS.', - }, - ] - : [ - { - benefit: 'Free for 1M events every month', - description: 'Product analytics, feature flags, experiments, and more.', - }, - { - benefit: 'Start collecting events immediately', - description: 'Integrate with developer-friendly APIs or use our easy autocapture script.', - }, - { - benefit: 'Join industry leaders that run on PostHog', - description: - 'ClickHouse, Airbus, Hasura, Y Combinator, and thousands more trust PostHog as their Product OS.', - }, - ] + const productBenefits = getProductBenefits(featureFlags) return ( <> diff --git a/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx b/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx index b015659cfaef1..a2a9f9968f82c 100644 --- a/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx +++ b/frontend/src/scenes/batch_exports/BatchExportEditForm.tsx @@ -242,6 +242,15 @@ export function BatchExportsEditFields({ ]} /> + + + +
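The `getProductBenefits` helper above selects signup copy from the `SIGNUP_BENEFITS` multivariate flag: the 'generic-language' and 'logos' variants each get their own benefit list, and anything else falls back to the default copy. A hedged usage sketch (the 'signup-benefits' key string is an assumption; the diff only shows the `FEATURE_FLAGS.SIGNUP_BENEFITS` constant):

```ts
// Multivariate flags resolve to their variant key in the featureFlags map.
getProductBenefits({ 'signup-benefits': 'generic-language' }) // copy-only benefit list
getProductBenefits({ 'signup-benefits': 'logos' }) // benefit list with the customer-logo row (airbus, hasura, ...)
getProductBenefits({}) // default fallback copy
```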
    diff --git a/frontend/src/scenes/batch_exports/BatchExports.stories.tsx b/frontend/src/scenes/batch_exports/BatchExports.stories.tsx index 0dd616c44982a..dbd6779cb208d 100644 --- a/frontend/src/scenes/batch_exports/BatchExports.stories.tsx +++ b/frontend/src/scenes/batch_exports/BatchExports.stories.tsx @@ -42,6 +42,7 @@ export default { include_events: [], encryption: null, kms_key_id: null, + file_format: 'JSONLines', }, }, start_at: null, diff --git a/frontend/src/scenes/batch_exports/batchExportEditLogic.ts b/frontend/src/scenes/batch_exports/batchExportEditLogic.ts index bc86d1618fe4f..30c123256d81a 100644 --- a/frontend/src/scenes/batch_exports/batchExportEditLogic.ts +++ b/frontend/src/scenes/batch_exports/batchExportEditLogic.ts @@ -90,6 +90,7 @@ export const batchExportFormFields = ( aws_secret_access_key: isNew ? (!config.aws_secret_access_key ? 'This field is required' : '') : '', compression: '', encryption: '', + file_format: isNew ? (!config.file_format ? 'This field is required' : '') : '', kms_key_id: !config.kms_key_id && config.encryption == 'aws:kms' ? 'This field is required' : '', exclude_events: '', include_events: '', diff --git a/frontend/src/scenes/debug/HogQLDebug.tsx b/frontend/src/scenes/debug/HogQLDebug.tsx index 5defca51309b1..e2c4a08341d13 100644 --- a/frontend/src/scenes/debug/HogQLDebug.tsx +++ b/frontend/src/scenes/debug/HogQLDebug.tsx @@ -71,6 +71,7 @@ export function HogQLDebug({ query, setQuery, queryKey }: HogQLDebugProps): JSX. { value: 'v1_enabled', label: 'V1 Enabled' }, { value: 'v1_mixed', label: 'V1 Mixed' }, { value: 'v2_enabled', label: 'V2 Enabled' }, + { value: 'v3_enabled', label: 'V3 Enabled (Join)' }, ]} onChange={(value) => setQuery({ diff --git a/frontend/src/scenes/experiments/Experiment.scss b/frontend/src/scenes/experiments/Experiment.scss index e56c2b26b11d5..8d0d2c667d705 100644 --- a/frontend/src/scenes/experiments/Experiment.scss +++ b/frontend/src/scenes/experiments/Experiment.scss @@ -156,17 +156,6 @@ } } -.preview-conversion-goal-num { - flex-shrink: 0; - width: 24px; - height: 24px; - margin-right: 0.5rem; - font-weight: 700; - color: var(--primary-alt); - text-align: center; - background-color: var(--side); -} - .experiment-preview-row { padding-bottom: 1rem; margin-bottom: 1rem; @@ -193,3 +182,9 @@ text-transform: uppercase; letter-spacing: 0.5px; } + +.experiment-view { + .InsightViz .LemonTable__cell--sticky::before { + background: var(--bg-table); + } +} diff --git a/frontend/src/scenes/experiments/Experiment.tsx b/frontend/src/scenes/experiments/Experiment.tsx index 40dc9632a6180..4a01bcb120b33 100644 --- a/frontend/src/scenes/experiments/Experiment.tsx +++ b/frontend/src/scenes/experiments/Experiment.tsx @@ -840,7 +840,13 @@ export function Experiment(): JSX.Element { ) } -const ResetButton = ({ experiment, onConfirm }: { experiment: ExperimentType; onConfirm: () => void }): JSX.Element => { +export const ResetButton = ({ + experiment, + onConfirm, +}: { + experiment: ExperimentType + onConfirm: () => void +}): JSX.Element => { const onClickReset = (): void => { LemonDialog.open({ title: 'Reset this experiment?', diff --git a/frontend/src/scenes/experiments/ExperimentCodeSnippets.tsx b/frontend/src/scenes/experiments/ExperimentCodeSnippets.tsx index b5d0f76e29e16..f4513affb6556 100644 --- a/frontend/src/scenes/experiments/ExperimentCodeSnippets.tsx +++ b/frontend/src/scenes/experiments/ExperimentCodeSnippets.tsx @@ -40,7 +40,7 @@ if (experimentFlagValue === '${variant}' ) { export function JSSnippet({ flagKey, 
variant }: SnippetProps): JSX.Element { return ( - <> +
    {`if (posthog.getFeatureFlag('${flagKey}') === '${variant}') { // Do something differently for this user @@ -49,11 +49,13 @@ export function JSSnippet({ flagKey, variant }: SnippetProps): JSX.Element { // so if something goes wrong with flag evaluation, you don't break your app. }`} - Test that it works +
    + Test that it works +
    {`posthog.featureFlags.override({'${flagKey}': '${variant}'})`} - +
    ) } diff --git a/frontend/src/scenes/experiments/ExperimentForm.tsx b/frontend/src/scenes/experiments/ExperimentForm.tsx new file mode 100644 index 0000000000000..5a95c20edd972 --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentForm.tsx @@ -0,0 +1,306 @@ +import './Experiment.scss' + +import { IconPlusSmall, IconTrash } from '@posthog/icons' +import { LemonDivider, LemonInput, LemonTextArea, Tooltip } from '@posthog/lemon-ui' +import { BindLogic, useActions, useValues } from 'kea' +import { Form, Group } from 'kea-forms' +import { ExperimentVariantNumber } from 'lib/components/SeriesGlyph' +import { MAX_EXPERIMENT_VARIANTS } from 'lib/constants' +import { IconChevronLeft } from 'lib/lemon-ui/icons' +import { LemonButton } from 'lib/lemon-ui/LemonButton' +import { LemonField } from 'lib/lemon-ui/LemonField' +import { LemonRadio } from 'lib/lemon-ui/LemonRadio' +import { capitalizeFirstLetter } from 'lib/utils' +import { useEffect } from 'react' +import { insightDataLogic } from 'scenes/insights/insightDataLogic' +import { insightLogic } from 'scenes/insights/insightLogic' + +import { Query } from '~/queries/Query/Query' +import { InsightType } from '~/types' + +import { EXPERIMENT_INSIGHT_ID } from './constants' +import { experimentLogic } from './experimentLogic' +import { ExperimentInsightCreator } from './MetricSelector' + +const StepInfo = (): JSX.Element => { + const { experiment } = useValues(experimentLogic) + const { addExperimentGroup, removeExperimentGroup, moveToNextFormStep } = useActions(experimentLogic) + + return ( +
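The `JSSnippet` component above renders both the gating check and the override helper. With hypothetical flag and variant names, its rendered output can be pasted into a browser console to verify the implementation:

```ts
// Assumed names for illustration: flagKey = 'signup-page', variant = 'test'.
if (posthog.getFeatureFlag('signup-page') === 'test') {
    // Do something differently for this user
}
// Force yourself into the variant while testing:
posthog.featureFlags.override({ 'signup-page': 'test' })
```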
    +
    +
    + + + + + + + + + +
    +
    +

    Variants

    +
    Add up to 9 variants to test against your control.
    + +
    +
    +

    Control

    +
    + + + + + + +
    +
    + Included automatically, cannot be edited or removed +
    +
    +
    +

    Test(s)

    + {experiment.parameters.feature_flag_variants?.map((_, index) => { + if (index === 0) { + return null + } + + return ( + +
    1 && 'mt-2'}`} + > + + + + +
    + {index !== 1 && ( + + } + onClick={() => removeExperimentGroup(index)} + /> + + )} +
    +
    +
    + ) + })} +
    + Alphanumeric, hyphens and underscores only +
    + {(experiment.parameters.feature_flag_variants.length ?? 0) < MAX_EXPERIMENT_VARIANTS && ( + addExperimentGroup()} + icon={} + data-attr="add-test-variant" + > + Add test variant + + )} +
    +
    +
    +
    + moveToNextFormStep()}> + Continue + +
    + ) +} + +const StepGoal = (): JSX.Element => { + const { experiment, exposureAndSampleSize, experimentInsightType, groupTypes, aggregationLabel } = + useValues(experimentLogic) + const { setExperiment, setNewExperimentInsight, createExperiment } = useActions(experimentLogic) + + // insightLogic + const logic = insightLogic({ dashboardItemId: EXPERIMENT_INSIGHT_ID }) + const { insightProps } = useValues(logic) + + // insightDataLogic + const { query } = useValues(insightDataLogic(insightProps)) + + return ( +
    +
    + {groupTypes.size > 0 && ( +
    +

    Participant type

    +
+ This sets the default aggregation type for all metrics and feature flags. You can change this at + any time by updating the metric or feature flag.
    + + { + const groupTypeIndex = rawGroupTypeIndex !== -1 ? rawGroupTypeIndex : undefined + + setExperiment({ + parameters: { + ...experiment.parameters, + aggregation_group_type_index: groupTypeIndex ?? undefined, + }, + }) + setNewExperimentInsight() + }} + options={[ + { value: -1, label: 'Persons' }, + ...Array.from(groupTypes.values()).map((groupType) => ({ + value: groupType.group_type_index, + label: capitalizeFirstLetter(aggregationLabel(groupType.group_type_index).plural), + })), + ]} + /> +
    + )} +
    +

    Goal type

    + + { + val && + setNewExperimentInsight({ + insight: val, + properties: experiment?.filters?.properties, + }) + }} + options={[ + { + value: InsightType.FUNNELS, + label: ( +
    +
    Conversion funnel
    +
    + Track how many people complete a sequence of actions and/or events +
    +
    + ), + }, + { + value: InsightType.TRENDS, + label: ( +
    +
    Trend
    +
    + Track a cumulative total count of a specific event or action +
    +
    + ), + }, + ]} + /> +
    +
    +

    Goal criteria

    +
    + {experimentInsightType === InsightType.FUNNELS + ? "Create the funnel where you'd like to see an increased conversion rate." + : 'Create a trend goal to track change in a single metric.'} +
    + +
    + +
    +
    +
    +

    Goal preview

    +
    + + + +
    +
    +
    + { + const { exposure, sampleSize } = exposureAndSampleSize + createExperiment(true, exposure, sampleSize) + }} + > + Save as draft + +
+ ) } + export function ExperimentForm(): JSX.Element { + const { currentFormStep, props } = useValues(experimentLogic) + const { setCurrentFormStep } = useActions(experimentLogic) + + const stepComponents = { + 0: <StepInfo />, + 1: <StepGoal />, + } + const CurrentStepComponent = (currentFormStep && stepComponents[currentFormStep]) || <StepInfo /> + + useEffect(() => { + setCurrentFormStep(0) + }, []) + + return (
    + {currentFormStep > 0 && ( + } + type="secondary" + className="my-4" + onClick={() => { + setCurrentFormStep(currentFormStep - 1) + }} + > + Back + + )} +
    + {CurrentStepComponent} +
    +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentImplementationDetails.tsx b/frontend/src/scenes/experiments/ExperimentImplementationDetails.tsx index 8880b55f7eabc..8a836986ed37a 100644 --- a/frontend/src/scenes/experiments/ExperimentImplementationDetails.tsx +++ b/frontend/src/scenes/experiments/ExperimentImplementationDetails.tsx @@ -109,35 +109,49 @@ export function ExperimentImplementationDetails({ experiment }: ExperimentImplem } return ( -
    -
    Feature flag usage and implementation
    -
    -
    -
    - Variant group - ({ - value: variant.key, - label: variant.key, - }) - )} - /> +
    +

    Implementation

    +
    +
    +
    +
    + Variant group + ({ + value: variant.key, + label: variant.key, + }) + )} + /> +
    +
    + +
    - +
    + Implement your experiment in code +
    +
    + +
    + + + See the docs for more implementation information. +
    - Implement your experiment in code - - - - See the docs for more implementation information. -
    ) diff --git a/frontend/src/scenes/experiments/ExperimentNext.tsx b/frontend/src/scenes/experiments/ExperimentNext.tsx index e1891241ac816..01557833d7f80 100644 --- a/frontend/src/scenes/experiments/ExperimentNext.tsx +++ b/frontend/src/scenes/experiments/ExperimentNext.tsx @@ -1,330 +1,71 @@ import './Experiment.scss' -import { IconPlusSmall, IconTrash } from '@posthog/icons' -import { LemonDivider, LemonInput, LemonTextArea, Tooltip } from '@posthog/lemon-ui' -import { BindLogic, useActions, useValues } from 'kea' -import { Form, Group } from 'kea-forms' -import { ExperimentVariantNumber } from 'lib/components/SeriesGlyph' -import { MAX_EXPERIMENT_VARIANTS } from 'lib/constants' -import { IconChevronRight } from 'lib/lemon-ui/icons' -import { LemonButton } from 'lib/lemon-ui/LemonButton' -import { LemonField } from 'lib/lemon-ui/LemonField' -import { LemonRadio } from 'lib/lemon-ui/LemonRadio' -import { capitalizeFirstLetter } from 'lib/utils' -import React from 'react' -import { insightDataLogic } from 'scenes/insights/insightDataLogic' -import { insightLogic } from 'scenes/insights/insightLogic' +import { useActions, useValues } from 'kea' -import { Query } from '~/queries/Query/Query' -import { InsightType } from '~/types' - -import { EXPERIMENT_INSIGHT_ID } from './constants' +import { ExperimentForm } from './ExperimentForm' +import { ExperimentImplementationDetails } from './ExperimentImplementationDetails' import { experimentLogic } from './experimentLogic' -import { ExperimentInsightCreator } from './MetricSelector' - -const Header = (): JSX.Element => { - const { currentFormStep } = useValues(experimentLogic) - - const steps = ['Info', 'Goal'] - - return ( -
    -
    -

    New experiment

    -
    Measure the impact of changes against the baseline.
    -
    -
    -
    - {steps.map((step, index) => ( - - {index > 0 && } -
    - {step} -
    -
    - ))} -
    -
    -
    - ) -} - -const StepInfo = (): JSX.Element => { - const { experiment } = useValues(experimentLogic) - const { addExperimentGroup, removeExperimentGroup, moveToNextFormStep } = useActions(experimentLogic) - - return ( -
    -
    -
    - - - - - - - - - -
    -
    -

    Variants

    -
    Add up to 9 variants to test against your control.
    - -
    -
    -

    Control

    -
    - - - - - - -
    -
    - Included automatically, cannot be edited or removed -
    -
    -
    -

    Test(s)

    - {experiment.parameters.feature_flag_variants?.map((_, index) => { - if (index === 0) { - return null - } - - return ( - -
    1 && 'mt-2'}`} - > - - - - -
    - {index !== 1 && ( - - } - onClick={() => removeExperimentGroup(index)} - /> - - )} -
    -
    -
    - ) - })} -
    - Alphanumeric, hyphens and underscores only -
    - {(experiment.parameters.feature_flag_variants.length ?? 0) < MAX_EXPERIMENT_VARIANTS && ( - addExperimentGroup()} - icon={} - data-attr="add-test-variant" - > - Add test variant - - )} -
    -
    -
    -
    -
    - - moveToNextFormStep()}> - Continue - -
    -
    - ) -} - -const StepGoal = (): JSX.Element => { - const { experiment, exposureAndSampleSize, experimentInsightType, groupTypes, aggregationLabel } = +import { ExperimentLoader, ExperimentLoadingAnimation, PageHeaderCustom } from './ExperimentView/components' +import { DistributionTable } from './ExperimentView/DistributionTable' +import { ExperimentExposureModal, ExperimentGoalModal, Goal } from './ExperimentView/Goal' +import { Info } from './ExperimentView/Info' +import { NoResultsEmptyState } from './ExperimentView/NoResultsEmptyState' +import { Overview } from './ExperimentView/Overview' +import { ProgressBar } from './ExperimentView/ProgressBar' +import { ReleaseConditionsTable } from './ExperimentView/ReleaseConditionsTable' +import { Results } from './ExperimentView/Results' +import { SecondaryMetricsTable } from './ExperimentView/SecondaryMetricsTable' + +export function ExperimentView(): JSX.Element { + const { experiment, experimentLoading, experimentResultsLoading, experimentId, experimentResults } = useValues(experimentLogic) - const { setExperiment, setNewExperimentInsight, createExperiment } = useActions(experimentLogic) - // insightLogic - const logic = insightLogic({ dashboardItemId: EXPERIMENT_INSIGHT_ID }) - const { insightProps } = useValues(logic) - - // insightDataLogic - const { query } = useValues(insightDataLogic(insightProps)) + const { updateExperimentSecondaryMetrics } = useActions(experimentLogic) return ( -
    -
    -
    -

    Participant type

    -
    - This sets default aggregation type for all metrics and feature flags. You can change this at any - time by updating the metric or feature flag. -
    - - { - const groupTypeIndex = rawGroupTypeIndex !== -1 ? rawGroupTypeIndex : undefined - - setExperiment({ - parameters: { - ...experiment.parameters, - aggregation_group_type_index: groupTypeIndex ?? undefined, - }, - }) - setNewExperimentInsight() - }} - options={[ - { value: -1, label: 'Persons' }, - ...Array.from(groupTypes.values()).map((groupType) => ({ - value: groupType.group_type_index, - label: capitalizeFirstLetter(aggregationLabel(groupType.group_type_index).plural), - })), - ]} - /> -
    -
    -

    Goal type

    - - { - val && - setNewExperimentInsight({ - insight: val, - properties: experiment?.filters?.properties, - }) - }} - options={[ - { - value: InsightType.FUNNELS, - label: ( -
    -
    Conversion funnel
    -
    - Track how many people complete a sequence of actions and/or events -
    -
    - ), - }, - { - value: InsightType.TRENDS, - label: ( -
    -
    Trend
    -
    - Track a cumulative total count of a specific event or action -
    -
    - ), - }, - ]} - /> -
    -
    -

    Goal criteria

    -
    - {experimentInsightType === InsightType.FUNNELS - ? "Create the funnel where you'd like to see an increased conversion rate." - : 'Create a trend goal to track change in a single metric.'} -
    - -
    - -
    -
    -
    -

    Goal preview

    -
    - - - -
    -
    -
    -
    - { - const { exposure, sampleSize } = exposureAndSampleSize - createExperiment(true, exposure, sampleSize) - }} - > - Create experiment - + <> + +
    + {experimentLoading ? ( + + ) : ( + <> + + {experimentResultsLoading ? ( + + ) : experimentResults && experimentResults.insight ? ( + <> + + + + + updateExperimentSecondaryMetrics(metrics)} + initialMetrics={experiment.secondary_metrics} + defaultAggregationType={experiment.parameters?.aggregation_group_type_index} + /> + + + + ) : ( + <> + + + {experiment.start_date && } + + )} + + + + )}
    -
    + ) } export function ExperimentNext(): JSX.Element { - const { experimentId, editingExistingExperiment, currentFormStep, props } = useValues(experimentLogic) + const { experimentId, editingExistingExperiment } = useValues(experimentLogic) - const stepComponents = { - 0: , - 1: , - } - const CurrentStepComponent = (currentFormStep && stepComponents[currentFormStep]) || - - return ( - <> - {experimentId === 'new' || editingExistingExperiment ? ( -
    -
    -
    - {CurrentStepComponent} -
    -
    - ) : ( -

    {`Experiment ${experimentId} draft/results`}

- )} - - ) + return experimentId === 'new' || editingExistingExperiment ? <ExperimentForm /> : <ExperimentView /> } diff --git a/frontend/src/scenes/experiments/ExperimentPreview.tsx index 3924eb67e5ace..6536294e95a58 100644 --- a/frontend/src/scenes/experiments/ExperimentPreview.tsx +++ b/frontend/src/scenes/experiments/ExperimentPreview.tsx @@ -444,7 +444,7 @@ export function MetricDisplay({ filters }: { filters?: FilterType }): JSX.Elemen .map((event: ActionFilterType, idx: number) => (
    -
    +
    {experimentInsightType === InsightType.FUNNELS ? (event.order || 0) + 1 : idx + 1}
    diff --git a/frontend/src/scenes/experiments/ExperimentResult.tsx b/frontend/src/scenes/experiments/ExperimentResult.tsx index 67633f8dd6f42..c978a5cd8c767 100644 --- a/frontend/src/scenes/experiments/ExperimentResult.tsx +++ b/frontend/src/scenes/experiments/ExperimentResult.tsx @@ -3,7 +3,6 @@ import './Experiment.scss' import { IconInfo } from '@posthog/icons' import { LemonTable, Tooltip } from '@posthog/lemon-ui' import { useValues } from 'kea' -import { getSeriesColor } from 'lib/colors' import { EntityFilterInfo } from 'lib/components/EntityFilterInfo' import { FunnelLayout } from 'lib/constants' import { LemonProgress } from 'lib/lemon-ui/LemonProgress' @@ -16,10 +15,8 @@ import { ChartDisplayType, FilterType, FunnelVizType, InsightShortId, InsightTyp import { LoadingState } from './Experiment' import { experimentLogic } from './experimentLogic' +import { getExperimentInsightColour } from './utils' -export function getExperimentInsightColour(variantIndex: number | null): string { - return variantIndex !== null ? getSeriesColor(variantIndex) : 'var(--muted-3000)' -} interface ExperimentResultProps { secondaryMetricId?: number } diff --git a/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx new file mode 100644 index 0000000000000..43b3c50ed614b --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/DistributionTable.tsx @@ -0,0 +1,66 @@ +import '../Experiment.scss' + +import { LemonTable, LemonTableColumns, Link } from '@posthog/lemon-ui' +import { useValues } from 'kea' +import { getSeriesColor } from 'lib/colors' +import { capitalizeFirstLetter } from 'lib/utils' +import { urls } from 'scenes/urls' + +import { MultivariateFlagVariant } from '~/types' + +import { experimentLogic } from '../experimentLogic' + +export function DistributionTable(): JSX.Element { + const { experiment } = useValues(experimentLogic) + + const columns: LemonTableColumns = [ + { + className: 'w-1/3', + key: 'key', + title: 'Variant', + render: function Key(_, item, index): JSX.Element { + return ( +
    +
    + {capitalizeFirstLetter(item.key)} +
    + ) + }, + }, + { + className: 'w-1/3', + key: 'rollout_percentage', + title: 'Rollout', + render: function Key(_, item): JSX.Element { + return
    {`${item.rollout_percentage}%`}
    + }, + }, + ] + + return ( +
    +
    +
    +

    Distribution

    +
    + +
    +
    + + Manage distribution + +
    +
    +
    + +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/Goal.tsx b/frontend/src/scenes/experiments/ExperimentView/Goal.tsx new file mode 100644 index 0000000000000..d1406633d4e86 --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/Goal.tsx @@ -0,0 +1,238 @@ +import '../Experiment.scss' + +import { IconInfo } from '@posthog/icons' +import { LemonButton, LemonDivider, LemonModal, Tooltip } from '@posthog/lemon-ui' +import { useActions, useValues } from 'kea' +import { Field, Form } from 'kea-forms' +import { InsightLabel } from 'lib/components/InsightLabel' +import { PropertyFilterButton } from 'lib/components/PropertyFilters/components/PropertyFilterButton' + +import { ActionFilter as ActionFilterType, AnyPropertyFilter, Experiment, FilterType, InsightType } from '~/types' + +import { EXPERIMENT_EXPOSURE_INSIGHT_ID, EXPERIMENT_INSIGHT_ID } from '../constants' +import { experimentLogic } from '../experimentLogic' +import { MetricSelector } from '../MetricSelector' + +export function MetricDisplay({ filters }: { filters?: FilterType }): JSX.Element { + const experimentInsightType = filters?.insight || InsightType.TRENDS + + return ( + <> + {([...(filters?.events || []), ...(filters?.actions || [])] as ActionFilterType[]) + .sort((a, b) => (a.order || 0) - (b.order || 0)) + .map((event: ActionFilterType, idx: number) => ( +
    +
    +
    + {experimentInsightType === InsightType.FUNNELS ? (event.order || 0) + 1 : idx + 1} +
    + + + +
    +
    + {event.properties?.map((prop: AnyPropertyFilter) => ( + + ))} +
    +
    + ))} + + ) +} + +export function ExposureMetric({ experimentId }: { experimentId: Experiment['id'] }): JSX.Element { + const { experiment } = useValues(experimentLogic({ experimentId })) + const { openExperimentExposureModal, updateExperimentExposure } = useActions(experimentLogic({ experimentId })) + + return ( + <> +
    + Exposure metric + + + +
    + {experiment.parameters?.custom_exposure_filter ? ( + + ) : ( + Default via $feature_flag_called events + )} +
    + + + Change exposure metric + + {experiment.parameters?.custom_exposure_filter && ( + updateExperimentExposure(null)} + > + Reset exposure + + )} + +
    + + ) +} + +export function ExperimentGoalModal({ experimentId }: { experimentId: Experiment['id'] }): JSX.Element { + const { experiment, isExperimentGoalModalOpen, experimentLoading } = useValues(experimentLogic({ experimentId })) + const { closeExperimentGoalModal, updateExperimentGoal, setNewExperimentInsight } = useActions( + experimentLogic({ experimentId }) + ) + + return ( + + + Cancel + + { + updateExperimentGoal(experiment.filters) + }} + type="primary" + loading={experimentLoading} + data-attr="create-annotation-submit" + > + Save + +
    + } + > +
    + + + +
    + + ) +} + +export function ExperimentExposureModal({ experimentId }: { experimentId: Experiment['id'] }): JSX.Element { + const { experiment, isExperimentExposureModalOpen, experimentLoading } = useValues( + experimentLogic({ experimentId }) + ) + const { closeExperimentExposureModal, updateExperimentExposure, setExperimentExposureInsight } = useActions( + experimentLogic({ experimentId }) + ) + + return ( + + + Cancel + + { + if (experiment.parameters.custom_exposure_filter) { + updateExperimentExposure(experiment.parameters.custom_exposure_filter) + } + }} + type="primary" + loading={experimentLoading} + data-attr="create-annotation-submit" + > + Save + +
    + } + > +
    + + + +
    + + ) +} + +export function Goal(): JSX.Element { + const { experiment, experimentId, experimentInsightType, experimentMathAggregationForTrends } = + useValues(experimentLogic) + const { openExperimentGoalModal } = useActions(experimentLogic({ experimentId })) + + return ( +
    +

    Experiment goal

    +
    + This {experimentInsightType === InsightType.FUNNELS ? 'funnel' : 'trend'}{' '} + {experimentInsightType === InsightType.FUNNELS + ? 'experiment measures conversion through each step of the user journey.' + : 'experiment tracks the performance of a single metric.'} +
    +
    +
    +
    + {experimentInsightType === InsightType.FUNNELS ? 'Conversion goal steps' : 'Trend goal'} +
    + + + Change experiment goal + +
    + {experimentInsightType === InsightType.TRENDS && + !experimentMathAggregationForTrends(experiment.filters) && ( + <> + +
    +
    + +
    +
    + + )} +
    +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/Info.tsx b/frontend/src/scenes/experiments/ExperimentView/Info.tsx new file mode 100644 index 0000000000000..b11b938860eac --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/Info.tsx @@ -0,0 +1,87 @@ +import '../Experiment.scss' + +import { IconWarning } from '@posthog/icons' +import { Link, ProfilePicture, Tooltip } from '@posthog/lemon-ui' +import { useValues } from 'kea' +import { CopyToClipboardInline } from 'lib/components/CopyToClipboard' +import { TZLabel } from 'lib/components/TZLabel' +import { IconOpenInNew } from 'lib/lemon-ui/icons' +import { urls } from 'scenes/urls' + +import { ProgressStatus } from '~/types' + +import { StatusTag } from '../Experiment' +import { experimentLogic } from '../experimentLogic' +import { getExperimentStatus } from '../experimentsLogic' +import { ResultsTag } from './components' + +export function Info(): JSX.Element { + const { experiment } = useValues(experimentLogic) + const { created_by, created_at } = experiment + + if (!experiment.feature_flag) { + return <> + } + + return ( +
    +
    +
    +
    Status
    + +
    +
    +
    Significance
    + +
    + {experiment.feature_flag && ( +
    +
    + Feature flag +
    + {getExperimentStatus(experiment) === ProgressStatus.Running && + !experiment.feature_flag.active && ( + + + + )} + + {experiment.feature_flag.key} + + + + +
    + )} +
    + +
    +
    +
    +
    Created at
    + {created_at && } +
    +
    +
    Created by
    + {created_by && } +
    +
    +
    +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/NoResultsEmptyState.tsx b/frontend/src/scenes/experiments/ExperimentView/NoResultsEmptyState.tsx new file mode 100644 index 0000000000000..c4c021a3c382e --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/NoResultsEmptyState.tsx @@ -0,0 +1,33 @@ +import '../Experiment.scss' + +import { Empty } from 'antd' +import { useValues } from 'kea' + +import { experimentLogic } from '../experimentLogic' + +export function NoResultsEmptyState(): JSX.Element { + const { experimentResultsLoading, experimentResultCalculationError } = useValues(experimentLogic) + + if (experimentResultsLoading) { + return <> + } + + return ( +
    +

    Results

    +
    +
    + +

    There are no experiment results yet

    + {!!experimentResultCalculationError && ( +
    {experimentResultCalculationError}
    + )} +
+ Wait a bit longer for your users to be exposed to the experiment. Double-check your feature flag + implementation if you're still not seeing results. +
    +
    +
    +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/Overview.tsx b/frontend/src/scenes/experiments/ExperimentView/Overview.tsx new file mode 100644 index 0000000000000..76cc2136116d4 --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/Overview.tsx @@ -0,0 +1,95 @@ +import '../Experiment.scss' + +import { LemonDivider } from '@posthog/lemon-ui' +import { useValues } from 'kea' +import { getSeriesColor } from 'lib/colors' +import { capitalizeFirstLetter } from 'lib/utils' + +import { InsightType } from '~/types' + +import { experimentLogic } from '../experimentLogic' + +export function Overview(): JSX.Element { + const { + experimentResults, + getIndexForVariant, + experimentInsightType, + sortedConversionRates, + highestProbabilityVariant, + areResultsSignificant, + } = useValues(experimentLogic) + + function SignificanceText(): JSX.Element { + return ( + <> + Your results are  + {`${areResultsSignificant ? 'significant' : 'not significant'}`}. + + ) + } + + if (experimentInsightType === InsightType.FUNNELS) { + const winningVariant = sortedConversionRates[0] + const secondBestVariant = sortedConversionRates[1] + const difference = winningVariant.conversionRate - secondBestVariant.conversionRate + + return ( +
    +

    Summary

    +
    +
+ {capitalizeFirstLetter(winningVariant.key)} +  is winning with a conversion rate  + + increase of {`${difference.toFixed(2)}`} + +  percentage points (vs 
    + {capitalizeFirstLetter(secondBestVariant.key)} + ).  + +
    +
    + ) + } + + const index = getIndexForVariant(experimentResults, highestProbabilityVariant || '') + if (highestProbabilityVariant && index !== null && experimentResults) { + const { probability } = experimentResults + + return ( +
    +

    Overview

    + +
    +
    + {capitalizeFirstLetter(highestProbabilityVariant)} +  is winning with a  + + {`${(probability[highestProbabilityVariant] * 100).toFixed(2)}% probability`}  + + of being best.  + +
    +
    + ) + } + + return <> +} diff --git a/frontend/src/scenes/experiments/ExperimentView/ProgressBar.tsx b/frontend/src/scenes/experiments/ExperimentView/ProgressBar.tsx new file mode 100644 index 0000000000000..1cedbcf500d6c --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/ProgressBar.tsx @@ -0,0 +1,77 @@ +import '../Experiment.scss' + +import { useValues } from 'kea' +import { dayjs } from 'lib/dayjs' +import { LemonProgress } from 'lib/lemon-ui/LemonProgress' +import { humanFriendlyNumber } from 'lib/utils' + +import { FunnelStep, InsightType } from '~/types' + +import { experimentLogic } from '../experimentLogic' + +export function ProgressBar(): JSX.Element { + const { experiment, experimentResults, experimentInsightType } = useValues(experimentLogic) + + // Parameters for experiment results + // don't use creation variables in results + const funnelResultsPersonsTotal = + experimentInsightType === InsightType.FUNNELS && experimentResults?.insight + ? (experimentResults.insight as FunnelStep[][]).reduce( + (sum: number, variantResult: FunnelStep[]) => variantResult[0]?.count + sum, + 0 + ) + : 0 + + const experimentProgressPercent = + experimentInsightType === InsightType.FUNNELS + ? ((funnelResultsPersonsTotal || 0) / (experiment?.parameters?.recommended_sample_size || 1)) * 100 + : (dayjs().diff(experiment?.start_date, 'day') / (experiment?.parameters?.recommended_running_time || 1)) * + 100 + + return ( +
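A worked example of the funnel summary in `Overview` above, with assumed conversion rates; the copy reports the gap in percentage points, not a relative change:

```ts
// Assumed sorted conversion rates (percent values, best first):
const sortedConversionRates = [
    { key: 'test', conversionRate: 34.2 },
    { key: 'control', conversionRate: 30.0 },
]
const difference = sortedConversionRates[0].conversionRate - sortedConversionRates[1].conversionRate
difference.toFixed(2) // -> '4.20' percentage points (34.2% vs 30.0%)
```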
    +
    {`${ + experimentProgressPercent > 100 ? 100 : experimentProgressPercent.toFixed(2) + }% complete`}
    + + {experimentInsightType === InsightType.TRENDS && experiment.start_date && ( +
    + {experiment.end_date ? ( +
    + Ran for {dayjs(experiment.end_date).diff(experiment.start_date, 'day')} days +
    + ) : ( +
    + {dayjs().diff(experiment.start_date, 'day')} days running +
    + )} +
    + Goal: {experiment?.parameters?.recommended_running_time ?? 'Unknown'} days +
    +
    + )} + {experimentInsightType === InsightType.FUNNELS && ( +
    + {experiment.end_date ? ( +
    + Saw {humanFriendlyNumber(funnelResultsPersonsTotal)} participants +
    + ) : ( +
    + {humanFriendlyNumber(funnelResultsPersonsTotal)} participants seen +
    + )} +
    + Goal: {humanFriendlyNumber(experiment?.parameters?.recommended_sample_size || 0)}{' '} + participants +
    +
    + )} +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx b/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx new file mode 100644 index 0000000000000..c0a4024e559f6 --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/ReleaseConditionsTable.tsx @@ -0,0 +1,77 @@ +import '../Experiment.scss' + +import { LemonTable, LemonTableColumns, LemonTag, Link } from '@posthog/lemon-ui' +import { useValues } from 'kea' +import { urls } from 'scenes/urls' + +import { groupsModel } from '~/models/groupsModel' +import { FeatureFlagGroupType } from '~/types' + +import { experimentLogic } from '../experimentLogic' + +export function ReleaseConditionsTable(): JSX.Element { + const { experiment } = useValues(experimentLogic) + const { aggregationLabel } = useValues(groupsModel) + + const columns: LemonTableColumns = [ + { + key: 'key', + title: '', + render: function Key(_, _item, index): JSX.Element { + return
    {`Set ${index + 1}`}
    + }, + }, + { + key: 'rollout_percentage', + title: 'Rollout', + render: function Key(_, item): JSX.Element { + const aggregationTargetName = + experiment.filters.aggregation_group_type_index != null + ? aggregationLabel(experiment.filters.aggregation_group_type_index).plural + : 'users' + + const releaseText = `${item.rollout_percentage}% of ${aggregationTargetName}` + + return ( +
    + {releaseText.startsWith('100% of') ? ( + {releaseText} + ) : ( + releaseText + )} +
    + ) + }, + }, + { + key: 'variant', + title: 'Override', + render: function Key(_, item): JSX.Element { + return
    {item.variant || '--'}
    + }, + }, + ] + + return ( +
    +
    +
    +

    Release conditions

    +
    + +
    +
    + + Manage release conditions + +
    +
    +
    + +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/Results.tsx b/frontend/src/scenes/experiments/ExperimentView/Results.tsx new file mode 100644 index 0000000000000..bd0662dfea042 --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/Results.tsx @@ -0,0 +1,50 @@ +import '../Experiment.scss' + +import { useValues } from 'kea' + +import { filtersToQueryNode } from '~/queries/nodes/InsightQuery/utils/filtersToQueryNode' +import { Query } from '~/queries/Query/Query' +import { NodeKind } from '~/queries/schema' +import { InsightShortId } from '~/types' + +import { experimentLogic } from '../experimentLogic' +import { transformResultFilters } from '../utils' +import { ResultsTag } from './components' +import { SummaryTable } from './SummaryTable' + +export function Results(): JSX.Element { + const { experimentResults } = useValues(experimentLogic) + + return ( +
    +
    +

    Results

    + +
    + + +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/SecondaryMetricsTable.tsx b/frontend/src/scenes/experiments/ExperimentView/SecondaryMetricsTable.tsx new file mode 100644 index 0000000000000..ea9c7befcdd7f --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/SecondaryMetricsTable.tsx @@ -0,0 +1,211 @@ +import '../Experiment.scss' + +import { IconPlus } from '@posthog/icons' +import { LemonButton, LemonInput, LemonModal, LemonTable, LemonTableColumns } from '@posthog/lemon-ui' +import { useActions, useValues } from 'kea' +import { Form } from 'kea-forms' +import { IconAreaChart } from 'lib/lemon-ui/icons' +import { LemonField } from 'lib/lemon-ui/LemonField' +import { capitalizeFirstLetter, humanFriendlyNumber } from 'lib/utils' + +import { InsightType } from '~/types' + +import { SECONDARY_METRIC_INSIGHT_ID } from '../constants' +import { experimentLogic, TabularSecondaryMetricResults } from '../experimentLogic' +import { MetricSelector } from '../MetricSelector' +import { secondaryMetricsLogic, SecondaryMetricsProps } from '../secondaryMetricsLogic' +import { getExperimentInsightColour } from '../utils' + +export function SecondaryMetricsTable({ + onMetricsChange, + initialMetrics, + experimentId, + defaultAggregationType, +}: SecondaryMetricsProps): JSX.Element { + const logic = secondaryMetricsLogic({ onMetricsChange, initialMetrics, experimentId, defaultAggregationType }) + const { metrics, isModalOpen, isSecondaryMetricModalSubmitting, existingModalSecondaryMetric, metricIdx } = + useValues(logic) + + const { + deleteMetric, + openModalToCreateSecondaryMetric, + openModalToEditSecondaryMetric, + closeModal, + saveSecondaryMetric, + setPreviewInsight, + } = useActions(logic) + + const { + secondaryMetricResultsLoading, + isExperimentRunning, + getIndexForVariant, + experiment, + experimentResults, + tabularSecondaryMetricResults, + } = useValues(experimentLogic({ experimentId })) + + const columns: LemonTableColumns = [ + { + key: 'variant', + title: 'Variant', + render: function Key(_, item: TabularSecondaryMetricResults): JSX.Element { + return ( +
    +
    + {capitalizeFirstLetter(item.variant)} +
    + ) + }, + }, + ] + + experiment.secondary_metrics?.forEach((metric, idx) => { + columns.push({ + key: `results_${idx}`, + title: ( + + } + onClick={() => openModalToEditSecondaryMetric(metric, idx)} + > + {capitalizeFirstLetter(metric.name)} + + + ), + render: function Key(_, item: TabularSecondaryMetricResults): JSX.Element { + return ( +
    + {item.results?.[idx].result ? ( + item.results[idx].insightType === InsightType.FUNNELS ? ( + <>{((item.results[idx].result as number) * 100).toFixed(1)}% + ) : ( + <>{humanFriendlyNumber(item.results[idx].result as number)} + ) + ) : ( + <>-- + )} +
    + ) + }, + }) + }) + + return ( + <> + + {existingModalSecondaryMetric && ( + deleteMetric(metricIdx)} + > + Delete + + )} +
    + + Cancel + + + {existingModalSecondaryMetric ? 'Save' : 'Create'} + +
    + + } + > +
    + + + + + + +
    +
    +
    +
    +
    +

    Secondary metrics

    + {metrics.length > 0 && ( +
    Click a metric name to compare variants on a graph.
    + )} +
    + +
    +
    + {metrics && metrics.length > 0 && metrics.length < 3 && isExperimentRunning && ( +
    + + Add metric + +
    + )} +
    +
    +
    + {metrics && metrics.length > 0 ? ( + + ) : ( +
    +
    + +
    + Add up to 3 secondary metrics to gauge side effects of your experiment. +
    + } + type="secondary" + size="small" + onClick={openModalToCreateSecondaryMetric} + > + Add metric + +
    +
    + )} +
    + + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/SummaryTable.tsx b/frontend/src/scenes/experiments/ExperimentView/SummaryTable.tsx new file mode 100644 index 0000000000000..b6d4b95674c2c --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/SummaryTable.tsx @@ -0,0 +1,132 @@ +import '../Experiment.scss' + +import { IconInfo } from '@posthog/icons' +import { LemonTable, LemonTableColumns, Tooltip } from '@posthog/lemon-ui' +import { useValues } from 'kea' +import { getSeriesColor } from 'lib/colors' +import { EntityFilterInfo } from 'lib/components/EntityFilterInfo' +import { LemonProgress } from 'lib/lemon-ui/LemonProgress' +import { capitalizeFirstLetter } from 'lib/utils' + +import { FunnelExperimentVariant, InsightType, TrendExperimentVariant } from '~/types' + +import { experimentLogic } from '../experimentLogic' + +export function SummaryTable(): JSX.Element { + const { + experimentResults, + experimentInsightType, + exposureCountDataForVariant, + conversionRateForVariant, + sortedConversionRates, + experimentMathAggregationForTrends, + countDataForVariant, + areTrendResultsConfusing, + } = useValues(experimentLogic) + + if (!experimentResults) { + return <> + } + + const columns: LemonTableColumns = [ + { + key: 'variants', + title: 'Variant', + render: function Key(_, item, index): JSX.Element { + return ( +
    +
    + {capitalizeFirstLetter(item.key)} +
    + ) + }, + }, + ] + + if (experimentInsightType === InsightType.TRENDS) { + columns.push({ + key: 'counts', + title: ( +
    + {experimentResults.insight?.[0] && 'action' in experimentResults.insight[0] && ( + + )} + + {experimentMathAggregationForTrends(experimentResults?.filters) ? 'metric' : 'count'} + +
    + ), + render: function Key(_, item, index): JSX.Element { + return ( +
    + {countDataForVariant(experimentResults, item.key)}{' '} + {areTrendResultsConfusing && index === 0 && ( + + + + )} +
    + ) + }, + }) + columns.push({ + key: 'exposure', + title: 'Exposure', + render: function Key(_, item): JSX.Element { + return
    {exposureCountDataForVariant(experimentResults, item.key)}
    + }, + }) + } + + if (experimentInsightType === InsightType.FUNNELS) { + columns.push({ + key: 'conversionRate', + title: 'Conversion rate', + render: function Key(_, item): JSX.Element { + const isWinning = item.key === sortedConversionRates[0].key + return ( +
    {`${conversionRateForVariant( + experimentResults, + item.key + )}%`}
    + ) + }, + }) + } + + columns.push({ + key: 'winProbability', + title: 'Win probability', + render: function Key(_, item): JSX.Element { + const percentage = + experimentResults?.probability?.[item.key] != undefined && + experimentResults.probability?.[item.key] * 100 + + return ( + <> + {percentage ? ( + + + {percentage.toFixed(2)}% + + ) : ( + '--' + )} + + ) + }, + }) + + return ( +
    + +
    + ) +} diff --git a/frontend/src/scenes/experiments/ExperimentView/components.tsx b/frontend/src/scenes/experiments/ExperimentView/components.tsx new file mode 100644 index 0000000000000..1a22957925e68 --- /dev/null +++ b/frontend/src/scenes/experiments/ExperimentView/components.tsx @@ -0,0 +1,148 @@ +import '../Experiment.scss' + +import { LemonButton, LemonDivider, LemonTable, LemonTag, LemonTagType } from '@posthog/lemon-ui' +import { useActions, useValues } from 'kea' +import { AnimationType } from 'lib/animations/animations' +import { Animation } from 'lib/components/Animation/Animation' +import { PageHeader } from 'lib/components/PageHeader' +import { dayjs } from 'lib/dayjs' +import { More } from 'lib/lemon-ui/LemonButton/More' +import { useEffect, useState } from 'react' + +import { ResetButton } from '../Experiment' +import { experimentLogic } from '../experimentLogic' + +export function ResultsTag(): JSX.Element { + const { areResultsSignificant } = useValues(experimentLogic) + const result: { color: LemonTagType; label: string } = areResultsSignificant + ? { color: 'success', label: 'Significant' } + : { color: 'primary', label: 'Not significant' } + + return ( + + {result.label} + + ) +} + +export function ExperimentLoader(): JSX.Element { + return ( + + ) +} + +export function ExperimentLoadingAnimation(): JSX.Element { + function EllipsisAnimation(): JSX.Element { + const [ellipsis, setEllipsis] = useState('.') + + useEffect(() => { + let count = 1 + let direction = 1 + + const interval = setInterval(() => { + setEllipsis('.'.repeat(count)) + count += direction + + if (count === 3 || count === 1) { + direction *= -1 + } + }, 300) + + return () => clearInterval(interval) + }, []) + + return {ellipsis} + } + + return ( +
    + +
    + Fetching experiment results + +
    +
    + ) +} + +export function PageHeaderCustom(): JSX.Element { + const { experiment, isExperimentRunning } = useValues(experimentLogic) + const { + launchExperiment, + resetRunningExperiment, + endExperiment, + archiveExperiment, + setEditExperiment, + loadExperimentResults, + loadSecondaryMetricResults, + } = useActions(experimentLogic) + + return ( + + {experiment && !isExperimentRunning && ( +
    + setEditExperiment(true)}> + Edit + + launchExperiment()}> + Launch + +
    + )} + {experiment && isExperimentRunning && ( +
    + <> + + loadExperimentResults(true)} + fullWidth + data-attr="refresh-experiment" + > + Refresh experiment results + + loadSecondaryMetricResults(true)} + fullWidth + data-attr="refresh-secondary-metrics" + > + Refresh secondary metrics + + + } + /> + + + + {!experiment.end_date && ( + endExperiment()}> + Stop + + )} + {experiment?.end_date && + dayjs().isSameOrAfter(dayjs(experiment.end_date), 'day') && + !experiment.archived && ( + archiveExperiment()}> + Archive + + )} +
    + )} + + } + /> + ) +} diff --git a/frontend/src/scenes/experiments/MetricSelector.tsx b/frontend/src/scenes/experiments/MetricSelector.tsx index 4df25546fe8a9..fbfcd0617d61c 100644 --- a/frontend/src/scenes/experiments/MetricSelector.tsx +++ b/frontend/src/scenes/experiments/MetricSelector.tsx @@ -4,6 +4,7 @@ import { IconInfo } from '@posthog/icons' import { LemonSelect } from '@posthog/lemon-ui' import { BindLogic, useActions, useValues } from 'kea' import { TaxonomicFilterGroupType } from 'lib/components/TaxonomicFilter/types' +import { EXPERIMENT_DEFAULT_DURATION } from 'lib/constants' import { LemonBanner } from 'lib/lemon-ui/LemonBanner' import { Tooltip } from 'lib/lemon-ui/Tooltip' import { Attribution } from 'scenes/insights/EditorFilters/AttributionFilter' @@ -23,8 +24,6 @@ import { Query } from '~/queries/Query/Query' import { FunnelsQuery, InsightQueryNode, TrendsQuery } from '~/queries/schema' import { EditorFilterProps, FilterType, InsightLogicProps, InsightShortId, InsightType } from '~/types' -import { DEFAULT_DURATION } from './experimentLogic' - export interface MetricSelectorProps { dashboardItemId: InsightShortId setPreviewInsight: (filters?: Partial) => void @@ -75,8 +74,8 @@ export function MetricSelector({ {showDateRangeBanner && ( - Preview insights are generated based on {DEFAULT_DURATION} days of data. This can cause a mismatch - between the preview and the actual results. + Preview insights are generated based on {EXPERIMENT_DEFAULT_DURATION} days of data. This can cause a + mismatch between the preview and the actual results. )} diff --git a/frontend/src/scenes/experiments/SecondaryMetricsTable.tsx b/frontend/src/scenes/experiments/SecondaryMetricsTable.tsx index fbaae05233582..c15b4f8293a3a 100644 --- a/frontend/src/scenes/experiments/SecondaryMetricsTable.tsx +++ b/frontend/src/scenes/experiments/SecondaryMetricsTable.tsx @@ -16,9 +16,9 @@ import { InsightType } from '~/types' import { SECONDARY_METRIC_INSIGHT_ID } from './constants' import { experimentLogic, TabularSecondaryMetricResults } from './experimentLogic' -import { getExperimentInsightColour } from './ExperimentResult' import { MetricSelector } from './MetricSelector' import { secondaryMetricsLogic, SecondaryMetricsProps } from './secondaryMetricsLogic' +import { getExperimentInsightColour } from './utils' export function SecondaryMetricsTable({ onMetricsChange, diff --git a/frontend/src/scenes/experiments/experimentLogic.tsx b/frontend/src/scenes/experiments/experimentLogic.tsx index 35e617d41470e..d2154c637a2a6 100644 --- a/frontend/src/scenes/experiments/experimentLogic.tsx +++ b/frontend/src/scenes/experiments/experimentLogic.tsx @@ -4,7 +4,7 @@ import { forms } from 'kea-forms' import { loaders } from 'kea-loaders' import { router, urlToAction } from 'kea-router' import api from 'lib/api' -import { FunnelLayout } from 'lib/constants' +import { EXPERIMENT_DEFAULT_DURATION, FunnelLayout } from 'lib/constants' import { dayjs } from 'lib/dayjs' import { lemonToast } from 'lib/lemon-ui/LemonToast/LemonToast' import { Tooltip } from 'lib/lemon-ui/Tooltip' @@ -23,7 +23,7 @@ import { urls } from 'scenes/urls' import { groupsModel } from '~/models/groupsModel' import { filtersToQueryNode } from '~/queries/nodes/InsightQuery/utils/filtersToQueryNode' import { queryNodeToFilter } from '~/queries/nodes/InsightQuery/utils/queryNodeToFilter' -import { InsightVizNode } from '~/queries/schema' +import { FunnelsQuery, InsightVizNode, TrendsQuery } from '~/queries/schema' import { ActionFilter as 
ActionFilterType, Breadcrumb, @@ -47,8 +47,6 @@ import { EXPERIMENT_EXPOSURE_INSIGHT_ID, EXPERIMENT_INSIGHT_ID } from './constan import type { experimentLogicType } from './experimentLogicType' import { experimentsLogic } from './experimentsLogic' -export const DEFAULT_DURATION = 14 // days - const NEW_EXPERIMENT: Experiment = { id: 'new', name: '', @@ -358,7 +356,7 @@ export const experimentLogic = kea([ newInsightFilters = cleanFilters({ insight: InsightType.FUNNELS, funnel_viz_type: FunnelVizType.Steps, - date_from: dayjs().subtract(DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'), + date_from: dayjs().subtract(EXPERIMENT_DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'), date_to: dayjs().endOf('d').format('YYYY-MM-DDTHH:mm'), layout: FunnelLayout.horizontal, aggregation_group_type_index: aggregationGroupTypeIndex, @@ -375,14 +373,23 @@ export const experimentLogic = kea([ : { events: [{ ...getDefaultEvent(), ...groupAggregation }] } newInsightFilters = cleanFilters({ insight: InsightType.TRENDS, - date_from: dayjs().subtract(DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'), + date_from: dayjs().subtract(EXPERIMENT_DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'), date_to: dayjs().endOf('d').format('YYYY-MM-DDTHH:mm'), ...eventAddition, ...filters, }) } - actions.updateQuerySource(filtersToQueryNode(newInsightFilters)) + // This allows switching between insight types. It's necessary as `updateQuerySource` merges + // the new query with any existing query and that causes validation problems when there are + // unsupported properties in the now merged query. + const newQuery = filtersToQueryNode(newInsightFilters) + if (filters?.insight === InsightType.FUNNELS) { + ;(newQuery as TrendsQuery).trendsFilter = undefined + } else { + ;(newQuery as FunnelsQuery).funnelsFilter = undefined + } + actions.updateQuerySource(newQuery) }, // sync form value `filters` with query setQuery: ({ query }) => { @@ -391,7 +398,7 @@ export const experimentLogic = kea([ setExperimentExposureInsight: async ({ filters }) => { const newInsightFilters = cleanFilters({ insight: InsightType.TRENDS, - date_from: dayjs().subtract(DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'), + date_from: dayjs().subtract(EXPERIMENT_DEFAULT_DURATION, 'day').format('YYYY-MM-DDTHH:mm'), date_to: dayjs().endOf('d').format('YYYY-MM-DDTHH:mm'), ...filters, }) @@ -672,6 +679,16 @@ export const experimentLogic = kea([ return !!experiment?.start_date }, ], + isExperimentStopped: [ + (s) => [s.experiment], + (experiment): boolean => { + return ( + !!experiment?.end_date && + dayjs().isSameOrAfter(dayjs(experiment.end_date), 'day') && + !experiment.archived + ) + }, + ], breadcrumbs: [ (s) => [s.experiment, s.experimentId], (experiment, experimentId): Breadcrumb[] => [ @@ -801,7 +818,11 @@ export const experimentLogic = kea([ return parseFloat( ( 4 / - Math.pow(Math.sqrt(lambda1 / DEFAULT_DURATION) - Math.sqrt(lambda2 / DEFAULT_DURATION), 2) + Math.pow( + Math.sqrt(lambda1 / EXPERIMENT_DEFAULT_DURATION) - + Math.sqrt(lambda2 / EXPERIMENT_DEFAULT_DURATION), + 2 + ) ).toFixed(1) ) }, @@ -809,7 +830,7 @@ export const experimentLogic = kea([ expectedRunningTime: [ () => [], () => - (entrants: number, sampleSize: number, duration: number = DEFAULT_DURATION): number => { + (entrants: number, sampleSize: number, duration: number = EXPERIMENT_DEFAULT_DURATION): number => { // recommended people / (actual people / day) = expected days return parseFloat((sampleSize / (entrants / duration)).toFixed(1)) }, @@ -1014,13 +1035,29 @@ export 
const experimentLogic = kea([ return variantsWithResults }, ], + sortedConversionRates: [ + (s) => [s.experimentResults, s.variants, s.conversionRateForVariant], + ( + experimentResults: any, + variants: any, + conversionRateForVariant: any + ): { key: string; conversionRate: number; index: number }[] => { + const conversionRates = [] + for (let index = 0; index < variants.length; index++) { + const variant = variants[index].key + const conversionRate = parseFloat(conversionRateForVariant(experimentResults, variant)) + conversionRates.push({ key: variant, conversionRate, index }) + } + return conversionRates.sort((a, b) => b.conversionRate - a.conversionRate) + }, + ], }), forms(({ actions, values }) => ({ experiment: { options: { showErrorsOnTouch: true }, defaults: { ...NEW_EXPERIMENT } as Experiment, errors: ({ name, feature_flag_key, parameters }) => ({ - name: !name && 'You have to enter a name.', + name: !name && 'Please enter a name', feature_flag_key: validateFeatureFlagKey(feature_flag_key), parameters: { feature_flag_variants: parameters.feature_flag_variants?.map(({ key }) => ({ diff --git a/frontend/src/scenes/experiments/secondaryMetricsLogic.ts b/frontend/src/scenes/experiments/secondaryMetricsLogic.ts index d3b04d4a29c38..a12bc0f4a7547 100644 --- a/frontend/src/scenes/experiments/secondaryMetricsLogic.ts +++ b/frontend/src/scenes/experiments/secondaryMetricsLogic.ts @@ -10,7 +10,7 @@ import { teamLogic } from 'scenes/teamLogic' import { filtersToQueryNode } from '~/queries/nodes/InsightQuery/utils/filtersToQueryNode' import { queryNodeToFilter } from '~/queries/nodes/InsightQuery/utils/queryNodeToFilter' -import { InsightVizNode } from '~/queries/schema' +import { FunnelsQuery, InsightVizNode, TrendsQuery } from '~/queries/schema' import { Experiment, FilterType, FunnelVizType, InsightType, SecondaryExperimentMetric } from '~/types' import { SECONDARY_METRIC_INSIGHT_ID } from './constants' @@ -162,7 +162,16 @@ export const secondaryMetricsLogic = kea([ }) } - actions.updateQuerySource(filtersToQueryNode(newInsightFilters)) + // This allows switching between insight types. It's necessary as `updateQuerySource` merges + // the new query with any existing query and that causes validation problems when there are + // unsupported properties in the now merged query. + const newQuery = filtersToQueryNode(newInsightFilters) + if (filters?.insight === InsightType.FUNNELS) { + ;(newQuery as TrendsQuery).trendsFilter = undefined + } else { + ;(newQuery as FunnelsQuery).funnelsFilter = undefined + } + actions.updateQuerySource(newQuery) }, // sync form value `filters` with query setQuery: ({ query }) => { diff --git a/frontend/src/scenes/experiments/utils.ts b/frontend/src/scenes/experiments/utils.ts new file mode 100644 index 0000000000000..90d7b2c64f44b --- /dev/null +++ b/frontend/src/scenes/experiments/utils.ts @@ -0,0 +1,19 @@ +import { getSeriesColor } from 'lib/colors' +import { FunnelLayout } from 'lib/constants' + +import { ChartDisplayType, FilterType, FunnelVizType, InsightType } from '~/types' + +export function getExperimentInsightColour(variantIndex: number | null): string { + return variantIndex !== null ? 
getSeriesColor(variantIndex) : 'var(--muted-3000)' +} + +export const transformResultFilters = (filters: Partial): Partial => ({ + ...filters, + ...(filters.insight === InsightType.FUNNELS && { + layout: FunnelLayout.vertical, + funnel_viz_type: FunnelVizType.Steps, + }), + ...(filters.insight === InsightType.TRENDS && { + display: ChartDisplayType.ActionsLineGraphCumulative, + }), +}) diff --git a/frontend/src/scenes/feature-flags/FeatureFlag.tsx b/frontend/src/scenes/feature-flags/FeatureFlag.tsx index 7c353f42777e4..12a307080088d 100644 --- a/frontend/src/scenes/feature-flags/FeatureFlag.tsx +++ b/frontend/src/scenes/feature-flags/FeatureFlag.tsx @@ -164,17 +164,13 @@ export function FeatureFlag({ id }: { id?: string } = {}): JSX.Element { key: FeatureFlagsTab.USAGE, content: , }) - } - if (featureFlags[FEATURE_FLAGS.MULTI_PROJECT_FEATURE_FLAGS]) { tabs.push({ label: 'Projects', key: FeatureFlagsTab.PROJECTS, content: , }) - } - if (featureFlags[FEATURE_FLAGS.SCHEDULED_CHANGES_FEATURE_FLAGS]) { tabs.push({ label: 'Schedule', key: FeatureFlagsTab.SCHEDULE, @@ -210,7 +206,7 @@ export function FeatureFlag({ id }: { id?: string } = {}): JSX.Element { }) } - if (featureFlags[FEATURE_FLAGS.ROLE_BASED_ACCESS] && featureFlag.can_edit) { + if (featureFlag.can_edit) { tabs.push({ label: 'Permissions', key: FeatureFlagsTab.PERMISSIONS, @@ -407,15 +403,13 @@ export function FeatureFlag({ id }: { id?: string } = {}): JSX.Element { {featureFlags[FEATURE_FLAGS.AUTO_ROLLBACK_FEATURE_FLAGS] && ( )} - {featureFlags[FEATURE_FLAGS.ROLE_BASED_ACCESS] && ( -
    -

    Permissions

    - -
    - -
    +
    +

    Permissions

    + +
    +
    - )} +
    )} @@ -534,15 +528,13 @@ export function FeatureFlag({ id }: { id?: string } = {}): JSX.Element { { editFeatureFlag(true) }} - disabled={!featureFlag.can_edit} > Edit diff --git a/frontend/src/scenes/feature-flags/FeatureFlagPermissions.tsx b/frontend/src/scenes/feature-flags/FeatureFlagPermissions.tsx index 38b1d8a33bff6..ab3858345a75d 100644 --- a/frontend/src/scenes/feature-flags/FeatureFlagPermissions.tsx +++ b/frontend/src/scenes/feature-flags/FeatureFlagPermissions.tsx @@ -10,11 +10,7 @@ import { LemonTableColumns } from 'lib/lemon-ui/LemonTable' import { AccessControlObject } from '~/layout/navigation-3000/sidepanel/panels/access_control/AccessControlObject' import { AccessLevel, AvailableFeature, FeatureFlagType, Resource, RoleType } from '~/types' -import { - FormattedResourceLevel, - permissionsLogic, - ResourcePermissionMapping, -} from '../settings/organization/Permissions/permissionsLogic' +import { permissionsLogic } from '../settings/organization/Permissions/permissionsLogic' import { rolesLogic } from '../settings/organization/Permissions/Roles/rolesLogic' import { urls } from '../urls' import { featureFlagPermissionsLogic } from './featureFlagPermissionsLogic' @@ -88,7 +84,7 @@ export function ResourcePermission({ resourceType, canEdit, }: ResourcePermissionProps): JSX.Element { - const { allPermissions, shouldShowPermissionsTable } = useValues(permissionsLogic) + const { allPermissions } = useValues(permissionsLogic) const { roles: possibleRolesWithAccess } = useValues(rolesLogic) const resourceLevel = allPermissions.find((permission) => permission.resource === resourceType) // TODO: feature_flag_access_level should eventually be generic in this component @@ -112,7 +108,7 @@ export function ResourcePermission({ icon={ } - to={`${urls.settings('organization')}?tab=role_based_access`} + to={`${urls.settings('organization-rbac')}`} targetBlank size="small" noPadding @@ -166,33 +162,7 @@ export function ResourcePermission({ return ( <> - {!shouldShowPermissionsTable && ( - <> - {resourceLevel && } - - - )} - {shouldShowPermissionsTable && } - {!shouldShowPermissionsTable && ( - <> -
    Roles
    - {roles.length > 0 ? ( -
    - {roles.map((role) => { - return ( - deleteAssociatedRole(roleId)} - /> - ) - })} -
    - ) : ( -
    No roles added yet
    - )} - - )} + {canEdit && ( <>
    Custom edit roles
    @@ -217,61 +187,3 @@ export function ResourcePermission({ ) } - -function OrganizationResourcePermissionLabel({ - resourceLevel, -}: { - resourceLevel: FormattedResourceLevel -}): JSX.Element { - return ( - <> - } - to={`${urls.settings('organization')}?tab=role_based_access`} - targetBlank - size="small" - noPadding - className="ml-1" - /> - } - > -
    Organization default
    -
    - {ResourcePermissionMapping[resourceLevel.access_level]} - - ) -} - -function OrganizationResourcePermissionRoles({ roles }: { roles: RoleType[] }): JSX.Element { - return ( - <> -
    Roles with edit access
    -
    - {roles.map((role) => ( - - {role.name}{' '} - - ))} -
    - - ) -} - -function RoleRow({ role, deleteRole }: { role: RoleType; deleteRole?: (roleId: RoleType['id']) => void }): JSX.Element { - return ( -
    - {role.name} - {deleteRole && ( - } - onClick={() => deleteRole(role.id)} - tooltip="Remove role from permission" - tooltipPlacement="bottom-start" - size="small" - /> - )} -
    - ) -} diff --git a/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx b/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx index 389e0e2e4f271..75e3b9a47a6e6 100644 --- a/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx +++ b/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditions.tsx @@ -8,7 +8,7 @@ import { router } from 'kea-router' import { allOperatorsToHumanName } from 'lib/components/DefinitionPopover/utils' import { PropertyFilters } from 'lib/components/PropertyFilters/PropertyFilters' import { isPropertyFilterWithOperator } from 'lib/components/PropertyFilters/utils' -import { FEATURE_FLAGS, INSTANTLY_AVAILABLE_PROPERTIES } from 'lib/constants' +import { INSTANTLY_AVAILABLE_PROPERTIES } from 'lib/constants' import { groupsAccessLogic, GroupsAccessStatus } from 'lib/introductions/groupsAccessLogic' import { GroupsIntroductionOption } from 'lib/introductions/GroupsIntroductionOption' import { IconErrorOutline, IconOpenInNew, IconSubArrowRight } from 'lib/lemon-ui/icons' @@ -60,7 +60,6 @@ export function FeatureFlagReleaseConditions({ affectedUsers, totalUsers, filtersTaxonomicOptions, - enabledFeatures, aggregationTargetName, } = useValues(releaseConditionsLogic) @@ -233,7 +232,7 @@ export function FeatureFlagReleaseConditions({ taxonomicFilterOptionsFromProp={filtersTaxonomicOptions} hasRowOperator={false} sendAllKeyUpdates - allowRelativeDateOptions={!!enabledFeatures[FEATURE_FLAGS.NEW_FEATURE_FLAG_OPERATORS]} + allowRelativeDateOptions errorMessages={ propertySelectErrors?.[index]?.properties?.some((message) => !!message.value) ? propertySelectErrors[index].properties?.map((message, index) => { diff --git a/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditionsLogic.ts b/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditionsLogic.ts index f0007c648cc28..dab0d6f408993 100644 --- a/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditionsLogic.ts +++ b/frontend/src/scenes/feature-flags/FeatureFlagReleaseConditionsLogic.ts @@ -2,8 +2,6 @@ import { actions, afterMount, connect, kea, key, listeners, path, props, propsCh import { subscriptions } from 'kea-subscriptions' import api from 'lib/api' import { TaxonomicFilterGroupType, TaxonomicFilterProps } from 'lib/components/TaxonomicFilter/types' -import { FEATURE_FLAGS } from 'lib/constants' -import { featureFlagLogic as enabledFeaturesLogic } from 'lib/logic/featureFlagLogic' import { objectsEqual } from 'lib/utils' import { groupsModel } from '~/models/groupsModel' @@ -32,14 +30,7 @@ export const featureFlagReleaseConditionsLogic = kea id ?? 'unknown'), connect({ - values: [ - teamLogic, - ['currentTeamId'], - groupsModel, - ['groupTypes', 'aggregationLabel'], - enabledFeaturesLogic, - ['featureFlags as enabledFeatures'], - ], + values: [teamLogic, ['currentTeamId'], groupsModel, ['groupTypes', 'aggregationLabel']], }), actions({ setFilters: (filters: FeatureFlagFilters) => ({ filters }), @@ -210,35 +201,28 @@ export const featureFlagReleaseConditionsLogic = kea [s.filters, s.groupTypes, s.enabledFeatures], - (filters, groupTypes, enabledFeatures): TaxonomicFilterGroupType[] => { - const baseGroupTypes = [] - const additionalGroupTypes = [] - const newFlagOperatorsEnabled = enabledFeatures[FEATURE_FLAGS.NEW_FEATURE_FLAG_OPERATORS] + (s) => [s.filters, s.groupTypes], + (filters, groupTypes): TaxonomicFilterGroupType[] => { + const targetGroupTypes = [] const targetGroup = filters?.aggregation_group_type_index != null ? 
groupTypes.get(filters.aggregation_group_type_index as GroupTypeIndex) : undefined if (targetGroup) { - baseGroupTypes.push( + targetGroupTypes.push( `${TaxonomicFilterGroupType.GroupsPrefix}_${targetGroup?.group_type_index}` as unknown as TaxonomicFilterGroupType ) - if (newFlagOperatorsEnabled) { - additionalGroupTypes.push( - `${TaxonomicFilterGroupType.GroupNamesPrefix}_${filters.aggregation_group_type_index}` as unknown as TaxonomicFilterGroupType - ) - } + targetGroupTypes.push( + `${TaxonomicFilterGroupType.GroupNamesPrefix}_${filters.aggregation_group_type_index}` as unknown as TaxonomicFilterGroupType + ) } else { - baseGroupTypes.push(TaxonomicFilterGroupType.PersonProperties) - baseGroupTypes.push(TaxonomicFilterGroupType.Cohorts) - - if (newFlagOperatorsEnabled) { - additionalGroupTypes.push(TaxonomicFilterGroupType.Metadata) - } + targetGroupTypes.push(TaxonomicFilterGroupType.PersonProperties) + targetGroupTypes.push(TaxonomicFilterGroupType.Cohorts) + targetGroupTypes.push(TaxonomicFilterGroupType.Metadata) } - return [...baseGroupTypes, ...additionalGroupTypes] + return targetGroupTypes }, ], aggregationTargetName: [ diff --git a/frontend/src/scenes/feature-flags/featureFlagLogic.ts b/frontend/src/scenes/feature-flags/featureFlagLogic.ts index 4ddc7bf91d51e..dfd79480a6d36 100644 --- a/frontend/src/scenes/feature-flags/featureFlagLogic.ts +++ b/frontend/src/scenes/feature-flags/featureFlagLogic.ts @@ -112,7 +112,7 @@ const EMPTY_MULTIVARIATE_OPTIONS: MultivariateFlagOptions = { /** Check whether a string is a valid feature flag key. If not, a reason string is returned - otherwise undefined. */ export function validateFeatureFlagKey(key: string): string | undefined { return !key - ? 'You need to set a key' + ? 'Please set a key' : !key.match?.(/^([A-z]|[a-z]|[0-9]|-|_)+$/) ? 'Only letters, numbers, hyphens (-) & underscores (_) are allowed.' : undefined diff --git a/frontend/src/scenes/insights/EditorFilters/PathsEventTypes.tsx b/frontend/src/scenes/insights/EditorFilters/PathsEventTypes.tsx index 71fe12ce8d68d..48bbd016595ee 100644 --- a/frontend/src/scenes/insights/EditorFilters/PathsEventTypes.tsx +++ b/frontend/src/scenes/insights/EditorFilters/PathsEventTypes.tsx @@ -60,7 +60,7 @@ export function PathsEventsTypes({ insightProps }: EditorFilterProps): JSX.Eleme fullWidth type="secondary" dropdown={{ - sameWidth: true, + matchWidth: true, closeOnClickInside: false, overlay: options.map((option) => ( setVisible(false)} diff --git a/frontend/src/scenes/notebooks/NotebookSelectButton/NotebookSelectButton.tsx b/frontend/src/scenes/notebooks/NotebookSelectButton/NotebookSelectButton.tsx index 50358e1f18537..aab8e816e5db5 100644 --- a/frontend/src/scenes/notebooks/NotebookSelectButton/NotebookSelectButton.tsx +++ b/frontend/src/scenes/notebooks/NotebookSelectButton/NotebookSelectButton.tsx @@ -212,7 +212,7 @@ export function NotebookSelectPopover({
    } - sameWidth={false} + matchWidth={false} actionable visible={!!showPopover} onVisibilityChange={(visible) => setShowPopover(visible)} diff --git a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx index 8b73fbcc1f924..cb6ef67cc5b3d 100644 --- a/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx +++ b/frontend/src/scenes/session-recordings/errors/SessionRecordingErrors.tsx @@ -1,5 +1,6 @@ import { IconFeatures } from '@posthog/icons' -import { LemonButton, LemonTable, LemonTabs, Spinner } from '@posthog/lemon-ui' +import { LemonButton, LemonTable, LemonTabs } from '@posthog/lemon-ui' +import { captureException } from '@sentry/react' import { useActions, useValues } from 'kea' import { JSONViewer } from 'lib/components/JSONViewer' import { Sparkline } from 'lib/lemon-ui/Sparkline' @@ -17,11 +18,7 @@ export function SessionRecordingErrors(): JSX.Element { const { errors, errorsLoading } = useValues(sessionRecordingErrorsLogic) const { loadErrorClusters, createPlaylist } = useActions(sessionRecordingErrorsLogic) - if (errorsLoading) { - return - } - - if (!errors) { + if (!errors && !errorsLoading) { return ( } onClick={() => loadErrorClusters()}> Automagically find errors @@ -110,7 +107,8 @@ export function SessionRecordingErrors(): JSX.Element { }, }, ]} - dataSource={errors} + loading={errorsLoading} + dataSource={errors || []} expandable={{ expandedRowRender: (cluster) => , }} @@ -165,5 +163,20 @@ function parseTitle(error: string): string { input = error } - return input.split('\n')[0].trim().substring(0, MAX_TITLE_LENGTH) + if (!input) { + return error + } + + try { + // TRICKY - after json parsing we might not have a string, + // since the JSON parser will helpfully convert to other types too e.g. 
have seen objects here + if (typeof input !== 'string') { + input = JSON.stringify(input) + } + + return input.split('\n')[0].trim().substring(0, MAX_TITLE_LENGTH) || error + } catch (e) { + captureException(e, { extra: { error }, tags: { feature: 'replay/error-clustering' } }) + return error + } } diff --git a/frontend/src/scenes/session-recordings/file-playback/sessionRecordingFilePlaybackLogic.ts b/frontend/src/scenes/session-recordings/file-playback/sessionRecordingFilePlaybackLogic.ts index fb01f15b9a0cc..b6f547603114b 100644 --- a/frontend/src/scenes/session-recordings/file-playback/sessionRecordingFilePlaybackLogic.ts +++ b/frontend/src/scenes/session-recordings/file-playback/sessionRecordingFilePlaybackLogic.ts @@ -3,7 +3,6 @@ import { eventWithTime } from '@rrweb/types' import { BuiltLogic, connect, kea, listeners, path, reducers, selectors } from 'kea' import { loaders } from 'kea-loaders' import { beforeUnload } from 'kea-router' -import { FEATURE_FLAGS } from 'lib/constants' import { dayjs } from 'lib/dayjs' import { featureFlagLogic } from 'lib/logic/featureFlagLogic' import { uuid } from 'lib/utils' @@ -178,11 +177,7 @@ export const sessionRecordingFilePlaybackLogic = kea diff --git a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts index 8d630aa10acbb..0aff48444a7e9 100644 --- a/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts +++ b/frontend/src/scenes/session-recordings/player/sessionRecordingDataLogic.ts @@ -67,9 +67,10 @@ function isRecordingSnapshot(x: unknown): x is RecordingSnapshot { export const parseEncodedSnapshots = async ( items: (RecordingSnapshot | EncodedRecordingSnapshot | string)[], sessionId: string, - withMobileTransformer: boolean + // this is only kept so that we can export the untransformed data for debugging + withMobileTransformer: boolean = true ): Promise => { - if (!postHogEEModule && withMobileTransformer) { + if (!postHogEEModule) { postHogEEModule = await posthogEE() } const lineCount = items.length @@ -239,11 +240,7 @@ async function processEncodedResponse( let untransformed: RecordingSnapshot[] | null = null const transformed = deduplicateSnapshots( - await parseEncodedSnapshots( - encodedResponse, - props.sessionRecordingId, - !!featureFlags[FEATURE_FLAGS.SESSION_REPLAY_MOBILE] - ), + await parseEncodedSnapshots(encodedResponse, props.sessionRecordingId), existingData?.snapshots ?? [] ) diff --git a/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylist.tsx b/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylist.tsx index 22dd5881ae378..3c2a9842c0dbc 100644 --- a/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylist.tsx +++ b/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylist.tsx @@ -6,7 +6,6 @@ import clsx from 'clsx' import { range } from 'd3' import { BindLogic, useActions, useValues } from 'kea' import { EmptyMessage } from 'lib/components/EmptyMessage/EmptyMessage' -import { FlaggedFeature } from 'lib/components/FlaggedFeature' import { PropertyKeyInfo } from 'lib/components/PropertyKeyInfo' import { FEATURE_FLAGS } from 'lib/constants' import { useResizeBreakpoints } from 'lib/hooks/useResizeObserver' @@ -58,12 +57,10 @@ function UnusableEventsWarning(props: { unusableEventsInFilter: string[] }): JSX the Web SDK - - ,{' '} - - the Android SDK - - + ,{' '} + + the Android SDK +

    ) diff --git a/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylistTroubleshooting.tsx b/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylistTroubleshooting.tsx index bda13153d9ccb..961b0b54fa246 100644 --- a/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylistTroubleshooting.tsx +++ b/frontend/src/scenes/session-recordings/playlist/SessionRecordingsPlaylistTroubleshooting.tsx @@ -1,7 +1,5 @@ import { LemonDivider, Link } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' -import { FlaggedFeature } from 'lib/components/FlaggedFeature' -import { FEATURE_FLAGS } from 'lib/constants' import { playerSettingsLogic } from '../player/playerSettingsLogic' import { sessionRecordingsPlaylistLogic } from './sessionRecordingsPlaylistLogic' @@ -21,9 +19,7 @@ export const SessionRecordingsPlaylistTroubleshooting = (): JSX.Element => {

      - -
      All recording sources:
      -
      +
      All recording sources:
      {otherRecordings.length > 0 && hideViewedRecordings && (
    • Viewed recordings hidden.{' '} @@ -42,10 +38,8 @@ export const SessionRecordingsPlaylistTroubleshooting = (): JSX.Element => { They are outside the retention period
    • - - -
      Web recordings
      -
      + +
      Web recordings
    • = { export const permissionsLogic = kea([ path(['scenes', 'organization', 'Settings', 'Permissions', 'permissionsLogic']), connect({ - values: [featureFlagLogic, ['featureFlags'], rolesLogic, ['roles']], + values: [rolesLogic, ['roles']], actions: [rolesLogic, ['updateRole']], }), actions({ @@ -123,10 +121,6 @@ export const permissionsLogic = kea([ ) }, ], - shouldShowPermissionsTable: [ - (s) => [s.featureFlags], - (featureFlags) => featureFlags[FEATURE_FLAGS.ROLE_BASED_ACCESS] === 'control', - ], resourceRolesAccess: [ (s) => [s.allPermissions, s.roles], (permissions, roles) => { diff --git a/frontend/src/scenes/surveys/SurveyView.tsx b/frontend/src/scenes/surveys/SurveyView.tsx index f36568410fe1a..95c2ca6df47cf 100644 --- a/frontend/src/scenes/surveys/SurveyView.tsx +++ b/frontend/src/scenes/surveys/SurveyView.tsx @@ -6,12 +6,10 @@ import { LemonButton, LemonDivider, Link } from '@posthog/lemon-ui' import { useActions, useValues } from 'kea' import { EditableField } from 'lib/components/EditableField/EditableField' import { PageHeader } from 'lib/components/PageHeader' -import { FEATURE_FLAGS } from 'lib/constants' import { dayjs } from 'lib/dayjs' import { More } from 'lib/lemon-ui/LemonButton/More' import { LemonSkeleton } from 'lib/lemon-ui/LemonSkeleton' import { LemonTabs } from 'lib/lemon-ui/LemonTabs' -import { featureFlagLogic } from 'lib/logic/featureFlagLogic' import { capitalizeFirstLetter, pluralize } from 'lib/utils' import { useEffect, useState } from 'react' import { urls } from 'scenes/urls' @@ -260,7 +258,6 @@ export function SurveyResult({ disableEventsTable }: { disableEventsTable?: bool surveyOpenTextResultsReady, surveyNPSScore, } = useValues(surveyLogic) - const { featureFlags } = useValues(featureFlagLogic) return ( <> @@ -274,10 +271,8 @@ export function SurveyResult({ disableEventsTable }: { disableEventsTable?: bool <>
      {surveyNPSScore}
      Total NPS Score
      - {featureFlags[FEATURE_FLAGS.SURVEYS_RESULTS_VISUALIZATIONS] && ( - // TODO: rework this to show nps scores over time - - )} + {/* TODO: rework this to show nps scores over time */} + )} ([ path(['scenes', 'surveys', 'surveysLogic']), connect(() => ({ - values: [ - userLogic, - ['hasAvailableFeature'], - teamLogic, - ['currentTeam', 'currentTeamLoading'], - featureFlagLogic, - ['featureFlags'], - ], + values: [userLogic, ['hasAvailableFeature'], teamLogic, ['currentTeam', 'currentTeamLoading']], actions: [teamLogic, ['loadCurrentTeam']], })), actions({ @@ -151,21 +142,17 @@ export const surveysLogic = kea([ }, ], ], - payGateFlagOn: [(s) => [s.featureFlags], (featureFlags) => featureFlags[FEATURE_FLAGS.SURVEYS_PAYGATES]], surveysStylingAvailable: [ - (s) => [s.hasAvailableFeature, s.payGateFlagOn], - (hasAvailableFeature, payGateFlagOn) => - !payGateFlagOn || (payGateFlagOn && hasAvailableFeature(AvailableFeature.SURVEYS_STYLING)), + (s) => [s.hasAvailableFeature], + (hasAvailableFeature) => hasAvailableFeature(AvailableFeature.SURVEYS_STYLING), ], surveysHTMLAvailable: [ - (s) => [s.hasAvailableFeature, s.payGateFlagOn], - (hasAvailableFeature, payGateFlagOn) => - !payGateFlagOn || (payGateFlagOn && hasAvailableFeature(AvailableFeature.SURVEYS_TEXT_HTML)), + (s) => [s.hasAvailableFeature], + (hasAvailableFeature) => hasAvailableFeature(AvailableFeature.SURVEYS_TEXT_HTML), ], surveysMultipleQuestionsAvailable: [ - (s) => [s.hasAvailableFeature, s.payGateFlagOn], - (hasAvailableFeature, payGateFlagOn) => - !payGateFlagOn || (payGateFlagOn && hasAvailableFeature(AvailableFeature.SURVEYS_MULTIPLE_QUESTIONS)), + (s) => [s.hasAvailableFeature], + (hasAvailableFeature) => hasAvailableFeature(AvailableFeature.SURVEYS_MULTIPLE_QUESTIONS), ], showSurveysDisabledBanner: [ (s) => [s.currentTeam, s.currentTeamLoading, s.surveys], diff --git a/frontend/src/toolbar/elements/heatmapLogic.ts b/frontend/src/toolbar/elements/heatmapLogic.ts index 8bb148b4f6adc..de7232bd70dfd 100644 --- a/frontend/src/toolbar/elements/heatmapLogic.ts +++ b/frontend/src/toolbar/elements/heatmapLogic.ts @@ -24,7 +24,7 @@ const emptyElementsStatsPages: PaginatedResponse = { export const heatmapLogic = kea([ path(['toolbar', 'elements', 'heatmapLogic']), connect({ - values: [toolbarConfigLogic, ['apiURL'], currentPageLogic, ['href', 'wildcardHref']], + values: [currentPageLogic, ['href', 'wildcardHref']], actions: [currentPageLogic, ['setHref', 'setWildcardHref']], }), actions({ @@ -113,7 +113,7 @@ export const heatmapLogic = kea([ ...values.heatmapFilter, } const includeEventsParams = '&include=$autocapture&include=$rageclick' - defaultUrl = `${values.apiURL}/api/element/stats/${encodeParams( + defaultUrl = `/api/element/stats/${encodeParams( { ...params, paginate_response: true }, '?' )}${includeEventsParams}` @@ -124,7 +124,7 @@ export const heatmapLogic = kea([ url || defaultUrl, 'GET', undefined, - url ? 'use-as-provided' : 'only-add-token' + url ? 
'use-as-provided' : 'full' ) if (response.status === 403) { diff --git a/frontend/src/toolbar/toolbarConfigLogic.ts b/frontend/src/toolbar/toolbarConfigLogic.ts index 853b03bdeea32..c0650d7552d55 100644 --- a/frontend/src/toolbar/toolbarConfigLogic.ts +++ b/frontend/src/toolbar/toolbarConfigLogic.ts @@ -6,7 +6,7 @@ import { posthog } from '~/toolbar/posthog' import { ToolbarProps } from '~/types' import type { toolbarConfigLogicType } from './toolbarConfigLogicType' -import { clearSessionToolbarToken } from './utils' +import { LOCALSTORAGE_KEY } from './utils' export const toolbarConfigLogic = kea([ path(['toolbar', 'toolbarConfigLogic']), @@ -19,48 +19,68 @@ export const toolbarConfigLogic = kea([ clearUserIntent: true, showButton: true, hideButton: true, + persistConfig: true, }), reducers(({ props }) => ({ - rawApiURL: [props.apiURL as string], - rawJsURL: [(props.jsURL || props.apiURL) as string], - temporaryToken: [props.temporaryToken || null, { logout: () => null, tokenExpired: () => null }], + // TRICKY: We cache a copy of the props. This allows us to connect the logic without passing the props in - only the top level caller has to do this. + props: [props], + temporaryToken: [ + props.temporaryToken || null, + { logout: () => null, tokenExpired: () => null, authenticate: () => null }, + ], actionId: [props.actionId || null, { logout: () => null, clearUserIntent: () => null }], userIntent: [props.userIntent || null, { logout: () => null, clearUserIntent: () => null }], - source: [props.source || null, { logout: () => null }], buttonVisible: [true, { showButton: () => true, hideButton: () => false, logout: () => false }], - dataAttributes: [props.dataAttributes || []], - posthog: [props.posthog ?? null], })), selectors({ - apiURL: [(s) => [s.rawApiURL], (apiURL) => `${apiURL.endsWith('/') ? apiURL.replace(/\/+$/, '') : apiURL}`], + posthog: [(s) => [s.props], (props) => props.posthog ?? null], + apiURL: [ + (s) => [s.props], + (props: ToolbarProps) => `${props.apiURL?.endsWith('/') ? props.apiURL.replace(/\/+$/, '') : props.apiURL}`, + ], jsURL: [ - (s) => [s.rawJsURL, s.apiURL], - (rawJsURL, apiUrl) => - `${rawJsURL ? (rawJsURL.endsWith('/') ? rawJsURL.replace(/\/+$/, '') : rawJsURL) : apiUrl}`, + (s) => [s.props, s.apiURL], + (props: ToolbarProps, apiUrl) => + `${props.jsURL ? (props.jsURL.endsWith('/') ? props.jsURL.replace(/\/+$/, '') : props.jsURL) : apiUrl}`, ], + dataAttributes: [(s) => [s.props], (props): string[] => props.dataAttributes ?? []], isAuthenticated: [(s) => [s.temporaryToken], (temporaryToken) => !!temporaryToken], }), - listeners(({ values }) => ({ + listeners(({ values, actions }) => ({ authenticate: () => { posthog.capture('toolbar authenticate', { is_authenticated: values.isAuthenticated }) const encodedUrl = encodeURIComponent(window.location.href) + actions.persistConfig() window.location.href = `${values.apiURL}/authorize_and_redirect/?redirect=${encodedUrl}` - clearSessionToolbarToken() }, logout: () => { posthog.capture('toolbar logout') - clearSessionToolbarToken() + localStorage.removeItem(LOCALSTORAGE_KEY) }, tokenExpired: () => { posthog.capture('toolbar token expired') console.warn('PostHog Toolbar API token expired. 
Clearing session.') - if (values.source !== 'localstorage') { + if (values.props.source !== 'localstorage') { lemonToast.error('PostHog Toolbar API token expired.') } - clearSessionToolbarToken() + actions.persistConfig() + }, + + persistConfig: () => { + // Most params we don't change, only those that we may have modified during the session + const toolbarParams: ToolbarProps = { + ...values.props, + temporaryToken: values.temporaryToken ?? undefined, + actionId: values.actionId ?? undefined, + userIntent: values.userIntent ?? undefined, + posthog: undefined, + featureFlags: undefined, + } + + localStorage.setItem(LOCALSTORAGE_KEY, JSON.stringify(toolbarParams)) }, })), @@ -83,12 +103,10 @@ export async function toolbarFetch( /* allows caller to control how the provided URL is altered before use if "full" then the payload and URL are taken apart and reconstructed - if "only-add-token" the URL is unchanged, the payload is not used - but the temporary token is added to the URL if "use-as-provided" then the URL is used as-is, and the payload is not used this is because the heatmapLogic needs more control over how the query parameters are constructed */ - urlConstruction: 'full' | 'only-add-token' | 'use-as-provided' = 'full' + urlConstruction: 'full' | 'use-as-provided' = 'full' ): Promise { const temporaryToken = toolbarConfigLogic.findMounted()?.values.temporaryToken const apiURL = toolbarConfigLogic.findMounted()?.values.apiURL @@ -96,8 +114,6 @@ export async function toolbarFetch( let fullUrl: string if (urlConstruction === 'use-as-provided') { fullUrl = url - } else if (urlConstruction === 'only-add-token') { - fullUrl = `${url}&temporary_token=${temporaryToken}` } else { const { pathname, searchParams } = combineUrl(url) const params = { ...searchParams, temporary_token: temporaryToken } diff --git a/frontend/src/toolbar/utils.ts b/frontend/src/toolbar/utils.ts index 20441d2d385a9..9e2e67b56231f 100644 --- a/frontend/src/toolbar/utils.ts +++ b/frontend/src/toolbar/utils.ts @@ -7,6 +7,7 @@ import { ActionStepForm, BoxColor, ElementRect } from '~/toolbar/types' import { ActionStepType, StringMatching } from '~/types' export const TOOLBAR_ID = '__POSTHOG_TOOLBAR__' +export const LOCALSTORAGE_KEY = '_postHogToolbarParams' export function getSafeText(el: HTMLElement): string { if (!el.childNodes || !el.childNodes.length) { @@ -328,14 +329,6 @@ export function stepToDatabaseFormat(step: ActionStepForm): ActionStepType { } } -export function clearSessionToolbarToken(): void { - window.sessionStorage?.removeItem('_postHogToolbarParams') - window.localStorage?.removeItem('_postHogToolbarParams') - // keeping these around for compatibility, should be eventually removed - window.sessionStorage?.removeItem('_postHogEditorParams') - window.localStorage?.removeItem('_postHogEditorParams') -} - export function getRectForElement(element: HTMLElement): ElementRect { const elements = [elementToAreaRect(element)] diff --git a/frontend/src/types.ts b/frontend/src/types.ts index 7a0cadb86ae7b..49a926823dd81 100644 --- a/frontend/src/types.ts +++ b/frontend/src/types.ts @@ -3046,7 +3046,7 @@ interface BreadcrumbBase { /** Symbol, e.g. a lettermark or a profile picture. */ symbol?: React.ReactNode /** Whether to show a custom popover */ - popover?: Pick + popover?: Pick } interface LinkBreadcrumb extends BreadcrumbBase { /** Path to link to. 
*/ @@ -3667,6 +3667,7 @@ export type BatchExportDestinationS3 = { encryption: string | null kms_key_id: string | null endpoint_url: string | null + file_format: string } } diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 26acbf5ab8d36..7982187a098d5 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -85,6 +85,7 @@ posthog/hogql/parser.py:0: error: "None" has no attribute "text" [attr-defined] posthog/hogql/parser.py:0: error: "None" has no attribute "text" [attr-defined] posthog/hogql/parser.py:0: error: Statement is unreachable [unreachable] posthog/hogql/database/schema/person_distinct_ids.py:0: error: Argument 1 to "select_from_person_distinct_ids_table" has incompatible type "dict[str, list[str]]"; expected "dict[str, list[str | int]]" [arg-type] +posthog/hogql/database/schema/person_distinct_id_overrides.py:0: error: Argument 1 to "select_from_person_distinct_id_overrides_table" has incompatible type "dict[str, list[str]]"; expected "dict[str, list[str | int]]" [arg-type] posthog/hogql/database/schema/cohort_people.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] posthog/hogql/database/schema/cohort_people.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance posthog/hogql/database/schema/cohort_people.py:0: note: Consider using "Sequence" instead, which is covariant @@ -105,6 +106,7 @@ posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fi posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined] posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined] posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined] +posthog/hogql/database/database.py:0: error: "FieldOrTable" has no attribute "fields" [attr-defined] posthog/hogql/database/database.py:0: error: Incompatible types (expression has type "Literal['view', 'lazy_table']", TypedDict item "type" has type "Literal['integer', 'float', 'string', 'datetime', 'date', 'boolean', 'array', 'json', 'lazy_table', 'virtual_table', 'field_traverser', 'expression']") [typeddict-item] posthog/warehouse/models/datawarehouse_saved_query.py:0: error: Argument 1 to "create_hogql_database" has incompatible type "int | None"; expected "int" [arg-type] posthog/warehouse/models/datawarehouse_saved_query.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "SelectQuery | SelectUnionQuery") [assignment] diff --git a/package.json b/package.json index 2128fa3207a76..ca948c3607d16 100644 --- a/package.json +++ b/package.json @@ -145,7 +145,7 @@ "pmtiles": "^2.11.0", "postcss": "^8.4.31", "postcss-preset-env": "^9.3.0", - "posthog-js": "1.116.4", + "posthog-js": "1.116.6", "posthog-js-lite": "2.5.0", "prettier": "^2.8.8", "prop-types": "^15.7.2", diff --git a/plugin-server/functional_tests/api.ts b/plugin-server/functional_tests/api.ts index abbd770d7bb77..c6ff46bf5bf6d 100644 --- a/plugin-server/functional_tests/api.ts +++ b/plugin-server/functional_tests/api.ts @@ -106,6 +106,7 @@ export const capture = async ({ }) ), key: teamId ? 
teamId.toString() : '', + waitForAck: true, }) } diff --git a/plugin-server/functional_tests/jobs-consumer.test.ts b/plugin-server/functional_tests/jobs-consumer.test.ts index 30e2abd9af282..353bd3518397e 100644 --- a/plugin-server/functional_tests/jobs-consumer.test.ts +++ b/plugin-server/functional_tests/jobs-consumer.test.ts @@ -43,7 +43,7 @@ describe('dlq handling', () => { test.concurrent(`handles empty messages`, async () => { const key = uuidv4() - await produce({ topic: 'jobs', message: null, key }) + await produce({ topic: 'jobs', message: null, key, waitForAck: true }) await waitForExpect(() => { const messages = dlq.filter((message) => message.key?.toString() === key) @@ -54,7 +54,7 @@ describe('dlq handling', () => { test.concurrent(`handles invalid JSON`, async () => { const key = uuidv4() - await produce({ topic: 'jobs', message: Buffer.from('invalid json'), key }) + await produce({ topic: 'jobs', message: Buffer.from('invalid json'), key, waitForAck: true }) await waitForExpect(() => { const messages = dlq.filter((message) => message.key?.toString() === key) @@ -72,7 +72,7 @@ describe('dlq handling', () => { labels: { topic: 'jobs', partition: '0', groupId: 'jobs-inserter' }, }) - await produce({ topic: 'jobs', message: Buffer.from(''), key: '' }) + await produce({ topic: 'jobs', message: Buffer.from(''), key: '', waitForAck: true }) await waitForExpect(async () => { const metricAfter = await getMetric({ diff --git a/plugin-server/functional_tests/kafka.ts b/plugin-server/functional_tests/kafka.ts index c2ab7ac87a6ab..f431488b290ac 100644 --- a/plugin-server/functional_tests/kafka.ts +++ b/plugin-server/functional_tests/kafka.ts @@ -36,7 +36,17 @@ export async function createKafkaProducer() { return producer } -export async function produce({ topic, message, key }: { topic: string; message: Buffer | null; key: string }) { +export async function produce({ + topic, + message, + key, + waitForAck, +}: { + topic: string + message: Buffer | null + key: string + waitForAck: boolean +}) { producer = producer ?? 
(await createKafkaProducer()) - await defaultProduce({ producer, topic, value: message, key: Buffer.from(key) }) + await defaultProduce({ producer, topic, value: message, key: Buffer.from(key), waitForAck }) } diff --git a/plugin-server/functional_tests/scheduled-tasks-runner.test.ts b/plugin-server/functional_tests/scheduled-tasks-runner.test.ts index 3e3345245a644..48764ae7f90a7 100644 --- a/plugin-server/functional_tests/scheduled-tasks-runner.test.ts +++ b/plugin-server/functional_tests/scheduled-tasks-runner.test.ts @@ -43,7 +43,7 @@ describe('dlq handling', () => { test.concurrent(`handles empty messages`, async () => { const key = uuidv4() - await produce({ topic: 'scheduled_tasks', message: null, key }) + await produce({ topic: 'scheduled_tasks', message: null, key, waitForAck: true }) await waitForExpect(() => { const messages = dlq.filter((message) => message.key?.toString() === key) @@ -54,7 +54,7 @@ describe('dlq handling', () => { test.concurrent(`handles invalid JSON`, async () => { const key = uuidv4() - await produce({ topic: 'scheduled_tasks', message: Buffer.from('invalid json'), key }) + await produce({ topic: 'scheduled_tasks', message: Buffer.from('invalid json'), key, waitForAck: true }) await waitForExpect(() => { const messages = dlq.filter((message) => message.key?.toString() === key) @@ -69,6 +69,7 @@ describe('dlq handling', () => { topic: 'scheduled_tasks', message: Buffer.from(JSON.stringify({ taskType: 'invalidTaskType', pluginConfigId: 1 })), key, + waitForAck: true, }) await waitForExpect(() => { @@ -84,6 +85,7 @@ describe('dlq handling', () => { topic: 'scheduled_tasks', message: Buffer.from(JSON.stringify({ taskType: 'runEveryMinute', pluginConfigId: 'asdf' })), key, + waitForAck: true, }) await waitForExpect(() => { @@ -104,7 +106,7 @@ describe('dlq handling', () => { // NOTE: we don't actually care too much about the contents of the // message, just that it triggers the consumer to try to process it. 
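// --- illustrative sketch, not part of the patch ---
// With `waitForAck` now a required field on the functional-test `produce` helper,
// every call site must state explicitly whether it blocks on the broker
// acknowledgement. Waiting makes the `waitForExpect` polling in these tests
// deterministic: the message is known to be persisted before assertions start.
// (Import path assumed per this repo's functional_tests layout.)
import { produce } from './kafka'

async function produceForDlqTest(key: string): Promise<void> {
    // Fire-and-wait: resolves only once Kafka has acked the message.
    await produce({ topic: 'scheduled_tasks', message: null, key, waitForAck: true })
}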
- await produce({ topic: 'scheduled_tasks', message: Buffer.from(''), key: '' }) + await produce({ topic: 'scheduled_tasks', message: Buffer.from(''), key: '', waitForAck: true }) await waitForExpect(async () => { const metricAfter = await getMetric({ diff --git a/plugin-server/functional_tests/session-recordings.test.ts b/plugin-server/functional_tests/session-recordings.test.ts index 62075bc6bd10f..783fbdbeb43cd 100644 --- a/plugin-server/functional_tests/session-recordings.test.ts +++ b/plugin-server/functional_tests/session-recordings.test.ts @@ -173,7 +173,12 @@ test.skip('consumer updates timestamp exported to prometheus', async () => { }, }) - await produce({ topic: KAFKA_SESSION_RECORDING_SNAPSHOT_ITEM_EVENTS, message: Buffer.from(''), key: '' }) + await produce({ + topic: KAFKA_SESSION_RECORDING_SNAPSHOT_ITEM_EVENTS, + message: Buffer.from(''), + key: '', + waitForAck: true, + }) await waitForExpect(async () => { const metricAfter = await getMetric({ @@ -245,6 +250,7 @@ test.skip(`handles message with no token or with token and no associated team_id topic: KAFKA_SESSION_RECORDING_SNAPSHOT_ITEM_EVENTS, message: Buffer.from(JSON.stringify({ uuid: noTokenUuid, data: JSON.stringify({}) })), key: noTokenKey, + waitForAck: true, }) await produce({ topic: KAFKA_SESSION_RECORDING_SNAPSHOT_ITEM_EVENTS, @@ -252,6 +258,7 @@ test.skip(`handles message with no token or with token and no associated team_id JSON.stringify({ uuid: noAssociatedTeamUuid, token: 'no associated team', data: JSON.stringify({}) }) ), key: noAssociatedTeamKey, + waitForAck: true, }) await capture(makeSessionMessage(teamId, 'should be ingested')) diff --git a/plugin-server/src/config/config.ts b/plugin-server/src/config/config.ts index dcaebe4c1097a..def72eea474bb 100644 --- a/plugin-server/src/config/config.ts +++ b/plugin-server/src/config/config.ts @@ -164,8 +164,8 @@ export function getDefaultConfig(): PluginsServerConfig { SESSION_RECORDING_KAFKA_DEBUG: undefined, SESSION_RECORDING_MAX_PARALLEL_FLUSHES: 10, SESSION_RECORDING_OVERFLOW_ENABLED: false, - SESSION_RECORDING_OVERFLOW_BUCKET_REPLENISH_RATE: 2_000_000, // 2MB/second uncompressed, sustained - SESSION_RECORDING_OVERFLOW_BUCKET_CAPACITY: 100_000_000, // 100MB burst + SESSION_RECORDING_OVERFLOW_BUCKET_REPLENISH_RATE: 5_000_000, // 5MB/second uncompressed, sustained + SESSION_RECORDING_OVERFLOW_BUCKET_CAPACITY: 200_000_000, // 200MB burst } } diff --git a/plugin-server/src/kafka/producer.ts b/plugin-server/src/kafka/producer.ts index 7029a26c79fbd..062785f902bc4 100644 --- a/plugin-server/src/kafka/producer.ts +++ b/plugin-server/src/kafka/producer.ts @@ -7,6 +7,7 @@ import { NumberNullUndefined, ProducerGlobalConfig, } from 'node-rdkafka' +import { Summary } from 'prom-client' import { getSpan } from '../sentry' import { status } from '../utils/status' @@ -17,6 +18,13 @@ export type KafkaProducerConfig = { KAFKA_PRODUCER_QUEUE_BUFFERING_MAX_MESSAGES: number } +export const ingestEventKafkaProduceLatency = new Summary({ + name: 'ingest_event_kafka_produce_latency', + help: 'Wait time for individual Kafka produces', + labelNames: ['topic', 'waitForAck'], + percentiles: [0.5, 0.9, 0.95, 0.99], +}) + // Kafka production related functions using node-rdkafka. 
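// --- illustrative sketch, not part of the patch ---
// How a prom-client Summary like `ingest_event_kafka_produce_latency` above is
// driven: `startTimer()` returns a function that, when called, records the
// elapsed seconds under the given labels. Metric name and topic here are
// example values only; the real patch starts/stops the timer inside the
// produce callbacks rather than a try/finally.
import { Summary } from 'prom-client'

const exampleProduceLatency = new Summary({
    name: 'example_produce_latency',
    help: 'Wait time for an example produce call',
    labelNames: ['topic', 'waitForAck'],
    percentiles: [0.5, 0.9, 0.95, 0.99],
})

async function timedProduce(doProduce: () => Promise<void>): Promise<void> {
    const stop = exampleProduceLatency.labels({ topic: 'events', waitForAck: 'true' }).startTimer()
    try {
        await doProduce()
    } finally {
        stop() // observes the duration, in seconds, on the Summary
    }
}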
export const createKafkaProducer = async (globalConfig: ProducerGlobalConfig, producerConfig: KafkaProducerConfig) => { const producer = new RdKafkaProducer({ @@ -71,18 +79,22 @@ export const produce = async ({ value, key, headers = [], - waitForAck = true, + waitForAck, }: { producer: RdKafkaProducer topic: string value: MessageValue key: MessageKey headers?: MessageHeader[] - waitForAck?: boolean + waitForAck: boolean }): Promise => { status.debug('📤', 'Producing message', { topic: topic }) const produceSpan = getSpan()?.startChild({ op: 'kafka_produce' }) return await new Promise((resolve, reject) => { + const produceTimer = ingestEventKafkaProduceLatency + .labels({ topic, waitForAck: waitForAck.toString() }) + .startTimer() + if (waitForAck) { producer.produce( topic, @@ -100,6 +112,7 @@ export const produce = async ({ resolve(offset) } + produceTimer() produceSpan?.finish() } ) @@ -112,6 +125,7 @@ export const produce = async ({ produceSpan?.finish() }) resolve(undefined) + produceTimer() } }) } diff --git a/plugin-server/src/main/graphile-worker/schedule.ts b/plugin-server/src/main/graphile-worker/schedule.ts index d50c672cea428..16435d02c0466 100644 --- a/plugin-server/src/main/graphile-worker/schedule.ts +++ b/plugin-server/src/main/graphile-worker/schedule.ts @@ -56,8 +56,11 @@ export async function runScheduledTasks( for (const pluginConfigId of server.pluginSchedule?.[taskType] || []) { status.info('⏲️', 'queueing_schedule_task', { taskType, pluginConfigId }) await server.kafkaProducer.queueMessage({ - topic: KAFKA_SCHEDULED_TASKS, - messages: [{ key: pluginConfigId.toString(), value: JSON.stringify({ taskType, pluginConfigId }) }], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [{ key: pluginConfigId.toString(), value: JSON.stringify({ taskType, pluginConfigId }) }], + }, + waitForAck: true, }) graphileScheduledTaskCounter.labels({ status: 'queued', task: taskType }).inc() } diff --git a/plugin-server/src/main/ingestion-queues/batch-processing/each-batch-ingestion.ts b/plugin-server/src/main/ingestion-queues/batch-processing/each-batch-ingestion.ts index 749e41c18c335..588c2c92beb86 100644 --- a/plugin-server/src/main/ingestion-queues/batch-processing/each-batch-ingestion.ts +++ b/plugin-server/src/main/ingestion-queues/batch-processing/each-batch-ingestion.ts @@ -15,6 +15,7 @@ import { eventDroppedCounter, latestOffsetTimestampGauge } from '../metrics' import { ingestEventBatchingBatchCountSummary, ingestEventBatchingInputLengthSummary, + ingestEventEachBatchKafkaAckWait, ingestionOverflowingMessagesTotal, ingestionParallelism, ingestionParallelismPotential, @@ -41,7 +42,7 @@ type IngestionSplitBatch = { type IngestResult = { // Promises that the batch handler should await on before committing offsets, // contains the Kafka producer ACKs, to avoid blocking after every message. - promises?: Array> + ackPromises?: Array> } async function handleProcessingError( @@ -166,7 +167,7 @@ export async function eachBatchParallelIngestion( return await runner.runEventPipeline(pluginEvent) })) as IngestResult - result.promises?.forEach((promise) => + result.ackPromises?.forEach((promise) => processingPromises.push( promise.catch(async (error) => { await handleProcessingError(error, message, pluginEvent, queue) @@ -227,7 +228,9 @@ export async function eachBatchParallelIngestion( // impact the success. Delaying ACKs allows the producer to write in big batches for // better throughput and lower broker load. 
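// --- illustrative sketch, not part of the patch ---
// The pattern the comment above describes: each event's produce() is started
// without being awaited, the returned ack promises (`ackPromises`) are
// collected, and the batch awaits them all once before committing offsets.
// The producer can then write in large internal batches instead of
// round-tripping to the broker per message. Names are simplified stand-ins
// for the real pipeline types.
async function processBatchExample(
    messages: Array<{ offset: string }>,
    runPipeline: (m: { offset: string }) => Promise<{ ackPromises?: Array<Promise<void>> }>,
    commitOffset: (offset: string) => void
): Promise<void> {
    const pending: Array<Promise<void>> = []
    for (const message of messages) {
        const result = await runPipeline(message)
        result.ackPromises?.forEach((p) => pending.push(p))
    }
    await Promise.all(pending) // single wait for all Kafka ACKs in the batch
    for (const message of messages) {
        commitOffset(message.offset)
    }
}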
const awaitSpan = transaction.startChild({ op: 'awaitACKs', data: { promiseCount: processingPromises.length } }) + const kafkaAckWaitMetric = ingestEventEachBatchKafkaAckWait.startTimer() await Promise.all(processingPromises) + kafkaAckWaitMetric() awaitSpan.finish() for (const message of messages) { diff --git a/plugin-server/src/main/ingestion-queues/batch-processing/metrics.ts b/plugin-server/src/main/ingestion-queues/batch-processing/metrics.ts index 42c1b06a27b5d..60563b6cabaaa 100644 --- a/plugin-server/src/main/ingestion-queues/batch-processing/metrics.ts +++ b/plugin-server/src/main/ingestion-queues/batch-processing/metrics.ts @@ -41,3 +41,9 @@ export const ingestEventBatchingBatchCountSummary = new Summary({ help: 'Number of batches of events', percentiles: [0.5, 0.9, 0.95, 0.99], }) + +export const ingestEventEachBatchKafkaAckWait = new Summary({ + name: 'ingest_event_each_batch_kafka_ack_wait', + help: 'Wait time for the batch of Kafka ACKs at the end of eachBatchParallelIngestion', + percentiles: [0.5, 0.9, 0.95, 0.99], +}) diff --git a/plugin-server/src/main/ingestion-queues/jobs-consumer.ts b/plugin-server/src/main/ingestion-queues/jobs-consumer.ts index 94549340da4fe..605a812068c51 100644 --- a/plugin-server/src/main/ingestion-queues/jobs-consumer.ts +++ b/plugin-server/src/main/ingestion-queues/jobs-consumer.ts @@ -54,8 +54,11 @@ export const startJobsConsumer = async ({ }) // TODO: handle resolving offsets asynchronously await producer.queueMessage({ - topic: KAFKA_JOBS_DLQ, - messages: [{ value: message.value, key: message.key }], + kafkaMessage: { + topic: KAFKA_JOBS_DLQ, + messages: [{ value: message.value, key: message.key }], + }, + waitForAck: true, }) resolveOffset(message.offset) continue @@ -71,8 +74,11 @@ export const startJobsConsumer = async ({ }) // TODO: handle resolving offsets asynchronously await producer.queueMessage({ - topic: KAFKA_JOBS_DLQ, - messages: [{ value: message.value, key: message.key }], + kafkaMessage: { + topic: KAFKA_JOBS_DLQ, + messages: [{ value: message.value, key: message.key }], + }, + waitForAck: true, }) resolveOffset(message.offset) continue diff --git a/plugin-server/src/main/ingestion-queues/scheduled-tasks-consumer.ts b/plugin-server/src/main/ingestion-queues/scheduled-tasks-consumer.ts index 3de544ce2d0a4..83ea62fdfdd6f 100644 --- a/plugin-server/src/main/ingestion-queues/scheduled-tasks-consumer.ts +++ b/plugin-server/src/main/ingestion-queues/scheduled-tasks-consumer.ts @@ -163,8 +163,11 @@ const getTasksFromBatch = async (batch: Batch, producer: KafkaProducerWrapper) = value: message.value, }) await producer.queueMessage({ - topic: KAFKA_SCHEDULED_TASKS_DLQ, - messages: [{ value: message.value, key: message.key }], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS_DLQ, + messages: [{ value: message.value, key: message.key }], + }, + waitForAck: true, }) continue } @@ -181,8 +184,11 @@ const getTasksFromBatch = async (batch: Batch, producer: KafkaProducerWrapper) = error: error.stack ?? 
error, }) await producer.queueMessage({ - topic: KAFKA_SCHEDULED_TASKS_DLQ, - messages: [{ value: message.value, key: message.key }], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS_DLQ, + messages: [{ value: message.value, key: message.key }], + }, + waitForAck: true, }) continue } @@ -190,8 +196,11 @@ const getTasksFromBatch = async (batch: Batch, producer: KafkaProducerWrapper) = if (!taskTypes.includes(task.taskType) || isNaN(task.pluginConfigId)) { status.warn('⚠️', `Invalid schema for partition ${batch.partition} offset ${message.offset}.`, task) await producer.queueMessage({ - topic: KAFKA_SCHEDULED_TASKS_DLQ, - messages: [{ value: message.value, key: message.key }], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS_DLQ, + messages: [{ value: message.value, key: message.key }], + }, + waitForAck: true, }) continue } diff --git a/plugin-server/src/main/ingestion-queues/session-recording/services/console-logs-ingester.ts b/plugin-server/src/main/ingestion-queues/session-recording/services/console-logs-ingester.ts index 5729da5cb373e..1c581451e44ec 100644 --- a/plugin-server/src/main/ingestion-queues/session-recording/services/console-logs-ingester.ts +++ b/plugin-server/src/main/ingestion-queues/session-recording/services/console-logs-ingester.ts @@ -163,6 +163,7 @@ export class ConsoleLogsIngester { topic: KAFKA_LOG_ENTRIES, value: Buffer.from(JSON.stringify(cle)), key: event.session_id, + waitForAck: true, }) ) } catch (error) { diff --git a/plugin-server/src/main/ingestion-queues/session-recording/services/replay-events-ingester.ts b/plugin-server/src/main/ingestion-queues/session-recording/services/replay-events-ingester.ts index 632f695a158f5..029f28f20bb9a 100644 --- a/plugin-server/src/main/ingestion-queues/session-recording/services/replay-events-ingester.ts +++ b/plugin-server/src/main/ingestion-queues/session-recording/services/replay-events-ingester.ts @@ -171,6 +171,7 @@ export class ReplayEventsIngester { topic: KAFKA_CLICKHOUSE_SESSION_REPLAY_EVENTS, value: Buffer.from(JSON.stringify(replayRecord)), key: event.session_id, + waitForAck: true, }), ] } catch (error) { diff --git a/plugin-server/src/utils/db/db.ts b/plugin-server/src/utils/db/db.ts index 2baa10671a91e..c7b6ce86a895a 100644 --- a/plugin-server/src/utils/db/db.ts +++ b/plugin-server/src/utils/db/db.ts @@ -707,7 +707,7 @@ export class DB { }) } - await this.kafkaProducer.queueMessages(kafkaMessages) + await this.kafkaProducer.queueMessages({ kafkaMessages, waitForAck: true }) return person } @@ -759,7 +759,7 @@ export class DB { if (tx) { kafkaMessages.push(message) } else { - await this.kafkaProducer.queueMessage(message) + await this.kafkaProducer.queueMessage({ kafkaMessage: message, waitForAck: true }) } status.debug( @@ -829,7 +829,7 @@ export class DB { public async addDistinctId(person: Person, distinctId: string): Promise { const kafkaMessages = await this.addDistinctIdPooled(person, distinctId) if (kafkaMessages.length) { - await this.kafkaProducer.queueMessages(kafkaMessages) + await this.kafkaProducer.queueMessages({ kafkaMessages, waitForAck: true }) } } @@ -1072,15 +1072,15 @@ export class DB { pluginLogEntryCounter.labels({ plugin_id: String(pluginConfig.plugin_id), source }).inc() try { - await this.kafkaProducer.queueSingleJsonMessage( - KAFKA_PLUGIN_LOG_ENTRIES, - parsedEntry.id, - parsedEntry, + await this.kafkaProducer.queueSingleJsonMessage({ + topic: KAFKA_PLUGIN_LOG_ENTRIES, + key: parsedEntry.id, + object: parsedEntry, // For logs, we relax our durability requirements a little and // do 
not wait for acks that Kafka has persisted the message to // disk. - false - ) + waitForAck: false, + }) } catch (e) { captureException(e, { tags: { team_id: entry.pluginConfig.team_id } }) console.error('Failed to produce message', e, parsedEntry) @@ -1409,19 +1409,22 @@ export class DB { version: number ): Promise { await this.kafkaProducer.queueMessage({ - topic: KAFKA_GROUPS, - messages: [ - { - value: JSON.stringify({ - group_type_index: groupTypeIndex, - group_key: groupKey, - team_id: teamId, - group_properties: JSON.stringify(properties), - created_at: castTimestampOrNow(createdAt, TimestampFormat.ClickHouseSecondPrecision), - version, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_GROUPS, + messages: [ + { + value: JSON.stringify({ + group_type_index: groupTypeIndex, + group_key: groupKey, + team_id: teamId, + group_properties: JSON.stringify(properties), + created_at: castTimestampOrNow(createdAt, TimestampFormat.ClickHouseSecondPrecision), + version, + }), + }, + ], + }, + waitForAck: true, }) } diff --git a/plugin-server/src/utils/db/hub.ts b/plugin-server/src/utils/db/hub.ts index 0a50533a1dbdb..098a44e7d4aa6 100644 --- a/plugin-server/src/utils/db/hub.ts +++ b/plugin-server/src/utils/db/hub.ts @@ -159,13 +159,16 @@ export async function createHub( // chained, and if we do not manage to produce then the chain will be // broken. await kafkaProducer.queueMessage({ - topic: KAFKA_JOBS, - messages: [ - { - value: Buffer.from(JSON.stringify(job)), - key: Buffer.from(job.pluginConfigTeam.toString()), - }, - ], + kafkaMessage: { + topic: KAFKA_JOBS, + messages: [ + { + value: Buffer.from(JSON.stringify(job)), + key: Buffer.from(job.pluginConfigTeam.toString()), + }, + ], + }, + waitForAck: true, }) } diff --git a/plugin-server/src/utils/db/kafka-producer-wrapper.ts b/plugin-server/src/utils/db/kafka-producer-wrapper.ts index 8f7cef4c06b30..0ea1e01c5099f 100644 --- a/plugin-server/src/utils/db/kafka-producer-wrapper.ts +++ b/plugin-server/src/utils/db/kafka-producer-wrapper.ts @@ -35,7 +35,7 @@ export class KafkaProducerWrapper { key: MessageKey topic: string headers?: MessageHeader[] - waitForAck?: boolean + waitForAck: boolean }): Promise { try { kafkaProducerMessagesQueuedCounter.labels({ topic_name: topic }).inc() @@ -66,7 +66,7 @@ export class KafkaProducerWrapper { } } - async queueMessage(kafkaMessage: ProducerRecord, waitForAck?: boolean) { + async queueMessage({ kafkaMessage, waitForAck }: { kafkaMessage: ProducerRecord; waitForAck: boolean }) { return await Promise.all( kafkaMessage.messages.map((message) => this.produce({ @@ -80,23 +80,34 @@ export class KafkaProducerWrapper { ) } - async queueMessages(kafkaMessages: ProducerRecord[], waitForAck?: boolean): Promise { - await Promise.all(kafkaMessages.map((message) => this.queueMessage(message, waitForAck))) + async queueMessages({ + kafkaMessages, + waitForAck, + }: { + kafkaMessages: ProducerRecord[] + waitForAck: boolean + }): Promise { + await Promise.all(kafkaMessages.map((kafkaMessage) => this.queueMessage({ kafkaMessage, waitForAck }))) } - async queueSingleJsonMessage( - topic: string, - key: Message['key'], - object: Record, - waitForAck?: boolean - ): Promise { - await this.queueMessage( - { + async queueSingleJsonMessage({ + topic, + key, + object, + waitForAck, + }: { + topic: string + key: Message['key'] + object: Record + waitForAck: boolean + }): Promise { + await this.queueMessage({ + kafkaMessage: { topic, messages: [{ key, value: JSON.stringify(object) }], }, - waitForAck - ) + waitForAck, + }) } public 
async flush() { diff --git a/plugin-server/src/worker/ingestion/app-metrics.ts b/plugin-server/src/worker/ingestion/app-metrics.ts index d8f52a7401150..5e0a83c92ae31 100644 --- a/plugin-server/src/worker/ingestion/app-metrics.ts +++ b/plugin-server/src/worker/ingestion/app-metrics.ts @@ -183,8 +183,11 @@ export class AppMetrics { })) await this.kafkaProducer.queueMessage({ - topic: KAFKA_APP_METRICS, - messages: kafkaMessages, + kafkaMessage: { + topic: KAFKA_APP_METRICS, + messages: kafkaMessages, + }, + waitForAck: true, }) status.debug('🚽', `Finished flushing app metrics, took ${Date.now() - startTime}ms`) } diff --git a/plugin-server/src/worker/ingestion/event-pipeline/runner.ts b/plugin-server/src/worker/ingestion/event-pipeline/runner.ts index 7d71548381b16..6ae2248513073 100644 --- a/plugin-server/src/worker/ingestion/event-pipeline/runner.ts +++ b/plugin-server/src/worker/ingestion/event-pipeline/runner.ts @@ -25,7 +25,7 @@ import { processPersonsStep } from './processPersonsStep' export type EventPipelineResult = { // Promises that the batch handler should await on before committing offsets, // contains the Kafka producer ACKs, to avoid blocking after every message. - promises?: Array> + ackPromises?: Array> // Only used in tests // TODO: update to test for side-effects of running the pipeline rather than // this return type. @@ -78,14 +78,14 @@ export class EventPipelineRunner { drop_cause: 'disallowed', }) .inc() - return this.registerLastStep('eventDisallowedStep', null, [event]) + return this.registerLastStep('eventDisallowedStep', [event]) } let result: EventPipelineResult const eventWithTeam = await this.runStep(populateTeamDataStep, [this, event], event.team_id || -1) if (eventWithTeam != null) { result = await this.runEventPipelineSteps(eventWithTeam) } else { - result = this.registerLastStep('populateTeamDataStep', null, [event]) + result = this.registerLastStep('populateTeamDataStep', [event]) } eventProcessedAndIngestedCounter.inc() return result @@ -120,7 +120,7 @@ export class EventPipelineRunner { const processedEvent = await this.runStep(pluginsProcessEventStep, [this, event], event.team_id) if (processedEvent == null) { - return this.registerLastStep('pluginsProcessEventStep', event.team_id, [event]) + return this.registerLastStep('pluginsProcessEventStep', [event]) } const [normalizedEvent, person] = await this.runStep(processPersonsStep, [this, processedEvent], event.team_id) @@ -132,17 +132,12 @@ export class EventPipelineRunner { event.team_id ) - return this.registerLastStep('createEventStep', event.team_id, [rawClickhouseEvent, person], [eventAck]) + return this.registerLastStep('createEventStep', [rawClickhouseEvent, person], [eventAck]) } - registerLastStep( - stepName: string, - teamId: number | null, - args: any[], - promises?: Array> - ): EventPipelineResult { + registerLastStep(stepName: string, args: any[], ackPromises?: Array>): EventPipelineResult { pipelineLastStepCounter.labels(stepName).inc() - return { promises: promises, lastStep: stepName, args } + return { ackPromises, lastStep: stepName, args } } protected runStep any>( @@ -218,7 +213,7 @@ export class EventPipelineRunner { teamId, `plugin_server_ingest_event:${currentStepName}` ) - await this.hub.db.kafkaProducer!.queueMessage(message) + await this.hub.db.kafkaProducer!.queueMessage({ kafkaMessage: message, waitForAck: true }) } catch (dlqError) { status.info('🔔', `Errored trying to add event to dead letter queue. 
Error: ${dlqError}`) Sentry.captureException(dlqError, { diff --git a/plugin-server/src/worker/ingestion/person-state.ts b/plugin-server/src/worker/ingestion/person-state.ts index b2356f3652662..525bbbf84c910 100644 --- a/plugin-server/src/worker/ingestion/person-state.ts +++ b/plugin-server/src/worker/ingestion/person-state.ts @@ -453,7 +453,7 @@ export class PersonState { olderCreatedAt, // Keep the oldest created_at (i.e. the first time we've seen either person) properties ) - await this.db.kafkaProducer.queueMessages(kafkaMessages) + await this.db.kafkaProducer.queueMessages({ kafkaMessages, waitForAck: true }) return mergedPerson } @@ -767,7 +767,7 @@ export class DeferredPersonOverrideWorker { // Postgres for some reason -- the same row state should be // generated each call, and the receiving ReplacingMergeTree will // ensure we keep only the latest version after all writes settle.) - await this.kafkaProducer.queueMessages(messages, true) + await this.kafkaProducer.queueMessages({ kafkaMessages: messages, waitForAck: true }) return rows.length } diff --git a/plugin-server/src/worker/ingestion/utils.ts b/plugin-server/src/worker/ingestion/utils.ts index c52ef4ebba78e..9488ee759581b 100644 --- a/plugin-server/src/worker/ingestion/utils.ts +++ b/plugin-server/src/worker/ingestion/utils.ts @@ -80,18 +80,21 @@ export async function captureIngestionWarning( const limiter_key = `${teamId}:${type}:${debounce?.key || ''}` if (!!debounce?.alwaysSend || IngestionWarningLimiter.consume(limiter_key, 1)) { await kafkaProducer.queueMessage({ - topic: KAFKA_INGESTION_WARNINGS, - messages: [ - { - value: JSON.stringify({ - team_id: teamId, - type: type, - source: 'plugin-server', - details: JSON.stringify(details), - timestamp: castTimestampOrNow(null, TimestampFormat.ClickHouse), - }), - }, - ], + kafkaMessage: { + topic: KAFKA_INGESTION_WARNINGS, + messages: [ + { + value: JSON.stringify({ + team_id: teamId, + type: type, + source: 'plugin-server', + details: JSON.stringify(details), + timestamp: castTimestampOrNow(null, TimestampFormat.ClickHouse), + }), + }, + ], + }, + waitForAck: true, }) } else { return Promise.resolve() diff --git a/plugin-server/src/worker/vm/extensions/posthog.ts b/plugin-server/src/worker/vm/extensions/posthog.ts index c7a0a7124c50d..34e9cb2befd1c 100644 --- a/plugin-server/src/worker/vm/extensions/posthog.ts +++ b/plugin-server/src/worker/vm/extensions/posthog.ts @@ -29,22 +29,25 @@ async function queueEvent(hub: Hub, pluginConfig: PluginConfig, data: InternalDa const partitionKey = partitionKeyHash.digest('hex') await hub.kafkaProducer.queueMessage({ - topic: hub.KAFKA_CONSUMPTION_TOPIC!, - messages: [ - { - key: partitionKey, - value: JSON.stringify({ - distinct_id: data.distinct_id, - ip: '', - site_url: '', - data: JSON.stringify(data), - team_id: pluginConfig.team_id, - now: data.timestamp, - sent_at: data.timestamp, - uuid: data.uuid, - } as RawEventMessage), - }, - ], + kafkaMessage: { + topic: hub.KAFKA_CONSUMPTION_TOPIC!, + messages: [ + { + key: partitionKey, + value: JSON.stringify({ + distinct_id: data.distinct_id, + ip: '', + site_url: '', + data: JSON.stringify(data), + team_id: pluginConfig.team_id, + now: data.timestamp, + sent_at: data.timestamp, + uuid: data.uuid, + } as RawEventMessage), + }, + ], + }, + waitForAck: true, }) } diff --git a/plugin-server/tests/main/db.test.ts b/plugin-server/tests/main/db.test.ts index 2adc7567c8a5d..14448f196f9be 100644 --- a/plugin-server/tests/main/db.test.ts +++ b/plugin-server/tests/main/db.test.ts @@ -367,9 
+367,10 @@ describe('DB', () => { expect(updatedPerson.properties).toEqual({ c: 'aaa' }) // verify correct Kafka message was sent - expect(db.kafkaProducer!.queueMessage).toHaveBeenLastCalledWith( - generateKafkaPersonUpdateMessage(updatedPerson) - ) + expect(db.kafkaProducer!.queueMessage).toHaveBeenLastCalledWith({ + kafkaMessage: generateKafkaPersonUpdateMessage(updatedPerson), + waitForAck: true, + }) }) }) @@ -416,7 +417,7 @@ describe('DB', () => { await delayUntilEventIngested(fetchPersonsRows, 2) const kafkaMessages = await db.deletePerson(person) - await db.kafkaProducer.queueMessages(kafkaMessages) + await db.kafkaProducer.queueMessages({ kafkaMessages, waitForAck: true }) await db.kafkaProducer.flush() const persons = await delayUntilEventIngested(fetchPersonsRows, 3) diff --git a/plugin-server/tests/main/ingestion-queues/analytics-events-ingestion-overflow-consumer.test.ts b/plugin-server/tests/main/ingestion-queues/analytics-events-ingestion-overflow-consumer.test.ts index 851bb23e2ac14..774475a5b34aa 100644 --- a/plugin-server/tests/main/ingestion-queues/analytics-events-ingestion-overflow-consumer.test.ts +++ b/plugin-server/tests/main/ingestion-queues/analytics-events-ingestion-overflow-consumer.test.ts @@ -97,20 +97,23 @@ describe('eachBatchParallelIngestion with overflow consume', () => { expect(queue.pluginsServer.teamManager.getTeamForEvent).toHaveBeenCalledTimes(1) expect(consume).toHaveBeenCalledWith('1:ingestion_capacity_overflow:id', 1) expect(mockQueueMessage).toHaveBeenCalledWith({ - topic: 'clickhouse_ingestion_warnings_test', - messages: [ - { - value: JSON.stringify({ - team_id: 1, - type: 'ingestion_capacity_overflow', - source: 'plugin-server', - details: JSON.stringify({ - overflowDistinctId: 'id', + kafkaMessage: { + topic: 'clickhouse_ingestion_warnings_test', + messages: [ + { + value: JSON.stringify({ + team_id: 1, + type: 'ingestion_capacity_overflow', + source: 'plugin-server', + details: JSON.stringify({ + overflowDistinctId: 'id', + }), + timestamp: castTimestampOrNow(null, TimestampFormat.ClickHouse), }), - timestamp: castTimestampOrNow(null, TimestampFormat.ClickHouse), - }), - }, - ], + }, + ], + }, + waitForAck: true, }) // Event is processed diff --git a/plugin-server/tests/main/ingestion-queues/each-batch.test.ts b/plugin-server/tests/main/ingestion-queues/each-batch.test.ts index 667c278d243f1..b0e61e62fdd66 100644 --- a/plugin-server/tests/main/ingestion-queues/each-batch.test.ts +++ b/plugin-server/tests/main/ingestion-queues/each-batch.test.ts @@ -408,7 +408,7 @@ describe('eachBatchX', () => { const batch = createBatch(captureEndpointEvent) runEventPipeline.mockImplementationOnce(() => Promise.resolve({ - promises: [Promise.resolve(), Promise.reject('deferred nopes out')], + ackPromises: [Promise.resolve(), Promise.reject('deferred nopes out')], }) ) const tokenBlockList = buildStringMatcher('another_token,more_token', false) diff --git a/plugin-server/tests/main/ingestion-queues/session-recording/services/console-log-ingester.test.ts b/plugin-server/tests/main/ingestion-queues/session-recording/services/console-log-ingester.test.ts index 42dfb9e55b5c1..6698b40a8ca6a 100644 --- a/plugin-server/tests/main/ingestion-queues/session-recording/services/console-log-ingester.test.ts +++ b/plugin-server/tests/main/ingestion-queues/session-recording/services/console-log-ingester.test.ts @@ -80,6 +80,7 @@ describe('console log ingester', () => { timestamp: '1970-01-01 00:00:00.000', }) ), + waitForAck: true, }, ], ]) @@ -124,6 +125,7 @@ 
describe('console log ingester', () => { timestamp: '1970-01-01 00:00:00.000', }) ), + waitForAck: true, }, ], [ @@ -142,6 +144,7 @@ describe('console log ingester', () => { timestamp: '1970-01-01 00:00:00.000', }) ), + waitForAck: true, }, ], ]) @@ -181,6 +184,7 @@ describe('console log ingester', () => { timestamp: '1970-01-01 00:00:00.000', }) ), + waitForAck: true, }, ], ]) diff --git a/plugin-server/tests/main/jobs/schedule.test.ts b/plugin-server/tests/main/jobs/schedule.test.ts index 150d171f97d3b..b6b52c892f23d 100644 --- a/plugin-server/tests/main/jobs/schedule.test.ts +++ b/plugin-server/tests/main/jobs/schedule.test.ts @@ -37,120 +37,147 @@ describe('Graphile Worker schedule', () => { } as any) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(1, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '1', - value: JSON.stringify({ - taskType: 'runEveryMinute', - pluginConfigId: 1, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '1', + value: JSON.stringify({ + taskType: 'runEveryMinute', + pluginConfigId: 1, + }), + }, + ], + }, + waitForAck: true, }) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(2, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '2', - value: JSON.stringify({ - taskType: 'runEveryMinute', - pluginConfigId: 2, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '2', + value: JSON.stringify({ + taskType: 'runEveryMinute', + pluginConfigId: 2, + }), + }, + ], + }, + waitForAck: true, }) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(3, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '3', - value: JSON.stringify({ - taskType: 'runEveryMinute', - pluginConfigId: 3, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '3', + value: JSON.stringify({ + taskType: 'runEveryMinute', + pluginConfigId: 3, + }), + }, + ], + }, + waitForAck: true, }) await runScheduledTasks(mockHubWithPluginSchedule, mockPiscina as any, 'runEveryHour', { job: { run_at: new Date() }, } as any) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(4, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '4', - value: JSON.stringify({ - taskType: 'runEveryHour', - pluginConfigId: 4, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '4', + value: JSON.stringify({ + taskType: 'runEveryHour', + pluginConfigId: 4, + }), + }, + ], + }, + waitForAck: true, }) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(5, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '5', - value: JSON.stringify({ - taskType: 'runEveryHour', - pluginConfigId: 5, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '5', + value: JSON.stringify({ + taskType: 'runEveryHour', + pluginConfigId: 5, + }), + }, + ], + }, + waitForAck: true, }) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(6, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '6', - value: JSON.stringify({ - taskType: 'runEveryHour', - pluginConfigId: 6, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '6', + value: JSON.stringify({ + taskType: 'runEveryHour', + pluginConfigId: 6, + }), + }, + ], + }, + waitForAck: true, }) await 
runScheduledTasks(mockHubWithPluginSchedule, mockPiscina as any, 'runEveryDay', { job: { run_at: new Date() }, } as any) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(7, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '7', - value: JSON.stringify({ - taskType: 'runEveryDay', - pluginConfigId: 7, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '7', + value: JSON.stringify({ + taskType: 'runEveryDay', + pluginConfigId: 7, + }), + }, + ], + }, + waitForAck: true, }) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(8, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '8', - value: JSON.stringify({ - taskType: 'runEveryDay', - pluginConfigId: 8, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '8', + value: JSON.stringify({ + taskType: 'runEveryDay', + pluginConfigId: 8, + }), + }, + ], + }, + waitForAck: true, }) expect(mockHubWithPluginSchedule.kafkaProducer.queueMessage).toHaveBeenNthCalledWith(9, { - topic: KAFKA_SCHEDULED_TASKS, - messages: [ - { - key: '9', - value: JSON.stringify({ - taskType: 'runEveryDay', - pluginConfigId: 9, - }), - }, - ], + kafkaMessage: { + topic: KAFKA_SCHEDULED_TASKS, + messages: [ + { + key: '9', + value: JSON.stringify({ + taskType: 'runEveryDay', + pluginConfigId: 9, + }), + }, + ], + }, + waitForAck: true, }) }) }) diff --git a/plugin-server/tests/worker/console.test.ts b/plugin-server/tests/worker/console.test.ts index 18c3480989700..4535f10f6e327 100644 --- a/plugin-server/tests/worker/console.test.ts +++ b/plugin-server/tests/worker/console.test.ts @@ -42,10 +42,10 @@ describe('console extension', () => { await (console[typeMethod](...args) as unknown as Promise) expect(queueSingleJsonMessageSpy).toHaveBeenCalledTimes(1) - expect(queueSingleJsonMessageSpy).toHaveBeenCalledWith( - KAFKA_PLUGIN_LOG_ENTRIES, - expect.any(String), - { + expect(queueSingleJsonMessageSpy).toHaveBeenCalledWith({ + topic: KAFKA_PLUGIN_LOG_ENTRIES, + key: expect.any(String), + object: { source: PluginLogEntrySource.Console, type, id: expect.any(String), @@ -56,8 +56,8 @@ describe('console extension', () => { message: expectedFinalMessage, instance_id: hub.instanceId.toString(), }, - false - ) + waitForAck: false, + }) }) }) }) diff --git a/plugin-server/tests/worker/ingestion/__snapshots__/app-metrics.test.ts.snap b/plugin-server/tests/worker/ingestion/__snapshots__/app-metrics.test.ts.snap index 10cf219b7e43b..1894a82b49dbd 100644 --- a/plugin-server/tests/worker/ingestion/__snapshots__/app-metrics.test.ts.snap +++ b/plugin-server/tests/worker/ingestion/__snapshots__/app-metrics.test.ts.snap @@ -4,12 +4,15 @@ exports[`AppMetrics() flush() flushes queued messages 1`] = ` Array [ Array [ Object { - "messages": Array [ - Object { - "value": "{\\"timestamp\\":\\"1970-01-01 00:16:40.000\\",\\"team_id\\":2,\\"plugin_config_id\\":2,\\"job_id\\":\\"000-000\\",\\"category\\":\\"processEvent\\",\\"successes\\":1,\\"successes_on_retry\\":0,\\"failures\\":0}", - }, - ], - "topic": "clickhouse_app_metrics_test", + "kafkaMessage": Object { + "messages": Array [ + Object { + "value": "{\\"timestamp\\":\\"1970-01-01 00:16:40.000\\",\\"team_id\\":2,\\"plugin_config_id\\":2,\\"job_id\\":\\"000-000\\",\\"category\\":\\"processEvent\\",\\"successes\\":1,\\"successes_on_retry\\":0,\\"failures\\":0}", + }, + ], + "topic": "clickhouse_app_metrics_test", + }, + "waitForAck": true, }, ], ] diff --git 
a/plugin-server/tests/worker/ingestion/event-pipeline/runner.test.ts b/plugin-server/tests/worker/ingestion/event-pipeline/runner.test.ts index 4bfc79f5e2379..364483f7c09a6 100644 --- a/plugin-server/tests/worker/ingestion/event-pipeline/runner.test.ts +++ b/plugin-server/tests/worker/ingestion/event-pipeline/runner.test.ts @@ -219,7 +219,9 @@ describe('EventPipelineRunner', () => { await runner.runEventPipeline(pipelineEvent) expect(hub.db.kafkaProducer.queueMessage).toHaveBeenCalledTimes(1) - expect(JSON.parse(hub.db.kafkaProducer.queueMessage.mock.calls[0][0].messages[0].value)).toMatchObject({ + expect( + JSON.parse(hub.db.kafkaProducer.queueMessage.mock.calls[0][0].kafkaMessage.messages[0].value) + ).toMatchObject({ team_id: 2, distinct_id: 'my_id', error: 'Event ingestion failed. Error: testError', diff --git a/plugin-server/tests/worker/ingestion/postgres-parity.test.ts b/plugin-server/tests/worker/ingestion/postgres-parity.test.ts index 5cdf1246c53f5..5c764e5809b40 100644 --- a/plugin-server/tests/worker/ingestion/postgres-parity.test.ts +++ b/plugin-server/tests/worker/ingestion/postgres-parity.test.ts @@ -339,7 +339,7 @@ describe('postgres parity', () => { // move distinct ids from person to anotherPerson const kafkaMessages = await hub.db.moveDistinctIds(person, anotherPerson) - await hub.db!.kafkaProducer!.queueMessages(kafkaMessages) + await hub.db!.kafkaProducer!.queueMessages({ kafkaMessages, waitForAck: true }) await delayUntilEventIngested(() => hub.db.fetchDistinctIdValues(anotherPerson, Database.ClickHouse), 2) // it got added @@ -395,7 +395,7 @@ describe('postgres parity', () => { // delete person await hub.db.postgres.transaction(PostgresUse.COMMON_WRITE, '', async (client) => { const deletePersonMessage = await hub.db.deletePerson(person, client) - await hub.db!.kafkaProducer!.queueMessage(deletePersonMessage[0]) + await hub.db!.kafkaProducer!.queueMessage({ kafkaMessage: deletePersonMessage[0], waitForAck: true }) }) await delayUntilEventIngested(async () => diff --git a/plugin-server/tests/worker/vm.test.ts b/plugin-server/tests/worker/vm.test.ts index 7e3769de61328..5f1f727d4dbeb 100644 --- a/plugin-server/tests/worker/vm.test.ts +++ b/plugin-server/tests/worker/vm.test.ts @@ -689,10 +689,10 @@ describe('vm tests', () => { await vm.methods.processEvent!(event) expect(queueSingleJsonMessageSpy).toHaveBeenCalledTimes(1) - expect(queueSingleJsonMessageSpy).toHaveBeenCalledWith( - KAFKA_PLUGIN_LOG_ENTRIES, - expect.any(String), - { + expect(queueSingleJsonMessageSpy).toHaveBeenCalledWith({ + topic: KAFKA_PLUGIN_LOG_ENTRIES, + key: expect.any(String), + object: { id: expect.any(String), instance_id: hub.instanceId.toString(), message: 'logged event', @@ -703,8 +703,8 @@ describe('vm tests', () => { timestamp: expect.any(String), type: PluginLogEntryType.Log, }, - false - ) + waitForAck: false, + }) }) test('fetch', async () => { @@ -969,8 +969,8 @@ describe('vm tests', () => { expect(response).toBe('haha') expect(queueMessageSpy).toHaveBeenCalledTimes(1) - expect(queueMessageSpy.mock.calls[0][0].topic).toEqual(KAFKA_EVENTS_PLUGIN_INGESTION) - const parsedMessage = JSON.parse(queueMessageSpy.mock.calls[0][0].messages[0].value!.toString()) + expect(queueMessageSpy.mock.calls[0][0].kafkaMessage.topic).toEqual(KAFKA_EVENTS_PLUGIN_INGESTION) + const parsedMessage = JSON.parse(queueMessageSpy.mock.calls[0][0].kafkaMessage.messages[0].value!.toString()) expect(JSON.parse(parsedMessage.data)).toMatchObject({ distinct_id: 'plugin-id-60', event: 'my-new-event', @@ -998,8
+998,8 @@ describe('vm tests', () => { expect(response).toBe('haha') expect(queueMessageSpy).toHaveBeenCalledTimes(1) - expect(queueMessageSpy.mock.calls[0][0].topic).toEqual(KAFKA_EVENTS_PLUGIN_INGESTION) - const parsedMessage = JSON.parse(queueMessageSpy.mock.calls[0][0].messages[0].value!.toString()) + expect(queueMessageSpy.mock.calls[0][0].kafkaMessage.topic).toEqual(KAFKA_EVENTS_PLUGIN_INGESTION) + const parsedMessage = JSON.parse(queueMessageSpy.mock.calls[0][0].kafkaMessage.messages[0].value!.toString()) expect(JSON.parse(parsedMessage.data)).toMatchObject({ timestamp: '2020-02-23T02:15:00Z', // taken out of the properties distinct_id: 'plugin-id-60', @@ -1025,8 +1025,8 @@ describe('vm tests', () => { expect(response).toBe('haha') expect(response).toBe('haha') expect(queueMessageSpy).toHaveBeenCalledTimes(1) - expect(queueMessageSpy.mock.calls[0][0].topic).toEqual(KAFKA_EVENTS_PLUGIN_INGESTION) - const parsedMessage = JSON.parse(queueMessageSpy.mock.calls[0][0].messages[0].value!.toString()) + expect(queueMessageSpy.mock.calls[0][0].kafkaMessage.topic).toEqual(KAFKA_EVENTS_PLUGIN_INGESTION) + const parsedMessage = JSON.parse(queueMessageSpy.mock.calls[0][0].kafkaMessage.messages[0].value!.toString()) expect(JSON.parse(parsedMessage.data)).toMatchObject({ distinct_id: 'custom id', event: 'my-new-event', diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml index 04806adc67be8..73481ed11db17 100644 --- a/pnpm-lock.yaml +++ b/pnpm-lock.yaml @@ -254,8 +254,8 @@ dependencies: specifier: ^9.3.0 version: 9.3.0(postcss@8.4.31) posthog-js: - specifier: 1.116.4 - version: 1.116.4 + specifier: 1.116.6 + version: 1.116.6 posthog-js-lite: specifier: 2.5.0 version: 2.5.0 @@ -13633,7 +13633,7 @@ packages: hogan.js: 3.0.2 htm: 3.1.1 instantsearch-ui-components: 0.3.0 - preact: 10.20.0 + preact: 10.20.1 qs: 6.9.7 search-insights: 2.13.0 dev: false @@ -17454,19 +17454,19 @@ packages: resolution: {integrity: sha512-Urvlp0Vu9h3td0BVFWt0QXFJDoOZcaAD83XM9d91NKMKTVPZtfU0ysoxstIf5mw/ce9ZfuMgpWPaagrZI4rmSg==} dev: false - /posthog-js@1.116.4: - resolution: {integrity: sha512-PZg208/k5OZRQbd9tnGvUgtyRl1IAYyyh74teyIDIH3EnlsAolBlVM4gcoyEYoVkUi5sZLKitj9gTX3/vnEG4Q==} + /posthog-js@1.116.6: + resolution: {integrity: sha512-rvt8HxzJD4c2B/xsUa4jle8ApdqljeBI2Qqjp4XJMohQf18DXRyM6b96H5/UMs8jxYuZG14Er0h/kEIWeU6Fmw==} dependencies: fflate: 0.4.8 - preact: 10.20.0 + preact: 10.20.1 dev: false /potpack@2.0.0: resolution: {integrity: sha512-Q+/tYsFU9r7xoOJ+y/ZTtdVQwTWfzjbiXBDMM/JKUux3+QPP02iUuIoeBQ+Ot6oEDlC+/PGjB/5A3K7KKb7hcw==} dev: false - /preact@10.20.0: - resolution: {integrity: sha512-wU7iZw2BjsaKDal3pDRDy/HpPB6cuFOnVUCcw9aIPKG98+ZrXx3F+szkos8BVME5bquyKDKvRlOJFG8kMkcAbg==} + /preact@10.20.1: + resolution: {integrity: sha512-JIFjgFg9B2qnOoGiYMVBtrcFxHqn+dNXbq76bVmcaHYJFYR4lW67AOcXgAYQQTDYXDOg/kTZrKPNCdRgJ2UJmw==} dev: false /prelude-ls@1.2.1: diff --git a/posthog/api/decide.py b/posthog/api/decide.py index 93234aadd6a1d..3a6e08bc7a7a0 100644 --- a/posthog/api/decide.py +++ b/posthog/api/decide.py @@ -49,9 +49,11 @@ def on_permitted_recording_domain(team: Team, request: HttpRequest) -> bool: ) or hostname_in_allowed_url_list(team.recording_domains, referer) # TODO this is a short term fix for beta testers # TODO we will match on the app identifier in the origin instead and allow users to auth those - is_authorized_android_client: bool = user_agent is not None and "posthog-android" in user_agent + is_authorized_mobile_client: bool = user_agent is not None and any( + keyword in user_agent for keyword in ["posthog-android", 
"posthog-ios"] + ) - return is_authorized_web_client or is_authorized_android_client + return is_authorized_web_client or is_authorized_mobile_client def hostname_in_allowed_url_list(allowed_url_list: Optional[List[str]], hostname: Optional[str]) -> bool: diff --git a/posthog/api/query.py b/posthog/api/query.py index d8f45531253a0..e30853655c749 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -67,9 +67,10 @@ def create(self, request, *args, **kwargs) -> Response: if data.async_: query_status = enqueue_process_query_task( team_id=self.team.pk, + user_id=self.request.user.pk, query_json=request.data["query"], query_id=client_query_id, - refresh_requested=data.refresh, + refresh_requested=data.refresh or False, ) return Response(query_status.model_dump(), status=status.HTTP_202_ACCEPTED) diff --git a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr index cccef08bc4a1f..9ae54e6e582eb 100644 --- a/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr +++ b/posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr @@ -2762,6 +2762,24 @@ 5 /* ... */) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/ ''' # --- +# name: TestDashboard.test_listing_dashboards_is_not_nplus1.57 + ''' + SELECT "posthog_sharingconfiguration"."id", + "posthog_sharingconfiguration"."team_id", + "posthog_sharingconfiguration"."dashboard_id", + "posthog_sharingconfiguration"."insight_id", + "posthog_sharingconfiguration"."recording_id", + "posthog_sharingconfiguration"."created_at", + "posthog_sharingconfiguration"."enabled", + "posthog_sharingconfiguration"."access_token" + FROM "posthog_sharingconfiguration" + WHERE "posthog_sharingconfiguration"."dashboard_id" IN (1, + 2, + 3, + 4, + 5 /* ... 
*/) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/ + ''' +# --- # name: TestDashboard.test_listing_dashboards_is_not_nplus1.6 ''' SELECT "posthog_team"."id", diff --git a/posthog/api/test/test_decide.py b/posthog/api/test/test_decide.py index e56d763ed869f..7cb58bbce2324 100644 --- a/posthog/api/test/test_decide.py +++ b/posthog/api/test/test_decide.py @@ -482,6 +482,20 @@ def test_user_session_recording_allowed_for_android(self, *args) -> None: "networkPayloadCapture": None, } + def test_user_session_recording_allowed_for_ios(self, *args) -> None: + self._update_team({"session_recording_opt_in": True, "recording_domains": ["https://my-website.io"]}) + + response = self._post_decide(origin="any.site.com", user_agent="posthog-ios/3.1.0").json() + assert response["sessionRecording"] == { + "endpoint": "/s/", + "recorderVersion": "v2", + "consoleLogRecordingEnabled": False, + "sampleRate": None, + "linkedFlag": None, + "minimumDurationMilliseconds": None, + "networkPayloadCapture": None, + } + def test_user_session_recording_allowed_when_permitted_domains_are_not_http_based(self, *args): self._update_team( { diff --git a/posthog/batch_exports/service.py b/posthog/batch_exports/service.py index b00f0f4c98c69..d51dfdb2fbc3c 100644 --- a/posthog/batch_exports/service.py +++ b/posthog/batch_exports/service.py @@ -90,6 +90,7 @@ class S3BatchExportInputs: kms_key_id: str | None = None batch_export_schema: BatchExportSchema | None = None endpoint_url: str | None = None + file_format: str = "JSONLines" @dataclass diff --git a/posthog/clickhouse/client/execute_async.py b/posthog/clickhouse/client/execute_async.py index 06f7fc639f824..4671b0060299b 100644 --- a/posthog/clickhouse/client/execute_async.py +++ b/posthog/clickhouse/client/execute_async.py @@ -1,5 +1,6 @@ import datetime import json +from typing import Optional import uuid import structlog @@ -69,11 +70,12 @@ def delete_query_status(self): def execute_process_query( - team_id, - query_id, - query_json, - limit_context, - refresh_requested, + team_id: int, + user_id: int, + query_id: str, + query_json: dict, + limit_context: Optional[LimitContext], + refresh_requested: bool, ): manager = QueryStatusManager(query_id, team_id) @@ -91,7 +93,7 @@ def execute_process_query( QUERY_WAIT_TIME.observe(wait_duration) try: - tag_queries(client_query_id=query_id, team_id=team_id) + tag_queries(client_query_id=query_id, team_id=team_id, user_id=user_id) results = process_query( team=team, query_json=query_json, limit_context=limit_context, refresh_requested=refresh_requested ) @@ -113,12 +115,13 @@ def execute_process_query( def enqueue_process_query_task( - team_id, - query_json, - query_id=None, - refresh_requested=False, - bypass_celery=False, - force=False, + team_id: int, + user_id: int, + query_json: dict, + query_id: Optional[str] = None, + refresh_requested: bool = False, + force: bool = False, + _test_only_bypass_celery: bool = False, ) -> QueryStatus: if not query_id: query_id = uuid.uuid4().hex @@ -136,14 +139,23 @@ def enqueue_process_query_task( query_status = QueryStatus(id=query_id, team_id=team_id, start_time=datetime.datetime.now(datetime.timezone.utc)) manager.store_query_status(query_status) - if bypass_celery: - # Call directly ( for testing ) + if _test_only_bypass_celery: process_query_task( - team_id, query_id, query_json, limit_context=LimitContext.QUERY_ASYNC, refresh_requested=refresh_requested + team_id, + user_id, + query_id, + query_json, + 
limit_context=LimitContext.QUERY_ASYNC, + refresh_requested=refresh_requested, ) else: task = process_query_task.delay( - team_id, query_id, query_json, limit_context=LimitContext.QUERY_ASYNC, refresh_requested=refresh_requested + team_id, + user_id, + query_id, + query_json, + limit_context=LimitContext.QUERY_ASYNC, + refresh_requested=refresh_requested, ) query_status.task_id = task.id manager.store_query_status(query_status) diff --git a/posthog/clickhouse/client/test/test_execute_async.py b/posthog/clickhouse/client/test/test_execute_async.py index 0d7a7281e6a4b..085e7708b9232 100644 --- a/posthog/clickhouse/client/test/test_execute_async.py +++ b/posthog/clickhouse/client/test/test_execute_async.py @@ -24,6 +24,7 @@ def setUp(self): self.organization = Organization.objects.create(name="test") self.team = Team.objects.create(organization=self.organization) self.team_id = self.team.pk + self.user_id = 1337 self.query_id = "test_query_id" self.query_json = {} self.limit_context = None @@ -41,7 +42,9 @@ def test_execute_process_query(self, mock_process_query, mock_redis_client): mock_process_query.return_value = [float("inf"), float("-inf"), float("nan"), 1.0, "👍"] - execute_process_query(self.team_id, self.query_id, self.query_json, self.limit_context, self.refresh_requested) + execute_process_query( + self.team_id, self.user_id, self.query_id, self.query_json, self.limit_context, self.refresh_requested + ) mock_redis_client.assert_called_once() mock_process_query.assert_called_once() @@ -55,15 +58,16 @@ def test_execute_process_query(self, mock_process_query, mock_redis_client): class ClickhouseClientTestCase(TestCase, ClickhouseTestMixin): def setUp(self): - self.organization = Organization.objects.create(name="test") - self.team = Team.objects.create(organization=self.organization) - self.team_id = self.team.pk + self.organization: Organization = Organization.objects.create(name="test") + self.team: Team = Team.objects.create(organization=self.organization) + self.team_id: int = self.team.pk + self.user_id: int = 2137 @snapshot_clickhouse_queries def test_async_query_client(self): query = build_query("SELECT 1+1") team_id = self.team_id - query_id = client.enqueue_process_query_task(team_id, query, bypass_celery=True).id + query_id = client.enqueue_process_query_task(team_id, self.user_id, query, _test_only_bypass_celery=True).id result = client.get_query_status(team_id, query_id) self.assertFalse(result.error, result.error_message) self.assertTrue(result.complete) @@ -74,11 +78,13 @@ def test_async_query_client_errors(self): self.assertRaises( HogQLException, client.enqueue_process_query_task, - **{"team_id": (self.team_id), "query_json": query, "bypass_celery": True}, + **{"team_id": self.team_id, "user_id": self.user_id, "query_json": query, "_test_only_bypass_celery": True}, ) query_id = uuid.uuid4().hex try: - client.enqueue_process_query_task(self.team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + self.team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) except Exception: pass @@ -89,7 +95,7 @@ def test_async_query_client_errors(self): def test_async_query_client_uuid(self): query = build_query("SELECT toUUID('00000000-0000-0000-0000-000000000000')") team_id = self.team_id - query_id = client.enqueue_process_query_task(team_id, query, bypass_celery=True).id + query_id = client.enqueue_process_query_task(team_id, self.user_id, query, _test_only_bypass_celery=True).id result = client.get_query_status(team_id, 
query_id) self.assertFalse(result.error, result.error_message) self.assertTrue(result.complete) @@ -99,7 +105,7 @@ def test_async_query_client_does_not_leak(self): query = build_query("SELECT 1+1") team_id = self.team_id wrong_team = 5 - query_id = client.enqueue_process_query_task(team_id, query, bypass_celery=True).id + query_id = client.enqueue_process_query_task(team_id, self.user_id, query, _test_only_bypass_celery=True).id try: client.get_query_status(wrong_team, query_id) @@ -111,13 +117,19 @@ def test_async_query_client_is_lazy(self, execute_sync_mock): query = build_query("SELECT 4 + 4") query_id = uuid.uuid4().hex team_id = self.team_id - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) # Try the same query again - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) # Try the same query again (for good measure!) - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) # Assert that we only called clickhouse once execute_sync_mock.assert_called_once() @@ -127,13 +139,19 @@ def test_async_query_client_is_lazy_but_not_too_lazy(self, execute_sync_mock): query = build_query("SELECT 8 + 8") query_id = uuid.uuid4().hex team_id = self.team_id - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) # Try the same query again, but with force - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True, force=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True, force=True + ) # Try the same query again (for good measure!) - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) # Assert that we called clickhouse twice self.assertEqual(execute_sync_mock.call_count, 2) @@ -145,13 +163,19 @@ def test_async_query_client_manual_query_uuid(self, execute_sync_mock): query = build_query("SELECT 8 + 8") team_id = self.team_id query_id = "I'm so unique" - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) # Try the same query again, but with force - client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True, force=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True, force=True + ) # Try the same query again (for good measure!) 
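# Note: every enqueue in this suite now threads user_id through as well -- tag_queries
# uses it for the /* user_id:... request:... */ annotation asserted at the end of this
# file -- and the testing-only Celery bypass is renamed to _test_only_bypass_celery.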
- client.enqueue_process_query_task(team_id, query, query_id=query_id, bypass_celery=True) + client.enqueue_process_query_task( + team_id, self.user_id, query, query_id=query_id, _test_only_bypass_celery=True + ) # Assert that we called clickhouse twice self.assertEqual(execute_sync_mock.call_count, 2) @@ -186,4 +210,4 @@ def test_client_strips_comments_from_request(self): # Make sure it still includes the "annotation" comment that includes # request routing information for debugging purposes - self.assertIn("/* request:1 */", first_query) + self.assertIn(f"/* user_id:{self.user_id} request:1 */", first_query) diff --git a/posthog/errors.py b/posthog/errors.py index afa0cdd8648e7..a6e3536042a7f 100644 --- a/posthog/errors.py +++ b/posthog/errors.py @@ -1,6 +1,6 @@ from dataclasses import dataclass import re -from typing import Dict +from typing import Dict, Optional from clickhouse_driver.errors import ServerException @@ -8,9 +8,10 @@ class InternalCHQueryError(ServerException): - code_name: str + code_name: Optional[str] + """Can be null if re-raised from a thread (see `failhard_threadhook_context`).""" - def __init__(self, message, *, code=None, nested=None, code_name): + def __init__(self, message, *, code=None, nested=None, code_name=None): self.code_name = code_name super().__init__(message, code, nested) diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index 6909211070e59..afeac3c26a143 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -31,6 +31,11 @@ from posthog.hogql.database.schema.events import EventsTable from posthog.hogql.database.schema.groups import GroupsTable, RawGroupsTable from posthog.hogql.database.schema.numbers import NumbersTable +from posthog.hogql.database.schema.person_distinct_id_overrides import ( + PersonDistinctIdOverridesTable, + RawPersonDistinctIdOverridesTable, + join_with_person_distinct_id_overrides_table, +) from posthog.hogql.database.schema.person_distinct_ids import ( PersonDistinctIdsTable, RawPersonDistinctIdsTable, @@ -53,7 +58,6 @@ from posthog.models.team.team import WeekStartDay from posthog.schema import HogQLQueryModifiers, PersonsOnEventsMode - if TYPE_CHECKING: from posthog.models import Team @@ -66,6 +70,7 @@ class Database(BaseModel): groups: GroupsTable = GroupsTable() persons: PersonsTable = PersonsTable() person_distinct_ids: PersonDistinctIdsTable = PersonDistinctIdsTable() + person_distinct_id_overrides: PersonDistinctIdOverridesTable = PersonDistinctIdOverridesTable() person_overrides: PersonOverridesTable = PersonOverridesTable() session_replay_events: SessionReplayEventsTable = SessionReplayEventsTable() @@ -81,6 +86,7 @@ class Database(BaseModel): raw_persons: RawPersonsTable = RawPersonsTable() raw_groups: RawGroupsTable = RawGroupsTable() raw_cohort_people: RawCohortPeople = RawCohortPeople() + raw_person_distinct_id_overrides: RawPersonDistinctIdOverridesTable = RawPersonDistinctIdOverridesTable() raw_person_overrides: RawPersonOverridesTable = RawPersonOverridesTable() raw_sessions: RawSessionsTable = RawSessionsTable() @@ -186,6 +192,24 @@ def create_hogql_database( database.events.fields["poe"].fields["id"] = database.events.fields["person_id"] database.events.fields["person"] = FieldTraverser(chain=["poe"]) + elif modifiers.personsOnEventsMode == PersonsOnEventsMode.v3_enabled: + database.events.fields["event_person_id"] = StringDatabaseField(name="person_id") + database.events.fields["override"] = LazyJoin( + from_field=["distinct_id"], # ??? 
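+                # The override join is keyed on the event's distinct_id; person_id (defined
+                # just below) falls back to the event's own person_id whenever no override
+                # row matches, i.e. when override.distinct_id comes back empty (non-NULL,
+                # since join_use_nulls = 0 is assumed per the NOTE below).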
+ join_table=PersonDistinctIdOverridesTable(), + join_function=join_with_person_distinct_id_overrides_table, + ) + database.events.fields["person_id"] = ExpressionField( + name="person_id", + expr=parse_expr( + # NOTE: assumes `join_use_nulls = 0` (the default), as ``override.distinct_id`` is not Nullable + "if(not(empty(override.distinct_id)), override.person_id, event_person_id)", + start=None, + ), + ) + database.events.fields["poe"].fields["id"] = database.events.fields["person_id"] + database.events.fields["person"] = FieldTraverser(chain=["poe"]) + database.persons.fields["$virt_initial_referring_domain_type"] = create_initial_domain_type( "$virt_initial_referring_domain_type" ) @@ -209,10 +233,22 @@ def create_hogql_database( ) if "timestamp" not in tables[warehouse_modifier.table_name].fields.keys(): - tables[warehouse_modifier.table_name].fields["timestamp"] = ExpressionField( - name="timestamp", - expr=ast.Call(name="toDateTime", args=[ast.Field(chain=[warehouse_modifier.timestamp_field])]), - ) + table_model = DataWarehouseTable.objects.filter( + team_id=team.pk, name=warehouse_modifier.table_name + ).latest("created_at") + timestamp_field_type = table_model.get_clickhouse_column_type(warehouse_modifier.timestamp_field) + + # If field type is none or datetime, we can use the field directly + if timestamp_field_type is None or timestamp_field_type.startswith("DateTime"): + tables[warehouse_modifier.table_name].fields["timestamp"] = ExpressionField( + name="timestamp", + expr=ast.Field(chain=[warehouse_modifier.timestamp_field]), + ) + else: + tables[warehouse_modifier.table_name].fields["timestamp"] = ExpressionField( + name="timestamp", + expr=ast.Call(name="toDateTime", args=[ast.Field(chain=[warehouse_modifier.timestamp_field])]), + ) # TODO: Need to decide how the distinct_id and person_id fields are going to be handled if "distinct_id" not in tables[warehouse_modifier.table_name].fields.keys(): diff --git a/posthog/hogql/database/schema/channel_type.py b/posthog/hogql/database/schema/channel_type.py index 5dee575fc59a3..1552a0e6aa6d4 100644 --- a/posthog/hogql/database/schema/channel_type.py +++ b/posthog/hogql/database/schema/channel_type.py @@ -62,6 +62,12 @@ def create_channel_type_expr( gclid: ast.Expr, gad_source: ast.Expr, ) -> ast.Expr: + def wrap_with_null_if_empty(expr: ast.Expr) -> ast.Expr: + return ast.Call( + name="nullIf", + args=[ast.Call(name="nullIf", args=[expr, ast.Constant(value="")]), ast.Constant(value="null")], + ) + return parse_expr( """ multiIf( @@ -95,8 +101,8 @@ def create_channel_type_expr( ( {referring_domain} = '$direct' - AND ({medium} IS NULL OR {medium} = '') - AND ({source} IS NULL OR {source} IN ('', '(direct)', 'direct')) + AND ({medium} IS NULL) + AND ({source} IS NULL OR {source} IN ('(direct)', 'direct')) ), 'Direct', @@ -122,11 +128,11 @@ def create_channel_type_expr( )""", start=None, placeholders={ - "campaign": campaign, - "medium": medium, - "source": source, + "campaign": wrap_with_null_if_empty(campaign), + "medium": wrap_with_null_if_empty(medium), + "source": wrap_with_null_if_empty(source), "referring_domain": referring_domain, - "gclid": gclid, - "gad_source": gad_source, + "gclid": wrap_with_null_if_empty(gclid), + "gad_source": wrap_with_null_if_empty(gad_source), }, ) diff --git a/posthog/hogql/database/schema/person_distinct_id_overrides.py b/posthog/hogql/database/schema/person_distinct_id_overrides.py new file mode 100644 index 0000000000000..34df59655c24d --- /dev/null +++ 
b/posthog/hogql/database/schema/person_distinct_id_overrides.py @@ -0,0 +1,91 @@ +from typing import Dict, List +from posthog.hogql.ast import SelectQuery +from posthog.hogql.context import HogQLContext + +from posthog.hogql.database.argmax import argmax_select +from posthog.hogql.database.models import ( + Table, + IntegerDatabaseField, + StringDatabaseField, + BooleanDatabaseField, + LazyJoin, + LazyTable, + FieldOrTable, +) +from posthog.hogql.database.schema.persons import join_with_persons_table +from posthog.hogql.errors import HogQLException + +PERSON_DISTINCT_ID_OVERRIDES_FIELDS = { + "team_id": IntegerDatabaseField(name="team_id"), + "distinct_id": StringDatabaseField(name="distinct_id"), + "person_id": StringDatabaseField(name="person_id"), + "person": LazyJoin( + from_field=["person_id"], + join_table="persons", + join_function=join_with_persons_table, + ), +} + + +def select_from_person_distinct_id_overrides_table(requested_fields: Dict[str, List[str | int]]): + # Always include "person_id", as it's the key we use to make further joins, and it'd be great if it's available + if "person_id" not in requested_fields: + requested_fields = {**requested_fields, "person_id": ["person_id"]} + return argmax_select( + table_name="raw_person_distinct_id_overrides", + select_fields=requested_fields, + group_fields=["distinct_id"], + argmax_field="version", + deleted_field="is_deleted", + ) + + +def join_with_person_distinct_id_overrides_table( + from_table: str, + to_table: str, + requested_fields: Dict[str, List[str]], + context: HogQLContext, + node: SelectQuery, +): + from posthog.hogql import ast + + if not requested_fields: + raise HogQLException("No fields requested from person_distinct_id_overrides") + join_expr = ast.JoinExpr(table=select_from_person_distinct_id_overrides_table(requested_fields)) + join_expr.join_type = "LEFT OUTER JOIN" + join_expr.alias = to_table + join_expr.constraint = ast.JoinConstraint( + expr=ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=ast.Field(chain=[from_table, "distinct_id"]), + right=ast.Field(chain=[to_table, "distinct_id"]), + ) + ) + return join_expr + + +class RawPersonDistinctIdOverridesTable(Table): + fields: Dict[str, FieldOrTable] = { + **PERSON_DISTINCT_ID_OVERRIDES_FIELDS, + "is_deleted": BooleanDatabaseField(name="is_deleted"), + "version": IntegerDatabaseField(name="version"), + } + + def to_printed_clickhouse(self, context): + return "person_distinct_id_overrides" + + def to_printed_hogql(self): + return "raw_person_distinct_id_overrides" + + +class PersonDistinctIdOverridesTable(LazyTable): + fields: Dict[str, FieldOrTable] = PERSON_DISTINCT_ID_OVERRIDES_FIELDS + + def lazy_select(self, requested_fields: Dict[str, List[str | int]], context: HogQLContext, node: SelectQuery): + return select_from_person_distinct_id_overrides_table(requested_fields) + + def to_printed_clickhouse(self, context): + return "person_distinct_id_overrides" + + def to_printed_hogql(self): + return "person_distinct_id_overrides" diff --git a/posthog/hogql/database/schema/test/test_channel_type.py b/posthog/hogql/database/schema/test/test_channel_type.py index 89e026ff3aed0..97dba3e13ba38 100644 --- a/posthog/hogql/database/schema/test/test_channel_type.py +++ b/posthog/hogql/database/schema/test/test_channel_type.py @@ -106,6 +106,36 @@ def test_direct(self): ), ) + def test_direct_empty_string(self): + self.assertEqual( + "Direct", + self._get_initial_channel_type( + { + "$initial_referring_domain": "$direct", + "$initial_utm_source": "", + 
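+                    # Empty strings are nulled out by wrap_with_null_if_empty
+                    # (nullIf(nullIf(x, ''), 'null')), so these properties behave as
+                    # missing and the channel type still resolves to Direct.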
"$initial_utm_medium": "", + "$initial_utm_campaign": "", + "$initial_gclid": "", + "$initial_gad_source": "", + } + ), + ) + + def test_direct_null_string(self): + self.assertEqual( + "Direct", + self._get_initial_channel_type( + { + "$initial_referring_domain": "$direct", + "$initial_utm_source": "null", + "$initial_utm_medium": "null", + "$initial_utm_campaign": "null", + "$initial_gclid": "null", + "$initial_gad_source": "null", + } + ), + ) + def test_cross_network(self): self.assertEqual( "Cross Network", diff --git a/posthog/hogql/database/test/__snapshots__/test_database.ambr b/posthog/hogql/database/test/__snapshots__/test_database.ambr index db4dfc8f6df9f..63c2d16ce87aa 100644 --- a/posthog/hogql/database/test/__snapshots__/test_database.ambr +++ b/posthog/hogql/database/test/__snapshots__/test_database.ambr @@ -304,6 +304,31 @@ ] } ], + "person_distinct_id_overrides": [ + { + "key": "distinct_id", + "type": "string" + }, + { + "key": "person_id", + "type": "string" + }, + { + "key": "person", + "type": "lazy_table", + "table": "persons", + "fields": [ + "id", + "created_at", + "team_id", + "properties", + "is_identified", + "pdi", + "$virt_initial_referring_domain_type", + "$virt_initial_channel_type" + ] + } + ], "person_overrides": [ { "key": "old_person_id", @@ -790,6 +815,39 @@ "type": "integer" } ], + "raw_person_distinct_id_overrides": [ + { + "key": "distinct_id", + "type": "string" + }, + { + "key": "person_id", + "type": "string" + }, + { + "key": "person", + "type": "lazy_table", + "table": "persons", + "fields": [ + "id", + "created_at", + "team_id", + "properties", + "is_identified", + "pdi", + "$virt_initial_referring_domain_type", + "$virt_initial_channel_type" + ] + }, + { + "key": "is_deleted", + "type": "boolean" + }, + { + "key": "version", + "type": "integer" + } + ], "raw_person_overrides": [ { "key": "old_person_id", @@ -1155,6 +1213,31 @@ ] } ], + "person_distinct_id_overrides": [ + { + "key": "distinct_id", + "type": "string" + }, + { + "key": "person_id", + "type": "string" + }, + { + "key": "person", + "type": "lazy_table", + "table": "persons", + "fields": [ + "id", + "created_at", + "team_id", + "properties", + "is_identified", + "pdi", + "$virt_initial_referring_domain_type", + "$virt_initial_channel_type" + ] + } + ], "person_overrides": [ { "key": "old_person_id", @@ -1641,6 +1724,39 @@ "type": "integer" } ], + "raw_person_distinct_id_overrides": [ + { + "key": "distinct_id", + "type": "string" + }, + { + "key": "person_id", + "type": "string" + }, + { + "key": "person", + "type": "lazy_table", + "table": "persons", + "fields": [ + "id", + "created_at", + "team_id", + "properties", + "is_identified", + "pdi", + "$virt_initial_referring_domain_type", + "$virt_initial_channel_type" + ] + }, + { + "key": "is_deleted", + "type": "boolean" + }, + { + "key": "version", + "type": "integer" + } + ], "raw_person_overrides": [ { "key": "old_person_id", diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py index 69b5656020904..f47c14c5cef86 100644 --- a/posthog/hogql/query.py +++ b/posthog/hogql/query.py @@ -148,6 +148,7 @@ def execute_hogql_query( has_joins="JOIN" in clickhouse_sql, has_json_operations="JSONExtract" in clickhouse_sql or "JSONHas" in clickhouse_sql, timings=timings_dict, + modifiers={k: v for k, v in modifiers.model_dump().items() if v is not None} if modifiers else {}, ) error = None diff --git a/posthog/hogql/test/test_modifiers.py b/posthog/hogql/test/test_modifiers.py index eba1f5195ab3d..b2b0ef1e40630 100644 --- 
a/posthog/hogql/test/test_modifiers.py +++ b/posthog/hogql/test/test_modifiers.py @@ -74,6 +74,13 @@ def test_modifiers_persons_on_events_mode_mapping(self): "events.person_properties AS properties", "toTimeZone(events.person_created_at, %(hogql_val_1)s) AS created_at", ), + ( + PersonsOnEventsMode.v3_enabled, + "events.event AS event", + "if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id) AS id", + "events.person_properties AS properties", + "toTimeZone(events.person_created_at, %(hogql_val_0)s) AS created_at", + ), ] for mode, *expected in test_cases: diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index 9ff7f8ee0ab49..e0f5ea847110d 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -31,7 +31,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [12]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [11]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -42,7 +42,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [12])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [11])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -55,7 +55,7 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [13]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [12]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1 @@ -66,7 +66,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [13])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [12])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' diff --git a/posthog/hogql_queries/insights/funnels/base.py b/posthog/hogql_queries/insights/funnels/base.py index ef8782fade54a..4e97d79b94534 100644 --- a/posthog/hogql_queries/insights/funnels/base.py +++ b/posthog/hogql_queries/insights/funnels/base.py @@ -284,6 +284,7 @@ def _get_breakdown_expr(self) -> ast.Expr: properties_column = f"group_{breakdownFilter.breakdown_group_type_index}.properties" return get_breakdown_expr(breakdown, properties_column) elif 
breakdownType == "hogql": + assert isinstance(breakdown, list) return ast.Alias( alias="value", expr=ast.Array(exprs=[parse_expr(str(value)) for value in breakdown]), @@ -530,6 +531,7 @@ def _add_breakdown_attribution_subquery(self, inner_query: ast.SelectQuery) -> a # so just select that. Except for the empty case, where we select the default. if self._query_has_array_breakdown(): + assert isinstance(breakdown, list) default_breakdown_value = f"""[{','.join(["''" for _ in range(len(breakdown or []))])}]""" # default is [''] when dealing with a single breakdown array, otherwise ['', '', ...., ''] breakdown_selector = parse_expr( @@ -613,7 +615,7 @@ def _build_step_query( event_expr = ast.Constant(value=True) else: # event - event_expr = parse_expr(f"event = '{entity.event}'") + event_expr = parse_expr("event = {event}", {"event": ast.Constant(value=entity.event)}) if entity.properties is not None and entity.properties != []: # add property filters @@ -657,11 +659,15 @@ def _get_funnel_person_step_condition(self) -> ast.Expr: raise ValueError("Missing both funnelStep and funnelCustomSteps") if funnelStepBreakdown is not None: - breakdown_prop_value = funnelStepBreakdown - if isinstance(breakdown_prop_value, int) and breakdownType != "cohort": - breakdown_prop_value = str(breakdown_prop_value) + if isinstance(funnelStepBreakdown, int) and breakdownType != "cohort": + funnelStepBreakdown = str(funnelStepBreakdown) - conditions.append(parse_expr(f"arrayFlatten(array(prop)) = arrayFlatten(array({breakdown_prop_value}))")) + conditions.append( + parse_expr( + "arrayFlatten(array(prop)) = arrayFlatten(array({funnelStepBreakdown}))", + {"funnelStepBreakdown": ast.Constant(value=funnelStepBreakdown)}, + ) + ) return ast.And(exprs=conditions) @@ -898,7 +904,12 @@ def _get_breakdown_prop_expr(self, group_remaining=False) -> List[ast.Expr]: BreakdownType.group, ]: breakdown_values = self._get_breakdown_conditions() - return [parse_expr(f"if(has({breakdown_values}, prop), prop, {other_aggregation}) as prop")] + return [ + parse_expr( + f"if(has({{breakdown_values}}, prop), prop, {other_aggregation}) as prop", + {"breakdown_values": ast.Constant(value=breakdown_values)}, + ) + ] else: # Cohorts don't have "Other" aggregation return [ast.Field(chain=["prop"])] diff --git a/posthog/hogql_queries/insights/funnels/funnel_query_context.py b/posthog/hogql_queries/insights/funnels/funnel_query_context.py index 66a0d28ad3d7f..3b777e3ff8026 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_query_context.py +++ b/posthog/hogql_queries/insights/funnels/funnel_query_context.py @@ -25,7 +25,7 @@ class FunnelQueryContext(QueryContext): interval: IntervalType - breakdown: List[Union[str, int]] | None + breakdown: List[Union[str, int]] | str | int | None breakdownType: BreakdownType breakdownAttributionType: BreakdownAttributionType diff --git a/posthog/hogql_queries/insights/funnels/funnel_trends.py b/posthog/hogql_queries/insights/funnels/funnel_trends.py index 5c370512a20e8..9d486f1b06196 100644 --- a/posthog/hogql_queries/insights/funnels/funnel_trends.py +++ b/posthog/hogql_queries/insights/funnels/funnel_trends.py @@ -203,7 +203,16 @@ def get_query(self) -> ast.SelectQuery: [ ast.Alias( alias="breakdown_value", - expr=ast.Array(exprs=[parse_expr(str(value)) for value in self.breakdown_values]), + expr=ast.Array( + exprs=[ + ( + ast.Array(exprs=[ast.Constant(value=sub_value) for sub_value in value]) + if isinstance(value, list) + else ast.Constant(value=value) + ) + for value in self.breakdown_values + ] 
+ ), hidden=False, ) ] diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel.py b/posthog/hogql_queries/insights/funnels/test/test_funnel.py index 98f4d060fb905..89382bebfb994 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel.py @@ -18,7 +18,14 @@ from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.property_definition import PropertyDefinition from posthog.queries.funnels import ClickhouseFunnelActors -from posthog.schema import ActorsQuery, EventsNode, FunnelsActorsQuery, FunnelsQuery +from posthog.schema import ( + ActorsQuery, + BreakdownFilter, + DateRange, + EventsNode, + FunnelsActorsQuery, + FunnelsQuery, +) from posthog.test.base import ( APIBaseTest, BaseTest, @@ -3576,6 +3583,72 @@ def test_funnel_window_ignores_dst_transition(self): self.assertEqual(results[1]["average_conversion_time"], 1_207_020) self.assertEqual(results[1]["median_conversion_time"], 1_207_020) + def test_parses_breakdowns_correctly(self): + _create_person( + distinct_ids=[f"user_1"], + team=self.team, + ) + + events_by_person = { + "user_1": [ + { + "event": "$pageview", + "timestamp": datetime(2024, 3, 22, 13, 46), + "properties": {"utm_medium": "test''123"}, + }, + { + "event": "$pageview", + "timestamp": datetime(2024, 3, 22, 13, 47), + "properties": {"utm_medium": "test''123"}, + }, + ], + } + journeys_for(events_by_person, self.team) + + query = FunnelsQuery( + series=[EventsNode(event="$pageview"), EventsNode(event="$pageview")], + dateRange=DateRange( + date_from="2024-03-22", + date_to="2024-03-22", + ), + breakdownFilter=BreakdownFilter(breakdown="utm_medium"), + ) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0][1]["breakdown_value"], ["test'123"]) + self.assertEqual(results[0][1]["count"], 1) + + def test_funnel_parses_event_names_correctly(self): + _create_person( + distinct_ids=[f"user_1"], + team=self.team, + ) + + events_by_person = { + "user_1": [ + { + "event": "test''1", + "timestamp": datetime(2024, 3, 22, 13, 46), + }, + { + "event": "test''2", + "timestamp": datetime(2024, 3, 22, 13, 47), + }, + ], + } + journeys_for(events_by_person, self.team) + + query = FunnelsQuery( + series=[EventsNode(event="test'1"), EventsNode()], + dateRange=DateRange( + date_from="2024-03-22", + date_to="2024-03-22", + ), + ) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(results[0]["count"], 1) + return TestGetFunnel diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py index 4c342d2f2926c..dec7bdd933b3e 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_persons.py @@ -626,3 +626,45 @@ def test_funnel_person_recordings(self): } ], ) + + def test_parses_step_breakdown_correctly(self): + person1 = _create_person( + distinct_ids=["person1"], + team_id=self.team.pk, + properties={"$country": "PL"}, + ) + journeys_for( + { + "person1": [ + { + "event": "sign up", + "timestamp": datetime(2020, 1, 1, 12), + "properties": {"$browser": "test''123"}, + }, + { + "event": "play movie", + "timestamp": datetime(2020, 1, 1, 13), + "properties": {"$browser": "test''123"}, + }, + ], + }, + self.team, + create_people=False, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "date_from": "2020-01-01", + 
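+            # The stored $browser value above contains literal single quotes; base.py now
+            # feeds breakdown values and event names through ast.Constant placeholders
+            # instead of interpolating them into parse_expr strings, so quoted values
+            # round-trip without manual escaping.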
"date_to": "2020-01-08", + "interval": "day", + "funnel_window_days": 7, + "events": [ + {"id": "sign up", "order": 0}, + {"id": "play movie", "order": 1}, + ], + "breakdown_type": "event", + "breakdown": "$browser", + } + + results = get_actors(filters, self.team, funnelStep=1, funnelStepBreakdown=["test'123"]) + self.assertCountEqual([results[0][0]], [person1.uuid]) diff --git a/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py b/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py index 6ca333b036f14..f9c7b107074de 100644 --- a/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py +++ b/posthog/hogql_queries/insights/funnels/test/test_funnel_trends.py @@ -1387,3 +1387,43 @@ def test_trend_for_hour_based_conversion_window(self): results = FunnelsQueryRunner(query=query, team=self.team, just_summarize=True).calculate().results conversion_rates = [row["conversion_rate"] for row in results] self.assertEqual(conversion_rates, [50.0, 0.0, 0.0, 0.0, 0.0, 0.0]) + + def test_parses_breakdown_correctly(self): + journeys_for( + { + "user_one": [ + { + "event": "step one", + "timestamp": datetime(2021, 5, 1), + "properties": {"$browser": "test''123"}, + }, + { + "event": "step two", + "timestamp": datetime(2021, 5, 3), + "properties": {"$browser": "test''123"}, + }, + ], + }, + self.team, + ) + + filters = { + "insight": INSIGHT_FUNNELS, + "funnel_viz_type": "trends", + "display": TRENDS_LINEAR, + "interval": "day", + "date_from": "2021-05-01 00:00:00", + "date_to": "2021-05-13 23:59:59", + "funnel_window_days": 7, + "events": [ + {"id": "step one", "order": 0}, + {"id": "step two", "order": 1}, + ], + "breakdown_type": "event", + "breakdown": "$browser", + } + + query = cast(FunnelsQuery, filter_to_query(filters)) + results = FunnelsQueryRunner(query=query, team=self.team).calculate().results + + self.assertEqual(len(results), 1) diff --git a/posthog/hogql_queries/insights/funnels/utils.py b/posthog/hogql_queries/insights/funnels/utils.py index 47c1487e5fbcc..cdccce0251a33 100644 --- a/posthog/hogql_queries/insights/funnels/utils.py +++ b/posthog/hogql_queries/insights/funnels/utils.py @@ -61,23 +61,26 @@ def funnel_window_interval_unit_to_sql( def get_breakdown_expr( - breakdown: List[str | int] | None, properties_column: str, normalize_url: bool | None = False + breakdowns: List[str | int] | str | int, properties_column: str, normalize_url: bool | None = False ) -> ast.Expr: - if isinstance(breakdown, str) or isinstance(breakdown, int) or breakdown is None: - return parse_expr(f"ifNull({properties_column}.\"{breakdown}\", '')") + if isinstance(breakdowns, str) or isinstance(breakdowns, int) or breakdowns is None: + return ast.Call( + name="ifNull", args=[ast.Field(chain=[*properties_column.split("."), breakdowns]), ast.Constant(value="")] + ) else: exprs = [] - for b in breakdown: - expr = parse_expr(normalize_url_breakdown(f"ifNull({properties_column}.\"{b}\", '')", normalize_url)) + for breakdown in breakdowns: + expr: ast.Expr = ast.Call( + name="ifNull", + args=[ast.Field(chain=[*properties_column.split("."), breakdown]), ast.Constant(value="")], + ) + if normalize_url: + regex = "[\\\\/?#]*$" + expr = parse_expr( + f"if( empty( replaceRegexpOne({{breakdown_value}}, '{regex}', '') ), '/', replaceRegexpOne({{breakdown_value}}, '{regex}', ''))", + {"breakdown_value": expr}, + ) exprs.append(expr) expression = ast.Array(exprs=exprs) return expression - - -def normalize_url_breakdown(breakdown_value, breakdown_normalize_url: bool | None): - if 
breakdown_normalize_url: - regex = "[\\\\/?#]*$" - return f"if( empty( replaceRegexpOne({breakdown_value}, '{regex}', '') ), '/', replaceRegexpOne({breakdown_value}, '{regex}', ''))" - - return breakdown_value diff --git a/posthog/hogql_queries/insights/lifecycle_query_runner.py b/posthog/hogql_queries/insights/lifecycle_query_runner.py index 62968f5349e0e..ea883eec542bc 100644 --- a/posthog/hogql_queries/insights/lifecycle_query_runner.py +++ b/posthog/hogql_queries/insights/lifecycle_query_runner.py @@ -126,7 +126,7 @@ def to_actors_query( def to_actors_query_options(self) -> InsightActorsQueryOptionsResponse: return InsightActorsQueryOptionsResponse( - day=[{"label": day, "value": day} for day in self.query_date_range.all_values()], + day=[{"label": format_label_date(value), "value": value} for value in self.query_date_range.all_values()], status=[ { "label": "Dormant", diff --git a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr index 1e3bc1b5cbad6..bd7142030fe3a 100644 --- a/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr +++ b/posthog/hogql_queries/insights/trends/test/__snapshots__/test_trends_data_warehouse_query.ambr @@ -4,7 +4,7 @@ SELECT toString(e.prop_1) AS value, count(e.id) AS count FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0))) + WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0))) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 SETTINGS readonly=2, @@ -36,10 +36,10 @@ JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.id) AS total, - toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start, + toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, transform(ifNull(toString(e.prop_1), '$$_posthog_breakdown_null_$$'), ['d', 'c', 'b', 'a'], ['d', 'c', 'b', 'a'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM 
s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), or(ifNull(equals(toString(e.prop_1), 'd'), 0), ifNull(equals(toString(e.prop_1), 'c'), 0), ifNull(equals(toString(e.prop_1), 'b'), 0), ifNull(equals(toString(e.prop_1), 'a'), 0))) + WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), or(ifNull(equals(toString(e.prop_1), 'd'), 0), ifNull(equals(toString(e.prop_1), 'c'), 0), ifNull(equals(toString(e.prop_1), 'b'), 0), ifNull(equals(toString(e.prop_1), 'a'), 0))) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -57,7 +57,7 @@ SELECT toString(e.prop_1) AS value, count(e.id) AS count FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'))) + WHERE and(and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)), and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'))) GROUP BY value ORDER BY count DESC, value DESC LIMIT 26 SETTINGS readonly=2, @@ -89,10 +89,10 @@ JOIN breakdown_value AS breakdown_value) AS sec ORDER BY sec.breakdown_value ASC, day_start ASC UNION ALL SELECT count(e.id) AS total, - toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start, + toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start, transform(ifNull(toString(e.prop_1), '$$_posthog_breakdown_null_$$'), ['a'], ['a'], '$$_posthog_breakdown_other_$$') AS breakdown_value FROM 
s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), ifNull(equals(toString(e.prop_1), 'a'), 0)) + WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a'), ifNull(equals(toString(e.prop_1), 'a'), 0)) GROUP BY day_start, breakdown_value) GROUP BY day_start, @@ -119,9 +119,9 @@ UNION ALL SELECT 0 AS total, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.id) AS total, - toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start + toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) + WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0)) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -145,9 +145,9 @@ UNION ALL SELECT 0 AS total, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.id) AS total, - toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start + toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a')) + WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), 
ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) @@ -171,9 +171,9 @@ UNION ALL SELECT 0 AS total, toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC'))) AS day_start UNION ALL SELECT count(e.id) AS total, - toStartOfDay(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC')) AS day_start + toStartOfDay(toTimeZone(e.created, 'UTC')) AS day_start FROM s3('http://host.docker.internal:19000/posthog/test_storage_bucket-posthog.hogql.datawarehouse.trendquery/*.parquet', 'object_storage_root_user', 'object_storage_root_password', 'Parquet', 'id String, prop_1 String, prop_2 String, created DateTime64(3, \'UTC\')') AS e - WHERE and(ifNull(greaterOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toDateTime(toTimeZone(e.created, 'UTC'), 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a')) + WHERE and(ifNull(greaterOrEquals(toTimeZone(e.created, 'UTC'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-01 00:00:00', 6, 'UTC')))), 0), ifNull(lessOrEquals(toTimeZone(e.created, 'UTC'), assumeNotNull(parseDateTime64BestEffortOrNull('2023-01-07 23:59:59', 6, 'UTC'))), 0), equals(e.prop_1, 'a')) GROUP BY day_start) GROUP BY day_start ORDER BY day_start ASC) diff --git a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py index 8d14950ec23b2..6bb41b19c79cf 100644 --- a/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/test/test_trends_query_runner.py @@ -1,4 +1,6 @@ +import zoneinfo from dataclasses import dataclass +from datetime import datetime from typing import Dict, List, Optional from unittest.mock import MagicMock, patch from django.test import override_settings @@ -1478,18 +1480,18 @@ def test_to_actors_query_options(self): response = runner.to_actors_query_options() assert response.day == [ - DayItem(label="2020-01-09", value="2020-01-09"), - DayItem(label="2020-01-10", value="2020-01-10"), - DayItem(label="2020-01-11", value="2020-01-11"), - DayItem(label="2020-01-12", value="2020-01-12"), - DayItem(label="2020-01-13", value="2020-01-13"), - DayItem(label="2020-01-14", value="2020-01-14"), - DayItem(label="2020-01-15", value="2020-01-15"), - DayItem(label="2020-01-16", value="2020-01-16"), - DayItem(label="2020-01-17", value="2020-01-17"), - DayItem(label="2020-01-18", value="2020-01-18"), - DayItem(label="2020-01-19", value="2020-01-19"), - DayItem(label="2020-01-20", value="2020-01-20"), + DayItem(label="9-Jan-2020", value=datetime(2020, 1, 9, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="10-Jan-2020", value=datetime(2020, 1, 10, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="11-Jan-2020", value=datetime(2020, 1, 11, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="12-Jan-2020", value=datetime(2020, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="13-Jan-2020", value=datetime(2020, 1, 13, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="14-Jan-2020", value=datetime(2020, 1, 14, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="15-Jan-2020", 
value=datetime(2020, 1, 15, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="16-Jan-2020", value=datetime(2020, 1, 16, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="17-Jan-2020", value=datetime(2020, 1, 17, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="18-Jan-2020", value=datetime(2020, 1, 18, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="19-Jan-2020", value=datetime(2020, 1, 19, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="20-Jan-2020", value=datetime(2020, 1, 20, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), ] assert response.breakdown is None @@ -1513,18 +1515,18 @@ def test_to_actors_query_options_compare(self): response = runner.to_actors_query_options() assert response.day == [ - DayItem(label="2020-01-09", value="2020-01-09"), - DayItem(label="2020-01-10", value="2020-01-10"), - DayItem(label="2020-01-11", value="2020-01-11"), - DayItem(label="2020-01-12", value="2020-01-12"), - DayItem(label="2020-01-13", value="2020-01-13"), - DayItem(label="2020-01-14", value="2020-01-14"), - DayItem(label="2020-01-15", value="2020-01-15"), - DayItem(label="2020-01-16", value="2020-01-16"), - DayItem(label="2020-01-17", value="2020-01-17"), - DayItem(label="2020-01-18", value="2020-01-18"), - DayItem(label="2020-01-19", value="2020-01-19"), - DayItem(label="2020-01-20", value="2020-01-20"), + DayItem(label="9-Jan-2020", value=datetime(2020, 1, 9, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="10-Jan-2020", value=datetime(2020, 1, 10, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="11-Jan-2020", value=datetime(2020, 1, 11, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="12-Jan-2020", value=datetime(2020, 1, 12, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="13-Jan-2020", value=datetime(2020, 1, 13, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="14-Jan-2020", value=datetime(2020, 1, 14, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="15-Jan-2020", value=datetime(2020, 1, 15, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="16-Jan-2020", value=datetime(2020, 1, 16, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="17-Jan-2020", value=datetime(2020, 1, 17, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="18-Jan-2020", value=datetime(2020, 1, 18, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="19-Jan-2020", value=datetime(2020, 1, 19, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), + DayItem(label="20-Jan-2020", value=datetime(2020, 1, 20, 0, 0, tzinfo=zoneinfo.ZoneInfo(key="UTC"))), ] assert response.breakdown is None diff --git a/posthog/hogql_queries/insights/trends/trends_query_runner.py b/posthog/hogql_queries/insights/trends/trends_query_runner.py index d61720740f52b..67e160084e68e 100644 --- a/posthog/hogql_queries/insights/trends/trends_query_runner.py +++ b/posthog/hogql_queries/insights/trends/trends_query_runner.py @@ -183,7 +183,13 @@ def to_actors_query_options(self) -> InsightActorsQueryOptionsResponse: res_compare: List[CompareItem] | None = None # Days - res_days: List[DayItem] = [DayItem(label=day, value=day) for day in self.query_date_range.all_values()] + res_days: list[DayItem] = [ + DayItem( + label=format_label_date(value, self.query_date_range.interval_name), + value=value, + ) + for value in self.query_date_range.all_values() + ] # Series for index, series in enumerate(self.query.series): diff --git a/posthog/hogql_queries/utils/query_date_range.py 
b/posthog/hogql_queries/utils/query_date_range.py index 5453d878b4017..b6386ac85f4ed 100644 --- a/posthog/hogql_queries/utils/query_date_range.py +++ b/posthog/hogql_queries/utils/query_date_range.py @@ -1,7 +1,7 @@ import re from datetime import datetime, timedelta from functools import cached_property -from typing import Literal, Optional, Dict, List +from typing import Literal, Optional, Dict from zoneinfo import ZoneInfo from dateutil.parser import parse @@ -140,16 +140,15 @@ def interval_relativedelta(self) -> relativedelta: hours=1 if self.interval_name == "hour" else 0, ) - def all_values(self) -> List[str]: + def all_values(self) -> list[datetime]: start = self.align_with_interval(self.date_from()) end: datetime = self.date_to() - values: List[str] = [] + delta = self.interval_relativedelta() + + values: list[datetime] = [] while start <= end: - if self.interval_name == "hour": - values.append(start.strftime("%Y-%m-%d %H:%M:%S")) - else: - values.append(start.strftime("%Y-%m-%d")) - start += self.interval_relativedelta() + values.append(start) + start += delta return values def date_to_as_hogql(self) -> ast.Expr: diff --git a/posthog/hogql_queries/utils/test/test_query_date_range.py b/posthog/hogql_queries/utils/test/test_query_date_range.py index fd38ef700e137..f377e06880bbe 100644 --- a/posthog/hogql_queries/utils/test/test_query_date_range.py +++ b/posthog/hogql_queries/utils/test/test_query_date_range.py @@ -61,32 +61,47 @@ def test_all_values(self): QueryDateRange( team=self.team, date_range=DateRange(date_from="-20h"), interval=IntervalType.day, now=now ).all_values(), - ["2021-08-24", "2021-08-25"], + [parser.isoparse("2021-08-24T00:00:00Z"), parser.isoparse("2021-08-25T00:00:00Z")], ) self.assertEqual( QueryDateRange( team=self.team, date_range=DateRange(date_from="-20d"), interval=IntervalType.week, now=now ).all_values(), - ["2021-08-01", "2021-08-08", "2021-08-15", "2021-08-22"], + [ + parser.isoparse("2021-08-01T00:00:00Z"), + parser.isoparse("2021-08-08T00:00:00Z"), + parser.isoparse("2021-08-15T00:00:00Z"), + parser.isoparse("2021-08-22T00:00:00Z"), + ], ) self.team.week_start_day = WeekStartDay.MONDAY self.assertEqual( QueryDateRange( team=self.team, date_range=DateRange(date_from="-20d"), interval=IntervalType.week, now=now ).all_values(), - ["2021-08-02", "2021-08-09", "2021-08-16", "2021-08-23"], + [ + parser.isoparse("2021-08-02T00:00:00Z"), + parser.isoparse("2021-08-09T00:00:00Z"), + parser.isoparse("2021-08-16T00:00:00Z"), + parser.isoparse("2021-08-23T00:00:00Z"), + ], ) self.assertEqual( QueryDateRange( team=self.team, date_range=DateRange(date_from="-50d"), interval=IntervalType.month, now=now ).all_values(), - ["2021-07-01", "2021-08-01"], + [parser.isoparse("2021-07-01T00:00:00Z"), parser.isoparse("2021-08-01T00:00:00Z")], ) self.assertEqual( QueryDateRange( team=self.team, date_range=DateRange(date_from="-3h"), interval=IntervalType.hour, now=now ).all_values(), - ["2021-08-24 21:00:00", "2021-08-24 22:00:00", "2021-08-24 23:00:00", "2021-08-25 00:00:00"], + [ + parser.isoparse("2021-08-24T21:00:00Z"), + parser.isoparse("2021-08-24T22:00:00Z"), + parser.isoparse("2021-08-24T23:00:00Z"), + parser.isoparse("2021-08-25T00:00:00Z"), + ], ) diff --git a/posthog/management/commands/backfill_sessions_table.py b/posthog/management/commands/backfill_sessions_table.py index 798a501eb5b60..c01f4b6159749 100644 --- a/posthog/management/commands/backfill_sessions_table.py +++ b/posthog/management/commands/backfill_sessions_table.py @@ -16,6 +16,10 @@ TARGET_TABLE = 
"sessions" +SETTINGS = { + "max_execution_time": 3600 # 1 hour +} + @dataclass class BackfillQuery: @@ -26,6 +30,7 @@ class BackfillQuery: def execute( self, dry_run: bool = True, + print_counts: bool = True, ) -> None: def source_column(column_name: str) -> str: return get_property_string_expr( @@ -108,13 +113,14 @@ def select_query(select_date: Optional[datetime] = None) -> str: """ # print the count of entries in the main sessions table - count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}" - [(sessions_row_count, sessions_event_count)] = sync_execute(count_query) - logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table") + if print_counts: + count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}" + [(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS) + logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table") if dry_run: count_query = f"SELECT count(), uniq(session_id) FROM ({select_query()})" - [(events_count, sessions_count)] = sync_execute(count_query) + [(events_count, sessions_count)] = sync_execute(count_query, settings=SETTINGS) logger.info(f"{events_count} events and {sessions_count} sessions to backfill for") logger.info(f"The first select query would be:\n{select_query(self.start_date)}") return @@ -125,12 +131,14 @@ def select_query(select_date: Optional[datetime] = None) -> str: sync_execute( query=f"""INSERT INTO writable_sessions {select_query(select_date=date)} SETTINGS max_execution_time=3600""", workload=Workload.OFFLINE if self.use_offline_workload else Workload.DEFAULT, + settings=SETTINGS, ) # print the count of entries in the main sessions table - count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}" - [(sessions_row_count, sessions_event_count)] = sync_execute(count_query) - logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table") + if print_counts: + count_query = f"SELECT count(), uniq(session_id) FROM {TARGET_TABLE}" + [(sessions_row_count, sessions_event_count)] = sync_execute(count_query, settings=SETTINGS) + logger.info(f"{sessions_row_count} rows and {sessions_event_count} unique session_ids in sessions table") class Command(BaseCommand): @@ -149,11 +157,25 @@ def add_arguments(self, parser): parser.add_argument( "--use-offline-workload", action="store_true", help="actually execute INSERT queries (default is dry-run)" ) + parser.add_argument( + "--print-counts", action="store_true", help="print events and session count beforehand and afterwards" + ) - def handle(self, *, live_run: bool, start_date: str, end_date: str, use_offline_workload: bool, **options): + def handle( + self, + *, + live_run: bool, + start_date: str, + end_date: str, + use_offline_workload: bool, + print_counts: bool, + **options, + ): logger.setLevel(logging.INFO) start_datetime = datetime.strptime(start_date, "%Y-%m-%d") end_datetime = datetime.strptime(end_date, "%Y-%m-%d") - BackfillQuery(start_datetime, end_datetime, use_offline_workload).execute(dry_run=not live_run) + BackfillQuery(start_datetime, end_datetime, use_offline_workload).execute( + dry_run=not live_run, print_counts=print_counts + ) diff --git a/posthog/schema.py b/posthog/schema.py index dc77da163db17..9d83587351683 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -180,6 +180,10 @@ class DateRange(BaseModel): date_to: Optional[str] = None +class 
DatetimeDay(RootModel[AwareDatetime]): + root: AwareDatetime + + class Day(RootModel[int]): root: int @@ -418,6 +422,7 @@ class PersonsOnEventsMode(str, Enum): v1_enabled = "v1_enabled" v1_mixed = "v1_mixed" v2_enabled = "v2_enabled" + v3_enabled = "v3_enabled" class HogQLQueryModifiers(BaseModel): @@ -457,7 +462,7 @@ class DayItem(BaseModel): extra="forbid", ) label: str - value: Union[str, int] + value: Union[str, AwareDatetime, int] class IntervalItem(BaseModel): diff --git a/posthog/settings/feature_flags.py b/posthog/settings/feature_flags.py index 5e1ad234e6de4..371f497376663 100644 --- a/posthog/settings/feature_flags.py +++ b/posthog/settings/feature_flags.py @@ -8,5 +8,4 @@ "simplify-actions", "historical-exports-v2", "ingestion-warnings-enabled", - "role-based-access", ] diff --git a/posthog/tasks/tasks.py b/posthog/tasks/tasks.py index 5eff6afd33fe2..bead27cbd1eec 100644 --- a/posthog/tasks/tasks.py +++ b/posthog/tasks/tasks.py @@ -1,5 +1,5 @@ import time -from typing import Any, Optional +from typing import Optional from uuid import UUID from celery import shared_task @@ -9,6 +9,7 @@ from prometheus_client import Gauge from posthog.cloud_utils import is_cloud +from posthog.hogql.constants import LimitContext from posthog.metrics import pushed_metrics_registry from posthog.ph_client import get_ph_client from posthog.redis import get_client @@ -33,7 +34,12 @@ def redis_heartbeat() -> None: @shared_task(ignore_result=True, queue=CeleryQueue.ANALYTICS_QUERIES.value) def process_query_task( - team_id: str, query_id: str, query_json: Any, limit_context: Any = None, refresh_requested: bool = False + team_id: int, + user_id: int, + query_id: str, + query_json: dict, + limit_context: Optional[LimitContext] = None, + refresh_requested: bool = False, ) -> None: """ Kick off query @@ -43,6 +49,7 @@ def process_query_task( execute_process_query( team_id=team_id, + user_id=user_id, query_id=query_id, query_json=query_json, limit_context=limit_context, diff --git a/posthog/temporal/batch_exports/batch_exports.py b/posthog/temporal/batch_exports/batch_exports.py index c40950c654426..88cf9e32f274f 100644 --- a/posthog/temporal/batch_exports/batch_exports.py +++ b/posthog/temporal/batch_exports/batch_exports.py @@ -1,15 +1,10 @@ import collections.abc -import csv import dataclasses import datetime as dt -import gzip -import tempfile import typing import uuid from string import Template -import brotli -import orjson import pyarrow as pa from asgiref.sync import sync_to_async from django.conf import settings @@ -286,202 +281,6 @@ def get_data_interval(interval: str, data_interval_end: str | None) -> tuple[dt. return (data_interval_start_dt, data_interval_end_dt) -def json_dumps_bytes(d) -> bytes: - return orjson.dumps(d, default=str) - - -class BatchExportTemporaryFile: - """A TemporaryFile used to as an intermediate step while exporting data. - - This class does not implement the file-like interface but rather passes any calls - to the underlying tempfile.NamedTemporaryFile. We do override 'write' methods - to allow tracking bytes and records. 
- """ - - def __init__( - self, - mode: str = "w+b", - buffering=-1, - compression: str | None = None, - encoding: str | None = None, - newline: str | None = None, - suffix: str | None = None, - prefix: str | None = None, - dir: str | None = None, - *, - errors: str | None = None, - ): - self._file = tempfile.NamedTemporaryFile( - mode=mode, - encoding=encoding, - newline=newline, - buffering=buffering, - suffix=suffix, - prefix=prefix, - dir=dir, - errors=errors, - ) - self.compression = compression - self.bytes_total = 0 - self.records_total = 0 - self.bytes_since_last_reset = 0 - self.records_since_last_reset = 0 - self._brotli_compressor = None - - def __getattr__(self, name): - """Pass get attr to underlying tempfile.NamedTemporaryFile.""" - return self._file.__getattr__(name) - - def __enter__(self): - """Context-manager protocol enter method.""" - self._file.__enter__() - return self - - def __exit__(self, exc, value, tb): - """Context-manager protocol exit method.""" - return self._file.__exit__(exc, value, tb) - - def __iter__(self): - yield from self._file - - @property - def brotli_compressor(self): - if self._brotli_compressor is None: - self._brotli_compressor = brotli.Compressor() - return self._brotli_compressor - - def compress(self, content: bytes | str) -> bytes: - if isinstance(content, str): - encoded = content.encode("utf-8") - else: - encoded = content - - match self.compression: - case "gzip": - return gzip.compress(encoded) - case "brotli": - self.brotli_compressor.process(encoded) - return self.brotli_compressor.flush() - case None: - return encoded - case _: - raise ValueError(f"Unsupported compression: '{self.compression}'") - - def write(self, content: bytes | str): - """Write bytes to underlying file keeping track of how many bytes were written.""" - compressed_content = self.compress(content) - - if "b" in self.mode: - result = self._file.write(compressed_content) - else: - result = self._file.write(compressed_content.decode("utf-8")) - - self.bytes_total += result - self.bytes_since_last_reset += result - - return result - - def write_record_as_bytes(self, record: bytes): - result = self.write(record) - - self.records_total += 1 - self.records_since_last_reset += 1 - - return result - - def write_records_to_jsonl(self, records): - """Write records to a temporary file as JSONL.""" - if len(records) == 1: - jsonl_dump = orjson.dumps(records[0], option=orjson.OPT_APPEND_NEWLINE, default=str) - else: - jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) - - result = self.write(jsonl_dump) - - self.records_total += len(records) - self.records_since_last_reset += len(records) - - return result - - def write_records_to_csv( - self, - records, - fieldnames: None | collections.abc.Sequence[str] = None, - extrasaction: typing.Literal["raise", "ignore"] = "ignore", - delimiter: str = ",", - quotechar: str = '"', - escapechar: str | None = "\\", - lineterminator: str = "\n", - quoting=csv.QUOTE_NONE, - ): - """Write records to a temporary file as CSV.""" - if len(records) == 0: - return - - if fieldnames is None: - fieldnames = list(records[0].keys()) - - writer = csv.DictWriter( - self, - fieldnames=fieldnames, - extrasaction=extrasaction, - delimiter=delimiter, - quotechar=quotechar, - escapechar=escapechar, - quoting=quoting, - lineterminator=lineterminator, - ) - writer.writerows(records) - - self.records_total += len(records) - self.records_since_last_reset += len(records) - - def write_records_to_tsv( - self, - records, - fieldnames: None | list[str] = None, - 
extrasaction: typing.Literal["raise", "ignore"] = "ignore", - quotechar: str = '"', - escapechar: str | None = "\\", - lineterminator: str = "\n", - quoting=csv.QUOTE_NONE, - ): - """Write records to a temporary file as TSV.""" - return self.write_records_to_csv( - records, - fieldnames=fieldnames, - extrasaction=extrasaction, - delimiter="\t", - quotechar=quotechar, - escapechar=escapechar, - quoting=quoting, - lineterminator=lineterminator, - ) - - def rewind(self): - """Rewind the file before reading it.""" - if self.compression == "brotli": - result = self._file.write(self.brotli_compressor.finish()) - - self.bytes_total += result - self.bytes_since_last_reset += result - - self._brotli_compressor = None - - self._file.seek(0) - - def reset(self): - """Reset underlying file by truncating it. - - Also resets the tracker attributes for bytes and records since last reset. - """ - self._file.seek(0) - self._file.truncate() - - self.bytes_since_last_reset = 0 - self.records_since_last_reset = 0 - - @dataclasses.dataclass class CreateBatchExportRunInputs: """Inputs to the create_export_run activity. diff --git a/posthog/temporal/batch_exports/bigquery_batch_export.py b/posthog/temporal/batch_exports/bigquery_batch_export.py index a0469de79bb9e..b754a7add16b4 100644 --- a/posthog/temporal/batch_exports/bigquery_batch_export.py +++ b/posthog/temporal/batch_exports/bigquery_batch_export.py @@ -15,7 +15,6 @@ from posthog.batch_exports.service import BatchExportField, BatchExportSchema, BigQueryBatchExportInputs from posthog.temporal.batch_exports.base import PostHogWorkflow from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, CreateBatchExportRunInputs, UpdateBatchExportRunStatusInputs, create_export_run, @@ -29,6 +28,9 @@ get_bytes_exported_metric, get_rows_exported_metric, ) +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, +) from posthog.temporal.batch_exports.utils import peek_first_and_rewind from posthog.temporal.common.clickhouse import get_client from posthog.temporal.common.logger import bind_temporal_worker_logger diff --git a/posthog/temporal/batch_exports/http_batch_export.py b/posthog/temporal/batch_exports/http_batch_export.py index 8aca65c80ff38..2866d50c99876 100644 --- a/posthog/temporal/batch_exports/http_batch_export.py +++ b/posthog/temporal/batch_exports/http_batch_export.py @@ -13,7 +13,6 @@ from posthog.models import BatchExportRun from posthog.temporal.batch_exports.base import PostHogWorkflow from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, CreateBatchExportRunInputs, UpdateBatchExportRunStatusInputs, create_export_run, @@ -21,12 +20,15 @@ get_data_interval, get_rows_count, iter_records, - json_dumps_bytes, ) from posthog.temporal.batch_exports.metrics import ( get_bytes_exported_metric, get_rows_exported_metric, ) +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, + json_dumps_bytes, +) from posthog.temporal.common.clickhouse import get_client from posthog.temporal.common.logger import bind_temporal_worker_logger diff --git a/posthog/temporal/batch_exports/postgres_batch_export.py b/posthog/temporal/batch_exports/postgres_batch_export.py index 5dbfc6faa4acf..98969ee78de79 100644 --- a/posthog/temporal/batch_exports/postgres_batch_export.py +++ b/posthog/temporal/batch_exports/postgres_batch_export.py @@ -17,7 +17,6 @@ from posthog.batch_exports.service import BatchExportField, BatchExportSchema, PostgresBatchExportInputs 
from posthog.temporal.batch_exports.base import PostHogWorkflow
 from posthog.temporal.batch_exports.batch_exports import (
-    BatchExportTemporaryFile,
     CreateBatchExportRunInputs,
     UpdateBatchExportRunStatusInputs,
     create_export_run,
@@ -31,6 +30,9 @@
     get_bytes_exported_metric,
     get_rows_exported_metric,
 )
+from posthog.temporal.batch_exports.temporary_file import (
+    BatchExportTemporaryFile,
+)
 from posthog.temporal.batch_exports.utils import peek_first_and_rewind
 from posthog.temporal.common.clickhouse import get_client
 from posthog.temporal.common.logger import bind_temporal_worker_logger
diff --git a/posthog/temporal/batch_exports/s3_batch_export.py b/posthog/temporal/batch_exports/s3_batch_export.py
index 4d99cbeffd7c3..e83fe3f12915d 100644
--- a/posthog/temporal/batch_exports/s3_batch_export.py
+++ b/posthog/temporal/batch_exports/s3_batch_export.py
@@ -1,4 +1,5 @@
 import asyncio
+import collections.abc
 import contextlib
 import datetime as dt
 import io
@@ -8,6 +9,8 @@
 from dataclasses import dataclass

 import aioboto3
+import orjson
+import pyarrow as pa
 from django.conf import settings
 from temporalio import activity, workflow
 from temporalio.common import RetryPolicy
@@ -16,7 +19,6 @@
 from posthog.batch_exports.service import BatchExportField, BatchExportSchema, S3BatchExportInputs
 from posthog.temporal.batch_exports.base import PostHogWorkflow
 from posthog.temporal.batch_exports.batch_exports import (
-    BatchExportTemporaryFile,
     CreateBatchExportRunInputs,
     UpdateBatchExportRunStatusInputs,
     create_export_run,
@@ -30,6 +32,15 @@
     get_bytes_exported_metric,
     get_rows_exported_metric,
 )
+from posthog.temporal.batch_exports.temporary_file import (
+    BatchExportTemporaryFile,
+    BatchExportWriter,
+    FlushCallable,
+    JSONLBatchExportWriter,
+    ParquetBatchExportWriter,
+    UnsupportedFileFormatError,
+)
+from posthog.temporal.batch_exports.utils import peek_first_and_rewind
 from posthog.temporal.common.clickhouse import get_client
 from posthog.temporal.common.logger import bind_temporal_worker_logger

@@ -50,19 +61,31 @@ def get_allowed_template_variables(inputs) -> dict[str, str]:
     }


+FILE_FORMAT_EXTENSIONS = {
+    "Parquet": "parquet",
+    "JSONLines": "jsonl",
+}
+
+COMPRESSION_EXTENSIONS = {
+    "gzip": "gz",
+    "snappy": "sz",
+    "brotli": "br",
+    "zstd": "zst",
+    "lz4": "lz4",
+}
+
+
 def get_s3_key(inputs) -> str:
     """Return an S3 key given S3InsertInputs."""
     template_variables = get_allowed_template_variables(inputs)
     key_prefix = inputs.prefix.format(**template_variables)
+    file_extension = FILE_FORMAT_EXTENSIONS[inputs.file_format]

     base_file_name = f"{inputs.data_interval_start}-{inputs.data_interval_end}"
-    match inputs.compression:
-        case "gzip":
-            file_name = base_file_name + ".jsonl.gz"
-        case "brotli":
-            file_name = base_file_name + ".jsonl.br"
-        case _:
-            file_name = base_file_name + ".jsonl"
+    if inputs.compression is not None:
+        file_name = base_file_name + f".{file_extension}.{COMPRESSION_EXTENSIONS[inputs.compression]}"
+    else:
+        file_name = base_file_name + f".{file_extension}"

     key = posixpath.join(key_prefix, file_name)

@@ -311,6 +334,8 @@ class S3InsertInputs:
     kms_key_id: str | None = None
     batch_export_schema: BatchExportSchema | None = None
     endpoint_url: str | None = None
+    # TODO: In Python 3.11, this could be an enum.StrEnum.
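+    # Expected to be one of the FILE_FORMAT_EXTENSIONS keys above, i.e. "Parquet" or "JSONLines";
+    # get_s3_key uses it to pick the file extension.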
+    file_format: str = "JSONLines"


 async def initialize_and_resume_multipart_upload(inputs: S3InsertInputs) -> tuple[S3MultiPartUpload, str]:
@@ -451,7 +476,7 @@ async def insert_into_s3_activity(inputs: S3InsertInputs) -> int:

     last_uploaded_part_timestamp: str | None = None

-    async def worker_shutdown_handler():
+    async def worker_shutdown_handler() -> None:
         """Handle the Worker shutting down by heart-beating our latest status."""
         await activity.wait_for_worker_shutdown()
         logger.warn(
@@ -466,50 +491,147 @@ async def worker_shutdown_handler():

     asyncio.create_task(worker_shutdown_handler())

-    record = None
-    async with s3_upload as s3_upload:
-        with BatchExportTemporaryFile(compression=inputs.compression) as local_results_file:
+
+    async def flush_to_s3(
+        local_results_file,
+        records_since_last_flush: int,
+        bytes_since_last_flush: int,
+        last_inserted_at: dt.datetime,
+        last: bool,
+    ):
+        nonlocal last_uploaded_part_timestamp
+
+        logger.debug(
+            "Uploading %s part %s containing %s records with size %s bytes",
+            "last " if last else "",
+            s3_upload.part_number + 1,
+            records_since_last_flush,
+            bytes_since_last_flush,
+        )
+
+        await s3_upload.upload_part(local_results_file)
+        rows_exported.add(records_since_last_flush)
+        bytes_exported.add(bytes_since_last_flush)
+
+        last_uploaded_part_timestamp = str(last_inserted_at)
+        activity.heartbeat(last_uploaded_part_timestamp, s3_upload.to_state())
+
+    first_record_batch, record_iterator = peek_first_and_rewind(record_iterator)
+    first_record_batch = cast_record_batch_json_columns(first_record_batch)
+    column_names = first_record_batch.column_names
+    column_names.pop(column_names.index("_inserted_at"))
+
+    schema = pa.schema(
+        # NOTE: For some reason, some record batches set certain fields as non-nullable, whereas
+        # other record batches have the same fields as nullable.
+        # Until we figure it out, we set all fields to nullable. There are some fields we know
+        # are not nullable, but I'm opting for the more flexible option until we figure out why
+        # schemas differ between batches.
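+        # e.g. pa.field("event", pa.string(), nullable=False) from one batch simply becomes
+        # nullable=True here, matching the batches that already report it as nullable.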
+        [field.with_nullable(True) for field in first_record_batch.select(column_names).schema]
+    )
+
+    writer = get_batch_export_writer(
+        inputs,
+        flush_callable=flush_to_s3,
+        max_bytes=settings.BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES,
+        schema=schema,
+    )
+
+    async with writer.open_temporary_file():
         rows_exported = get_rows_exported_metric()
         bytes_exported = get_bytes_exported_metric()

-        async def flush_to_s3(last_uploaded_part_timestamp: str, last=False):
-            logger.debug(
-                "Uploading %s part %s containing %s records with size %s bytes",
-                "last " if last else "",
-                s3_upload.part_number + 1,
-                local_results_file.records_since_last_reset,
-                local_results_file.bytes_since_last_reset,
-            )
+        for record_batch in record_iterator:
+            record_batch = cast_record_batch_json_columns(record_batch)

-            await s3_upload.upload_part(local_results_file)
-            rows_exported.add(local_results_file.records_since_last_reset)
-            bytes_exported.add(local_results_file.bytes_since_last_reset)
+            await writer.write_record_batch(record_batch)

-            activity.heartbeat(last_uploaded_part_timestamp, s3_upload.to_state())
+    await s3_upload.complete()

-        for record_batch in record_iterator:
-            for record in record_batch.to_pylist():
-                for json_column in ("properties", "person_properties", "set", "set_once"):
-                    if (json_str := record.get(json_column, None)) is not None:
-                        record[json_column] = json.loads(json_str)
+    return writer.records_total

-                inserted_at = record.pop("_inserted_at")

-                local_results_file.write_records_to_jsonl([record])
+def get_batch_export_writer(
+    inputs: S3InsertInputs, flush_callable: FlushCallable, max_bytes: int, schema: pa.Schema | None = None
+) -> BatchExportWriter:
+    """Return the `BatchExportWriter` corresponding to configured `file_format`.

-                if local_results_file.tell() > settings.BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES:
-                    last_uploaded_part_timestamp = str(inserted_at)
-                    await flush_to_s3(last_uploaded_part_timestamp)
-                    local_results_file.reset()
+    Raises:
+        UnsupportedFileFormatError: If no writer exists for given `file_format`.
+    """
+    writer: BatchExportWriter

-        if local_results_file.tell() > 0 and record is not None:
-            last_uploaded_part_timestamp = str(inserted_at)
-            await flush_to_s3(last_uploaded_part_timestamp, last=True)
+    if inputs.file_format == "Parquet":
+        writer = ParquetBatchExportWriter(
+            max_bytes=max_bytes,
+            flush_callable=flush_callable,
+            compression=inputs.compression,
+            schema=schema,
+        )
+    elif inputs.file_format == "JSONLines":
+        writer = JSONLBatchExportWriter(
+            max_bytes=max_bytes,
+            flush_callable=flush_callable,
+            compression=inputs.compression,
+        )
+    else:
+        raise UnsupportedFileFormatError(inputs.file_format, "S3")

-        await s3_upload.complete()
+    return writer
+
+
+def cast_record_batch_json_columns(
+    record_batch: pa.RecordBatch,
+    json_columns: collections.abc.Sequence = ("properties", "person_properties", "set", "set_once"),
+) -> pa.RecordBatch:
+    """Cast json_columns in record_batch to JsonType.
+
+    We return a new RecordBatch with any json_columns replaced by fields casted to JsonType.
+    Casting does not copy the underlying array buffers, so memory usage does not increase when creating
+    the new array or the new record batch.
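+
+    For example (illustrative): given a batch whose string column `properties`
+    holds '{"$browser": "Chrome"}', the returned batch has that column casted to
+    `JsonType`, so `record_batch.column("properties")[0].as_py()` yields the dict
+    {"$browser": "Chrome"} instead of the raw JSON string.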
+ """ + column_names = set(record_batch.column_names) + intersection = column_names & set(json_columns) + + casted_arrays = [] + for array in record_batch.select(intersection): + if pa.types.is_string(array.type): + casted_array = array.cast(JsonType()) + casted_arrays.append(casted_array) + + remaining_column_names = list(column_names - intersection) + return pa.RecordBatch.from_arrays( + record_batch.select(remaining_column_names).columns + casted_arrays, + names=remaining_column_names + list(intersection), + ) + + +class JsonScalar(pa.ExtensionScalar): + """Represents a JSON binary string.""" + + def as_py(self) -> dict | None: + if self.value: + return orjson.loads(self.value.as_py().encode("utf-8")) + else: + return None + + +class JsonType(pa.ExtensionType): + """Type for JSON binary strings.""" + + def __init__(self): + super().__init__(pa.string(), "json") + + def __arrow_ext_serialize__(self): + return b"" + + @classmethod + def __arrow_ext_deserialize__(self, storage_type, serialized): + return JsonType() - return local_results_file.records_total + def __arrow_ext_scalar_class__(self): + return JsonScalar @workflow.defn(name="s3-export") @@ -572,6 +694,7 @@ async def run(self, inputs: S3BatchExportInputs): encryption=inputs.encryption, kms_key_id=inputs.kms_key_id, batch_export_schema=inputs.batch_export_schema, + file_format=inputs.file_format, ) await execute_batch_export_insert_activity( diff --git a/posthog/temporal/batch_exports/snowflake_batch_export.py b/posthog/temporal/batch_exports/snowflake_batch_export.py index be94eca89a799..9053f3e1006ad 100644 --- a/posthog/temporal/batch_exports/snowflake_batch_export.py +++ b/posthog/temporal/batch_exports/snowflake_batch_export.py @@ -18,7 +18,6 @@ from posthog.batch_exports.service import BatchExportField, BatchExportSchema, SnowflakeBatchExportInputs from posthog.temporal.batch_exports.base import PostHogWorkflow from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, CreateBatchExportRunInputs, UpdateBatchExportRunStatusInputs, create_export_run, @@ -32,6 +31,9 @@ get_bytes_exported_metric, get_rows_exported_metric, ) +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, +) from posthog.temporal.batch_exports.utils import peek_first_and_rewind from posthog.temporal.common.clickhouse import get_client from posthog.temporal.common.logger import bind_temporal_worker_logger diff --git a/posthog/temporal/batch_exports/temporary_file.py b/posthog/temporal/batch_exports/temporary_file.py new file mode 100644 index 0000000000000..f955f45553727 --- /dev/null +++ b/posthog/temporal/batch_exports/temporary_file.py @@ -0,0 +1,528 @@ +"""This module contains a temporary file to stage data in batch exports.""" +import abc +import collections.abc +import contextlib +import csv +import datetime as dt +import gzip +import tempfile +import typing + +import brotli +import orjson +import pyarrow as pa +import pyarrow.parquet as pq + + +def json_dumps_bytes(d) -> bytes: + return orjson.dumps(d, default=str) + + +class BatchExportTemporaryFile: + """A TemporaryFile used to as an intermediate step while exporting data. + + This class does not implement the file-like interface but rather passes any calls + to the underlying tempfile.NamedTemporaryFile. We do override 'write' methods + to allow tracking bytes and records. 
+ """ + + def __init__( + self, + mode: str = "w+b", + buffering=-1, + compression: str | None = None, + encoding: str | None = None, + newline: str | None = None, + suffix: str | None = None, + prefix: str | None = None, + dir: str | None = None, + *, + errors: str | None = None, + ): + self._file = tempfile.NamedTemporaryFile( + mode=mode, + encoding=encoding, + newline=newline, + buffering=buffering, + suffix=suffix, + prefix=prefix, + dir=dir, + errors=errors, + ) + self.compression = compression + self.bytes_total = 0 + self.records_total = 0 + self.bytes_since_last_reset = 0 + self.records_since_last_reset = 0 + self._brotli_compressor = None + + def __getattr__(self, name): + """Pass get attr to underlying tempfile.NamedTemporaryFile.""" + return self._file.__getattr__(name) + + def __enter__(self): + """Context-manager protocol enter method.""" + self._file.__enter__() + return self + + def __exit__(self, exc, value, tb): + """Context-manager protocol exit method.""" + return self._file.__exit__(exc, value, tb) + + def __iter__(self): + yield from self._file + + @property + def brotli_compressor(self): + if self._brotli_compressor is None: + self._brotli_compressor = brotli.Compressor() + return self._brotli_compressor + + def finish_brotli_compressor(self): + """Flush remaining brotli bytes.""" + # TODO: Move compression out of `BatchExportTemporaryFile` to a standard class for all writers. + if self.compression != "brotli": + raise ValueError(f"Compression is '{self.compression}', not 'brotli'") + + result = self._file.write(self.brotli_compressor.finish()) + self.bytes_total += result + self.bytes_since_last_reset += result + self._brotli_compressor = None + + def compress(self, content: bytes | str) -> bytes: + if isinstance(content, str): + encoded = content.encode("utf-8") + else: + encoded = content + + match self.compression: + case "gzip": + return gzip.compress(encoded) + case "brotli": + self.brotli_compressor.process(encoded) + return self.brotli_compressor.flush() + case None: + return encoded + case _: + raise ValueError(f"Unsupported compression: '{self.compression}'") + + def write(self, content: bytes | str): + """Write bytes to underlying file keeping track of how many bytes were written.""" + compressed_content = self.compress(content) + + if "b" in self.mode: + result = self._file.write(compressed_content) + else: + result = self._file.write(compressed_content.decode("utf-8")) + + self.bytes_total += result + self.bytes_since_last_reset += result + + return result + + def write_record_as_bytes(self, record: bytes): + result = self.write(record) + + self.records_total += 1 + self.records_since_last_reset += 1 + + return result + + def write_records_to_jsonl(self, records): + """Write records to a temporary file as JSONL.""" + if len(records) == 1: + jsonl_dump = orjson.dumps(records[0], option=orjson.OPT_APPEND_NEWLINE, default=str) + else: + jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) + + result = self.write(jsonl_dump) + + self.records_total += len(records) + self.records_since_last_reset += len(records) + + return result + + def write_records_to_csv( + self, + records, + fieldnames: None | collections.abc.Sequence[str] = None, + extrasaction: typing.Literal["raise", "ignore"] = "ignore", + delimiter: str = ",", + quotechar: str = '"', + escapechar: str | None = "\\", + lineterminator: str = "\n", + quoting=csv.QUOTE_NONE, + ): + """Write records to a temporary file as CSV.""" + if len(records) == 0: + return + + if fieldnames is None: + fieldnames 
= list(records[0].keys())
+
+        writer = csv.DictWriter(
+            self,
+            fieldnames=fieldnames,
+            extrasaction=extrasaction,
+            delimiter=delimiter,
+            quotechar=quotechar,
+            escapechar=escapechar,
+            quoting=quoting,
+            lineterminator=lineterminator,
+        )
+        writer.writerows(records)
+
+        self.records_total += len(records)
+        self.records_since_last_reset += len(records)
+
+    def write_records_to_tsv(
+        self,
+        records,
+        fieldnames: None | list[str] = None,
+        extrasaction: typing.Literal["raise", "ignore"] = "ignore",
+        quotechar: str = '"',
+        escapechar: str | None = "\\",
+        lineterminator: str = "\n",
+        quoting=csv.QUOTE_NONE,
+    ):
+        """Write records to a temporary file as TSV."""
+        return self.write_records_to_csv(
+            records,
+            fieldnames=fieldnames,
+            extrasaction=extrasaction,
+            delimiter="\t",
+            quotechar=quotechar,
+            escapechar=escapechar,
+            quoting=quoting,
+            lineterminator=lineterminator,
+        )
+
+    def rewind(self):
+        """Rewind the file before reading it."""
+        self._file.seek(0)
+
+    def reset(self):
+        """Reset underlying file by truncating it.
+
+        Also resets the tracker attributes for bytes and records since last reset.
+        """
+        self._file.seek(0)
+        self._file.truncate()
+
+        self.bytes_since_last_reset = 0
+        self.records_since_last_reset = 0
+
+
+LastInsertedAt = dt.datetime
+IsLast = bool
+RecordsSinceLastFlush = int
+BytesSinceLastFlush = int
+FlushCallable = collections.abc.Callable[
+    [BatchExportTemporaryFile, RecordsSinceLastFlush, BytesSinceLastFlush, LastInsertedAt, IsLast],
+    collections.abc.Awaitable[None],
+]
+
+
+class UnsupportedFileFormatError(Exception):
+    """Raised when a writer for an unsupported file format is requested."""
+
+    def __init__(self, file_format: str, destination: str):
+        super().__init__(f"{file_format} is not a supported format for {destination} batch exports.")
+
+
+class BatchExportWriter(abc.ABC):
+    """A temporary file writer to be used by batch export workflows.
+
+    Subclasses should define `_write_record_batch` with the particular intricacies
+    of the format they are writing.
+
+    Actual writing calls are passed to the underlying `batch_export_file`.
+
+    Attributes:
+        _batch_export_file: The temporary file we are writing to.
+        max_bytes: Flush the temporary file with the provided `flush_callable`
+            upon reaching or surpassing this threshold. Keep in mind we write on a RecordBatch
+            per RecordBatch basis, which means the threshold will be surpassed by at most the
+            size of a RecordBatch before a flush occurs.
+        flush_callable: A callback to flush the temporary file when `max_bytes` is reached.
+            The temporary file will be reset after calling `flush_callable`. When calling
+            `flush_callable` the following positional arguments will be passed: The temporary file
+            that must be flushed, the number of records since the last flush, the number of bytes
+            since the last flush, the latest recorded `_inserted_at`, and a `bool` indicating if
+            this is the last flush (when exiting the context manager).
+        file_kwargs: Optional keyword arguments passed when initializing `_batch_export_file`.
+        last_inserted_at: Latest `_inserted_at` written. This attribute leaks some implementation
+            details, as we assume `_inserted_at` is present, since it's added to all
+            batch export queries.
+        records_total: The total number of records (not RecordBatches!) written.
+        records_since_last_flush: The number of records written since last flush.
+        bytes_total: The total number of bytes written.
+        bytes_since_last_flush: The number of bytes written since last flush.
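+
+    Roughly, the flush cycle works like this (sketch): `write_record_batch` appends a whole
+    RecordBatch to the temporary file; once `bytes_since_last_flush` reaches `max_bytes`,
+    the writer awaits `flush_callable` and resets the file, so every flush only carries the
+    data written since the previous one.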
+ """ + + def __init__( + self, + flush_callable: FlushCallable, + max_bytes: int, + file_kwargs: collections.abc.Mapping[str, typing.Any] | None = None, + ): + self.flush_callable = flush_callable + self.max_bytes = max_bytes + self.file_kwargs: collections.abc.Mapping[str, typing.Any] = file_kwargs or {} + + self._batch_export_file: BatchExportTemporaryFile | None = None + self.reset_writer_tracking() + + def reset_writer_tracking(self): + """Reset this writer's tracking state.""" + self.last_inserted_at: dt.datetime | None = None + self.records_total = 0 + self.records_since_last_flush = 0 + self.bytes_total = 0 + self.bytes_since_last_flush = 0 + + @contextlib.asynccontextmanager + async def open_temporary_file(self): + """Explicitly open the temporary file this writer is writing to. + + The underlying `BatchExportTemporaryFile` is only accessible within this context manager. This helps + us separate the lifetime of the underlying temporary file from the writer: The writer may still be + accessed even after the temporary file is closed, while on the other hand we ensure the file and all + its data is flushed and not leaked outside the context. Any relevant tracking information is copied + to the writer. + """ + self.reset_writer_tracking() + + with BatchExportTemporaryFile(**self.file_kwargs) as temp_file: + self._batch_export_file = temp_file + + try: + yield + finally: + self.track_bytes_written(temp_file) + + if self.last_inserted_at is not None and self.bytes_since_last_flush > 0: + # `bytes_since_last_flush` should be 0 unless: + # 1. The last batch wasn't flushed as it didn't reach `max_bytes`. + # 2. The last batch was flushed but there was another write after the last call to + # `write_record_batch`. For example, footer bytes. + await self.flush(self.last_inserted_at, is_last=True) + + self._batch_export_file = None + + @property + def batch_export_file(self): + """Property for underlying temporary file. + + Raises: + ValueError: if attempting to access the temporary file before it has been opened. + """ + if self._batch_export_file is None: + raise ValueError("Batch export file is closed. Did you forget to call 'open_temporary_file'?") + return self._batch_export_file + + @abc.abstractmethod + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write a record batch to the underlying `BatchExportTemporaryFile`. + + Subclasses must override this to provide the actual implementation according to the supported + file format. 
+ """ + pass + + def track_records_written(self, record_batch: pa.RecordBatch) -> None: + """Update this writer's state with the number of records in `record_batch`.""" + self.records_total += record_batch.num_rows + self.records_since_last_flush += record_batch.num_rows + + def track_bytes_written(self, batch_export_file: BatchExportTemporaryFile) -> None: + """Update this writer's state with the bytes in `batch_export_file`.""" + self.bytes_total = batch_export_file.bytes_total + self.bytes_since_last_flush = batch_export_file.bytes_since_last_reset + + async def write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Issue a record batch write tracking progress and flushing if required.""" + record_batch = record_batch.sort_by("_inserted_at") + last_inserted_at = record_batch.column("_inserted_at")[-1].as_py() + + column_names = record_batch.column_names + column_names.pop(column_names.index("_inserted_at")) + + self._write_record_batch(record_batch.select(column_names)) + + self.last_inserted_at = last_inserted_at + self.track_records_written(record_batch) + self.track_bytes_written(self.batch_export_file) + + if self.bytes_since_last_flush >= self.max_bytes: + await self.flush(last_inserted_at) + + async def flush(self, last_inserted_at: dt.datetime, is_last: bool = False) -> None: + """Call the provided `flush_callable` and reset underlying file. + + The underlying batch export temporary file will be reset after calling `flush_callable`. + """ + if is_last is True and self.batch_export_file.compression == "brotli": + self.batch_export_file.finish_brotli_compressor() + + self.batch_export_file.seek(0) + + await self.flush_callable( + self.batch_export_file, + self.records_since_last_flush, + self.bytes_since_last_flush, + last_inserted_at, + is_last, + ) + self.batch_export_file.reset() + + self.records_since_last_flush = 0 + self.bytes_since_last_flush = 0 + + +class JSONLBatchExportWriter(BatchExportWriter): + """A `BatchExportWriter` for JSONLines format. + + Attributes: + default: The default function to use to cast non-serializable Python objects to serializable objects. + By default, non-serializable objects will be cast to string via `str()`. 
+ """ + + def __init__( + self, + max_bytes: int, + flush_callable: FlushCallable, + compression: None | str = None, + default: typing.Callable = str, + ): + super().__init__( + max_bytes=max_bytes, + flush_callable=flush_callable, + file_kwargs={"compression": compression}, + ) + + self.default = default + + def write(self, content: bytes) -> int: + """Write a single row of JSONL.""" + n = self.batch_export_file.write(orjson.dumps(content, default=str) + b"\n") + return n + + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write records to a temporary file as JSONL.""" + for record in record_batch.to_pylist(): + self.write(record) + + +class CSVBatchExportWriter(BatchExportWriter): + """A `BatchExportWriter` for CSV format.""" + + def __init__( + self, + max_bytes: int, + flush_callable: FlushCallable, + field_names: collections.abc.Sequence[str], + extras_action: typing.Literal["raise", "ignore"] = "ignore", + delimiter: str = ",", + quote_char: str = '"', + escape_char: str | None = "\\", + line_terminator: str = "\n", + quoting=csv.QUOTE_NONE, + compression: str | None = None, + ): + super().__init__( + max_bytes=max_bytes, + flush_callable=flush_callable, + file_kwargs={"compression": compression}, + ) + self.field_names = field_names + self.extras_action: typing.Literal["raise", "ignore"] = extras_action + self.delimiter = delimiter + self.quote_char = quote_char + self.escape_char = escape_char + self.line_terminator = line_terminator + self.quoting = quoting + + self._csv_writer: csv.DictWriter | None = None + + @property + def csv_writer(self) -> csv.DictWriter: + if self._csv_writer is None: + self._csv_writer = csv.DictWriter( + self.batch_export_file, + fieldnames=self.field_names, + extrasaction=self.extras_action, + delimiter=self.delimiter, + quotechar=self.quote_char, + escapechar=self.escape_char, + quoting=self.quoting, + lineterminator=self.line_terminator, + ) + + return self._csv_writer + + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write records to a temporary file as CSV.""" + self.csv_writer.writerows(record_batch.to_pylist()) + + +class ParquetBatchExportWriter(BatchExportWriter): + """A `BatchExportWriter` for Apache Parquet format. + + We utilize and wrap a `pyarrow.parquet.ParquetWriter` to do the actual writing. We default to their + defaults for most parameters; however this class could be extended with more attributes to pass along + to `pyarrow.parquet.ParquetWriter`. + + See the pyarrow docs for more details on what parameters can the writer be configured with: + https://arrow.apache.org/docs/python/generated/pyarrow.parquet.ParquetWriter.html + + In contrast to other writers, instead of us handling compression we let `pyarrow.parquet.ParquetWriter` + handle it, so `BatchExportTemporaryFile` is always initialized with `compression=None`. + + Attributes: + schema: The schema used by the Parquet file. Should match the schema of written RecordBatches. + compression: Compression codec passed to underlying `pyarrow.parquet.ParquetWriter`. 
+ """ + + def __init__( + self, + max_bytes: int, + flush_callable: FlushCallable, + schema: pa.Schema, + compression: str | None = "snappy", + ): + super().__init__( + max_bytes=max_bytes, + flush_callable=flush_callable, + file_kwargs={"compression": None}, # ParquetWriter handles compression + ) + self.schema = schema + self.compression = compression + + self._parquet_writer: pq.ParquetWriter | None = None + + @property + def parquet_writer(self) -> pq.ParquetWriter: + if self._parquet_writer is None: + self._parquet_writer = pq.ParquetWriter( + self.batch_export_file, + schema=self.schema, + compression="none" if self.compression is None else self.compression, + ) + return self._parquet_writer + + @contextlib.asynccontextmanager + async def open_temporary_file(self): + """Ensure underlying Parquet writer is closed before flushing and closing temporary file.""" + async with super().open_temporary_file(): + try: + yield + finally: + if self._parquet_writer is not None: + self._parquet_writer.writer.close() + self._parquet_writer = None + + def _write_record_batch(self, record_batch: pa.RecordBatch) -> None: + """Write records to a temporary file as Parquet.""" + + self.parquet_writer.write_batch(record_batch.select(self.parquet_writer.schema.names)) diff --git a/posthog/temporal/tests/batch_exports/test_batch_exports.py b/posthog/temporal/tests/batch_exports/test_batch_exports.py index 0afbfcabb71cb..756c07e442e4f 100644 --- a/posthog/temporal/tests/batch_exports/test_batch_exports.py +++ b/posthog/temporal/tests/batch_exports/test_batch_exports.py @@ -1,6 +1,4 @@ -import csv import datetime as dt -import io import json import operator from random import randint @@ -9,11 +7,9 @@ from django.test import override_settings from posthog.temporal.batch_exports.batch_exports import ( - BatchExportTemporaryFile, get_data_interval, get_rows_count, iter_records, - json_dumps_bytes, ) from posthog.temporal.tests.utils.events import generate_test_events_in_clickhouse @@ -558,181 +554,3 @@ def test_get_data_interval(interval, data_interval_end, expected): """Test get_data_interval returns the expected data interval tuple.""" result = get_data_interval(interval, data_interval_end) assert result == expected - - -@pytest.mark.parametrize( - "to_write", - [ - (b"",), - (b"", b""), - (b"12345",), - (b"12345", b"12345"), - (b"abbcccddddeeeee",), - (b"abbcccddddeeeee", b"abbcccddddeeeee"), - ], -) -def test_batch_export_temporary_file_tracks_bytes(to_write): - """Test the bytes written by BatchExportTemporaryFile match expected.""" - with BatchExportTemporaryFile() as be_file: - for content in to_write: - be_file.write(content) - - assert be_file.bytes_total == sum(len(content) for content in to_write) - assert be_file.bytes_since_last_reset == sum(len(content) for content in to_write) - - be_file.reset() - - assert be_file.bytes_total == sum(len(content) for content in to_write) - assert be_file.bytes_since_last_reset == 0 - - -TEST_RECORDS = [ - [], - [ - {"id": "record-1", "property": "value", "property_int": 1}, - {"id": "record-2", "property": "another-value", "property_int": 2}, - { - "id": "record-3", - "property": {"id": "nested-record", "property": "nested-value"}, - "property_int": 3, - }, - ], -] - - -@pytest.mark.parametrize( - "records", - TEST_RECORDS, -) -def test_batch_export_temporary_file_write_records_to_jsonl(records): - """Test JSONL records written by BatchExportTemporaryFile match expected.""" - jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) - - with 
BatchExportTemporaryFile() as be_file: - be_file.write_records_to_jsonl(records) - - assert be_file.bytes_total == len(jsonl_dump) - assert be_file.bytes_since_last_reset == len(jsonl_dump) - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == len(records) - - be_file.seek(0) - lines = be_file.readlines() - assert len(lines) == len(records) - - for line_index, jsonl_record in enumerate(lines): - json_loaded = json.loads(jsonl_record) - assert json_loaded == records[line_index] - - be_file.reset() - - assert be_file.bytes_total == len(jsonl_dump) - assert be_file.bytes_since_last_reset == 0 - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == 0 - - -@pytest.mark.parametrize( - "records", - TEST_RECORDS, -) -def test_batch_export_temporary_file_write_records_to_csv(records): - """Test CSV written by BatchExportTemporaryFile match expected.""" - in_memory_file_obj = io.StringIO() - writer = csv.DictWriter( - in_memory_file_obj, - fieldnames=records[0].keys() if len(records) > 0 else [], - delimiter=",", - quotechar='"', - escapechar="\\", - lineterminator="\n", - quoting=csv.QUOTE_NONE, - ) - writer.writerows(records) - - with BatchExportTemporaryFile(mode="w+") as be_file: - be_file.write_records_to_csv(records) - - assert be_file.bytes_total == in_memory_file_obj.tell() - assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == len(records) - - be_file.seek(0) - reader = csv.reader( - be_file._file, - delimiter=",", - quotechar='"', - escapechar="\\", - quoting=csv.QUOTE_NONE, - ) - - rows = [row for row in reader] - assert len(rows) == len(records) - - for row_index, csv_record in enumerate(rows): - for value_index, value in enumerate(records[row_index].values()): - # Everything returned by csv.reader is a str. - # This means type information is lost when writing to CSV - # but this just a limitation of the format. - assert csv_record[value_index] == str(value) - - be_file.reset() - - assert be_file.bytes_total == in_memory_file_obj.tell() - assert be_file.bytes_since_last_reset == 0 - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == 0 - - -@pytest.mark.parametrize( - "records", - TEST_RECORDS, -) -def test_batch_export_temporary_file_write_records_to_tsv(records): - """Test TSV written by BatchExportTemporaryFile match expected.""" - in_memory_file_obj = io.StringIO() - writer = csv.DictWriter( - in_memory_file_obj, - fieldnames=records[0].keys() if len(records) > 0 else [], - delimiter="\t", - quotechar='"', - escapechar="\\", - lineterminator="\n", - quoting=csv.QUOTE_NONE, - ) - writer.writerows(records) - - with BatchExportTemporaryFile(mode="w+") as be_file: - be_file.write_records_to_tsv(records) - - assert be_file.bytes_total == in_memory_file_obj.tell() - assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == len(records) - - be_file.seek(0) - reader = csv.reader( - be_file._file, - delimiter="\t", - quotechar='"', - escapechar="\\", - quoting=csv.QUOTE_NONE, - ) - - rows = [row for row in reader] - assert len(rows) == len(records) - - for row_index, csv_record in enumerate(rows): - for value_index, value in enumerate(records[row_index].values()): - # Everything returned by csv.reader is a str. 
- # This means type information is lost when writing to CSV - # but this just a limitation of the format. - assert csv_record[value_index] == str(value) - - be_file.reset() - - assert be_file.bytes_total == in_memory_file_obj.tell() - assert be_file.bytes_since_last_reset == 0 - assert be_file.records_total == len(records) - assert be_file.records_since_last_reset == 0 diff --git a/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py b/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py index e04e345d11245..e6583d049e2a8 100644 --- a/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py +++ b/posthog/temporal/tests/batch_exports/test_s3_batch_export_workflow.py @@ -10,10 +10,12 @@ import aioboto3 import botocore.exceptions import brotli +import pyarrow.parquet as pq import pytest import pytest_asyncio from django.conf import settings from django.test import override_settings +from pyarrow import fs from temporalio import activity from temporalio.client import WorkflowFailureError from temporalio.common import RetryPolicy @@ -27,6 +29,7 @@ update_export_run_status, ) from posthog.temporal.batch_exports.s3_batch_export import ( + FILE_FORMAT_EXTENSIONS, HeartbeatDetails, S3BatchExportInputs, S3BatchExportWorkflow, @@ -107,6 +110,15 @@ def s3_key_prefix(): return f"posthog-events-{str(uuid4())}" +@pytest.fixture +def file_format(request) -> str: + """S3 file format.""" + try: + return request.param + except AttributeError: + return f"JSONLines" + + async def delete_all_from_s3(minio_client, bucket_name: str, key_prefix: str): """Delete all objects in bucket_name under key_prefix.""" response = await minio_client.list_objects_v2(Bucket=bucket_name, Prefix=key_prefix) @@ -138,6 +150,61 @@ async def minio_client(bucket_name): await minio_client.delete_bucket(Bucket=bucket_name) +async def read_parquet_from_s3(bucket_name: str, key: str, json_columns) -> list: + async with aioboto3.Session().client("sts") as sts: + try: + await sts.get_caller_identity() + except botocore.exceptions.NoCredentialsError: + s3 = fs.S3FileSystem( + access_key="object_storage_root_user", + secret_key="object_storage_root_password", + endpoint_override=settings.OBJECT_STORAGE_ENDPOINT, + ) + + else: + if os.getenv("S3_TEST_BUCKET") is not None: + s3 = fs.S3FileSystem() + else: + s3 = fs.S3FileSystem( + access_key="object_storage_root_user", + secret_key="object_storage_root_password", + endpoint_override=settings.OBJECT_STORAGE_ENDPOINT, + ) + + table = pq.read_table(f"{bucket_name}/{key}", filesystem=s3) + + parquet_data = [] + for batch in table.to_batches(): + for record in batch.to_pylist(): + casted_record = {} + for k, v in record.items(): + if isinstance(v, dt.datetime): + # We read data from clickhouse as string, but parquet already casts them as dates. + # To facilitate comparison, we isoformat the dates. + casted_record[k] = v.isoformat() + elif k in json_columns and v is not None: + # Parquet doesn't have a variable map type, so JSON fields are just strings. 
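+                        # We decode them back so the Parquet rows compare like-for-like with the JSONL path.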
+ casted_record[k] = json.loads(v) + else: + casted_record[k] = v + parquet_data.append(casted_record) + + return parquet_data + + +def read_s3_data_as_json(data: bytes, compression: str | None) -> list: + match compression: + case "gzip": + data = gzip.decompress(data) + case "brotli": + data = brotli.decompress(data) + case _: + pass + + json_data = [json.loads(line) for line in data.decode("utf-8").split("\n") if line] + return json_data + + async def assert_clickhouse_records_in_s3( s3_compatible_client, clickhouse_client: ClickHouseClient, @@ -150,6 +217,7 @@ async def assert_clickhouse_records_in_s3( include_events: list[str] | None = None, batch_export_schema: BatchExportSchema | None = None, compression: str | None = None, + file_format: str = "JSONLines", ): """Assert ClickHouse records are written to JSON in key_prefix in S3 bucket_name. @@ -175,28 +243,24 @@ async def assert_clickhouse_records_in_s3( # Get the object. key = objects["Contents"][0].get("Key") assert key - s3_object = await s3_compatible_client.get_object(Bucket=bucket_name, Key=key) - data = await s3_object["Body"].read() - # Check that the data is correct. - match compression: - case "gzip": - data = gzip.decompress(data) - case "brotli": - data = brotli.decompress(data) - case _: - pass + json_columns = ("properties", "person_properties", "set", "set_once") - json_data = [json.loads(line) for line in data.decode("utf-8").split("\n") if line] - # Pull out the fields we inserted only + if file_format == "Parquet": + s3_data = await read_parquet_from_s3(bucket_name, key, json_columns) + + elif file_format == "JSONLines": + s3_object = await s3_compatible_client.get_object(Bucket=bucket_name, Key=key) + data = await s3_object["Body"].read() + s3_data = read_s3_data_as_json(data, compression) + else: + raise ValueError(f"Unsupported file format: {file_format}") if batch_export_schema is not None: schema_column_names = [field["alias"] for field in batch_export_schema["fields"]] else: schema_column_names = [field["alias"] for field in s3_default_fields()] - json_columns = ("properties", "person_properties", "set", "set_once") - expected_records = [] for record_batch in iter_records( client=clickhouse_client, @@ -225,9 +289,9 @@ async def assert_clickhouse_records_in_s3( expected_records.append(expected_record) - assert len(json_data) == len(expected_records) - assert json_data[0] == expected_records[0] - assert json_data == expected_records + assert len(s3_data) == len(expected_records) + assert s3_data[0] == expected_records[0] + assert s3_data == expected_records TEST_S3_SCHEMAS: list[BatchExportSchema | None] = [ @@ -255,6 +319,7 @@ async def assert_clickhouse_records_in_s3( @pytest.mark.parametrize("compression", [None, "gzip", "brotli"], indirect=True) @pytest.mark.parametrize("exclude_events", [None, ["test-exclude"]], indirect=True) @pytest.mark.parametrize("batch_export_schema", TEST_S3_SCHEMAS) +@pytest.mark.parametrize("file_format", FILE_FORMAT_EXTENSIONS.keys()) async def test_insert_into_s3_activity_puts_data_into_s3( clickhouse_client, bucket_name, @@ -262,6 +327,7 @@ async def test_insert_into_s3_activity_puts_data_into_s3( activity_environment, compression, exclude_events, + file_format, batch_export_schema: BatchExportSchema | None, ): """Test that the insert_into_s3_activity function ends up with data into S3. 
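The gzip and brotli branches in `read_s3_data_as_json` mirror `BatchExportTemporaryFile.compress` above: gzip is compressed one buffer at a time, while brotli is streamed through a `Compressor` whose trailing bytes only appear when `finish()` is called (which is what `finish_brotli_compressor` does on the last flush). A minimal, self-contained round trip of that streaming pattern, using only the `brotli` package:

import brotli

compressor = brotli.Compressor()
stream = b""
for chunk in (b"hello ", b"world"):
    # process() may buffer input; flush() forces out everything compressed so far.
    stream += compressor.process(chunk)
    stream += compressor.flush()
# finish() emits the stream trailer; without it the stream is incomplete and
# decompression fails. This is the step finish_brotli_compressor() performs.
stream += compressor.finish()

assert brotli.decompress(stream) == b"hello world"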
@@ -339,12 +405,15 @@ async def test_insert_into_s3_activity_puts_data_into_s3( compression=compression, exclude_events=exclude_events, batch_export_schema=batch_export_schema, + file_format=file_format, ) with override_settings( BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES=5 * 1024**2 ): # 5MB, the minimum for Multipart uploads - await activity_environment.run(insert_into_s3_activity, insert_inputs) + records_total = await activity_environment.run(insert_into_s3_activity, insert_inputs) + + assert records_total == 10005 await assert_clickhouse_records_in_s3( s3_compatible_client=minio_client, @@ -358,6 +427,7 @@ async def test_insert_into_s3_activity_puts_data_into_s3( exclude_events=exclude_events, include_events=None, compression=compression, + file_format=file_format, ) @@ -371,6 +441,7 @@ async def s3_batch_export( exclude_events, temporal_client, encryption, + file_format, ): destination_data = { "type": "S3", @@ -385,6 +456,7 @@ async def s3_batch_export( "exclude_events": exclude_events, "encryption": encryption, "kms_key_id": os.getenv("S3_TEST_KMS_KEY_ID") if encryption == "aws:kms" else None, + "file_format": file_format, }, } @@ -410,6 +482,7 @@ async def s3_batch_export( @pytest.mark.parametrize("compression", [None, "gzip", "brotli"], indirect=True) @pytest.mark.parametrize("exclude_events", [None, ["test-exclude"]], indirect=True) @pytest.mark.parametrize("batch_export_schema", TEST_S3_SCHEMAS) +@pytest.mark.parametrize("file_format", FILE_FORMAT_EXTENSIONS.keys(), indirect=True) async def test_s3_export_workflow_with_minio_bucket( clickhouse_client, minio_client, @@ -421,6 +494,7 @@ async def test_s3_export_workflow_with_minio_bucket( exclude_events, s3_key_prefix, batch_export_schema, + file_format, ): """Test S3BatchExport Workflow end-to-end by using a local MinIO bucket instead of S3. @@ -508,6 +582,7 @@ async def test_s3_export_workflow_with_minio_bucket( batch_export_schema=batch_export_schema, exclude_events=exclude_events, compression=compression, + file_format=file_format, ) @@ -537,6 +612,7 @@ async def s3_client(bucket_name, s3_key_prefix): @pytest.mark.parametrize("encryption", [None, "AES256", "aws:kms"], indirect=True) @pytest.mark.parametrize("bucket_name", [os.getenv("S3_TEST_BUCKET")], indirect=True) @pytest.mark.parametrize("batch_export_schema", TEST_S3_SCHEMAS) +@pytest.mark.parametrize("file_format", FILE_FORMAT_EXTENSIONS.keys(), indirect=True) async def test_s3_export_workflow_with_s3_bucket( s3_client, clickhouse_client, @@ -549,6 +625,7 @@ async def test_s3_export_workflow_with_s3_bucket( exclude_events, ateam, batch_export_schema, + file_format, ): """Test S3 Export Workflow end-to-end by using an S3 bucket. 
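The `file_format` fixture introduced above relies on pytest's indirect parametrization: `parametrize(..., indirect=True)` routes each value through the fixture via `request.param`, and the `try`/`except AttributeError` supplies a default for tests that don't parametrize the fixture at all. A condensed, self-contained illustration of the same pattern:

import pytest

@pytest.fixture
def file_format(request) -> str:
    try:
        return request.param  # set only when parametrized with indirect=True
    except AttributeError:
        return "JSONLines"  # default for tests that don't parametrize the fixture

@pytest.mark.parametrize("file_format", ["JSONLines", "Parquet"], indirect=True)
def test_parametrized_format(file_format):
    assert file_format in ("JSONLines", "Parquet")

def test_default_format(file_format):
    assert file_format == "JSONLines"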
@@ -646,6 +723,7 @@ async def test_s3_export_workflow_with_s3_bucket( exclude_events=exclude_events, include_events=None, compression=compression, + file_format=file_format, ) @@ -1206,6 +1284,49 @@ async def never_finish_activity(_: S3InsertInputs) -> str: ), "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.jsonl.br", ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + file_format="Parquet", + compression="snappy", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.sz", + ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + file_format="Parquet", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet", + ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + compression="gzip", + file_format="Parquet", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.gz", + ), + ( + S3InsertInputs( + prefix="/nested/prefix/", + data_interval_start="2023-01-01 00:00:00", + data_interval_end="2023-01-01 01:00:00", + compression="brotli", + file_format="Parquet", + **base_inputs, # type: ignore + ), + "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.br", + ), ], ) def test_get_s3_key(inputs, expected): @@ -1271,7 +1392,7 @@ def assert_heartbeat_details(*details): endpoint_url=settings.OBJECT_STORAGE_ENDPOINT, ) - with override_settings(BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES=5 * 1024**2): + with override_settings(BATCH_EXPORT_S3_UPLOAD_CHUNK_SIZE_BYTES=1, CLICKHOUSE_MAX_BLOCK_SIZE_DEFAULT=1): await activity_environment.run(insert_into_s3_activity, insert_inputs) # This checks that the assert_heartbeat_details function was actually called. 
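The `test_get_s3_key` cases above pin down how the key is assembled: prefix, data interval, a format extension from `FILE_FORMAT_EXTENSIONS`, and an optional compression suffix (`.gz`, `.br`, and `.sz` for snappy). A hypothetical reconstruction of that logic (the function name and the compression mapping are illustrative, and the `FILE_FORMAT_EXTENSIONS` values are inferred from the expected keys, not taken from the source):

# Inferred from the expected keys; only FILE_FORMAT_EXTENSIONS is a real import.
FILE_FORMAT_EXTENSIONS = {"JSONLines": "jsonl", "Parquet": "parquet"}
COMPRESSION_EXTENSIONS = {"gzip": "gz", "brotli": "br", "snappy": "sz"}  # illustrative name

def get_s3_key_sketch(
    prefix: str,
    data_interval_start: str,
    data_interval_end: str,
    file_format: str = "JSONLines",
    compression: str | None = None,
) -> str:
    base = f"{prefix.strip('/')}/{data_interval_start}-{data_interval_end}"
    key = f"{base}.{FILE_FORMAT_EXTENSIONS[file_format]}"
    if compression is not None:
        key += f".{COMPRESSION_EXTENSIONS[compression]}"
    return key

assert get_s3_key_sketch(
    "/nested/prefix/", "2023-01-01 00:00:00", "2023-01-01 01:00:00",
    file_format="Parquet", compression="snappy",
) == "nested/prefix/2023-01-01 00:00:00-2023-01-01 01:00:00.parquet.sz"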
diff --git a/posthog/temporal/tests/batch_exports/test_temporary_file.py b/posthog/temporal/tests/batch_exports/test_temporary_file.py new file mode 100644 index 0000000000000..4fd7e69c0c12f --- /dev/null +++ b/posthog/temporal/tests/batch_exports/test_temporary_file.py @@ -0,0 +1,389 @@ +import csv +import datetime as dt +import io +import json + +import pyarrow as pa +import pyarrow.parquet as pq +import pytest + +from posthog.temporal.batch_exports.temporary_file import ( + BatchExportTemporaryFile, + CSVBatchExportWriter, + JSONLBatchExportWriter, + ParquetBatchExportWriter, + json_dumps_bytes, +) + + +@pytest.mark.parametrize( + "to_write", + [ + (b"",), + (b"", b""), + (b"12345",), + (b"12345", b"12345"), + (b"abbcccddddeeeee",), + (b"abbcccddddeeeee", b"abbcccddddeeeee"), + ], +) +def test_batch_export_temporary_file_tracks_bytes(to_write): + """Test the bytes written by BatchExportTemporaryFile match expected.""" + with BatchExportTemporaryFile() as be_file: + for content in to_write: + be_file.write(content) + + assert be_file.bytes_total == sum(len(content) for content in to_write) + assert be_file.bytes_since_last_reset == sum(len(content) for content in to_write) + + be_file.reset() + + assert be_file.bytes_total == sum(len(content) for content in to_write) + assert be_file.bytes_since_last_reset == 0 + + +TEST_RECORDS = [ + [], + [ + {"id": "record-1", "property": "value", "property_int": 1}, + {"id": "record-2", "property": "another-value", "property_int": 2}, + { + "id": "record-3", + "property": {"id": "nested-record", "property": "nested-value"}, + "property_int": 3, + }, + ], +] + + +@pytest.mark.parametrize( + "records", + TEST_RECORDS, +) +def test_batch_export_temporary_file_write_records_to_jsonl(records): + """Test JSONL records written by BatchExportTemporaryFile match expected.""" + jsonl_dump = b"\n".join(map(json_dumps_bytes, records)) + + with BatchExportTemporaryFile() as be_file: + be_file.write_records_to_jsonl(records) + + assert be_file.bytes_total == len(jsonl_dump) + assert be_file.bytes_since_last_reset == len(jsonl_dump) + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == len(records) + + be_file.seek(0) + lines = be_file.readlines() + assert len(lines) == len(records) + + for line_index, jsonl_record in enumerate(lines): + json_loaded = json.loads(jsonl_record) + assert json_loaded == records[line_index] + + be_file.reset() + + assert be_file.bytes_total == len(jsonl_dump) + assert be_file.bytes_since_last_reset == 0 + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == 0 + + +@pytest.mark.parametrize( + "records", + TEST_RECORDS, +) +def test_batch_export_temporary_file_write_records_to_csv(records): + """Test CSV written by BatchExportTemporaryFile match expected.""" + in_memory_file_obj = io.StringIO() + writer = csv.DictWriter( + in_memory_file_obj, + fieldnames=records[0].keys() if len(records) > 0 else [], + delimiter=",", + quotechar='"', + escapechar="\\", + lineterminator="\n", + quoting=csv.QUOTE_NONE, + ) + writer.writerows(records) + + with BatchExportTemporaryFile(mode="w+") as be_file: + be_file.write_records_to_csv(records) + + assert be_file.bytes_total == in_memory_file_obj.tell() + assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == len(records) + + be_file.seek(0) + reader = csv.reader( + be_file._file, + delimiter=",", + quotechar='"', + 
escapechar="\\", + quoting=csv.QUOTE_NONE, + ) + + rows = [row for row in reader] + assert len(rows) == len(records) + + for row_index, csv_record in enumerate(rows): + for value_index, value in enumerate(records[row_index].values()): + # Everything returned by csv.reader is a str. + # This means type information is lost when writing to CSV + # but this just a limitation of the format. + assert csv_record[value_index] == str(value) + + be_file.reset() + + assert be_file.bytes_total == in_memory_file_obj.tell() + assert be_file.bytes_since_last_reset == 0 + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == 0 + + +@pytest.mark.parametrize( + "records", + TEST_RECORDS, +) +def test_batch_export_temporary_file_write_records_to_tsv(records): + """Test TSV written by BatchExportTemporaryFile match expected.""" + in_memory_file_obj = io.StringIO() + writer = csv.DictWriter( + in_memory_file_obj, + fieldnames=records[0].keys() if len(records) > 0 else [], + delimiter="\t", + quotechar='"', + escapechar="\\", + lineterminator="\n", + quoting=csv.QUOTE_NONE, + ) + writer.writerows(records) + + with BatchExportTemporaryFile(mode="w+") as be_file: + be_file.write_records_to_tsv(records) + + assert be_file.bytes_total == in_memory_file_obj.tell() + assert be_file.bytes_since_last_reset == in_memory_file_obj.tell() + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == len(records) + + be_file.seek(0) + reader = csv.reader( + be_file._file, + delimiter="\t", + quotechar='"', + escapechar="\\", + quoting=csv.QUOTE_NONE, + ) + + rows = [row for row in reader] + assert len(rows) == len(records) + + for row_index, csv_record in enumerate(rows): + for value_index, value in enumerate(records[row_index].values()): + # Everything returned by csv.reader is a str. + # This means type information is lost when writing to CSV + # but this just a limitation of the format. 
+ assert csv_record[value_index] == str(value) + + be_file.reset() + + assert be_file.bytes_total == in_memory_file_obj.tell() + assert be_file.bytes_since_last_reset == 0 + assert be_file.records_total == len(records) + assert be_file.records_since_last_reset == 0 + + +TEST_RECORD_BATCHES = [ + pa.RecordBatch.from_pydict( + { + "event": pa.array(["test-event-0", "test-event-1", "test-event-2"]), + "properties": pa.array(['{"prop_0": 1, "prop_1": 2}', "{}", "null"]), + "_inserted_at": pa.array([0, 1, 2]), + } + ) +] + + +@pytest.mark.parametrize( + "record_batch", + TEST_RECORD_BATCHES, +) +@pytest.mark.asyncio +async def test_jsonl_writer_writes_record_batches(record_batch): + """Test record batches are written as valid JSONL.""" + in_memory_file_obj = io.BytesIO() + inserted_ats_seen = [] + + async def store_in_memory_on_flush( + batch_export_file, records_since_last_flush, bytes_since_last_flush, last_inserted_at, is_last + ): + in_memory_file_obj.write(batch_export_file.read()) + inserted_ats_seen.append(last_inserted_at) + + writer = JSONLBatchExportWriter(max_bytes=1, flush_callable=store_in_memory_on_flush) + + record_batch = record_batch.sort_by("_inserted_at") + async with writer.open_temporary_file(): + await writer.write_record_batch(record_batch) + + lines = in_memory_file_obj.readlines() + for index, line in enumerate(lines): + written_jsonl = json.loads(line) + + single_record_batch = record_batch.slice(offset=index, length=1) + expected_jsonl = single_record_batch.to_pylist()[0] + + assert "_inserted_at" not in written_jsonl + assert written_jsonl == expected_jsonl + + assert inserted_ats_seen == [record_batch.column("_inserted_at")[-1].as_py()] + + +@pytest.mark.parametrize( + "record_batch", + TEST_RECORD_BATCHES, +) +@pytest.mark.asyncio +async def test_csv_writer_writes_record_batches(record_batch): + """Test record batches are written as valid CSV.""" + in_memory_file_obj = io.StringIO() + inserted_ats_seen = [] + + async def store_in_memory_on_flush( + batch_export_file, records_since_last_flush, bytes_since_last_flush, last_inserted_at, is_last + ): + in_memory_file_obj.write(batch_export_file.read().decode("utf-8")) + inserted_ats_seen.append(last_inserted_at) + + schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"] + writer = CSVBatchExportWriter(max_bytes=1, field_names=schema_columns, flush_callable=store_in_memory_on_flush) + + record_batch = record_batch.sort_by("_inserted_at") + async with writer.open_temporary_file(): + await writer.write_record_batch(record_batch) + + reader = csv.reader( + in_memory_file_obj, + delimiter=",", + quotechar='"', + escapechar="\\", + quoting=csv.QUOTE_NONE, + ) + for index, written_csv_row in enumerate(reader): + single_record_batch = record_batch.slice(offset=index, length=1) + expected_csv = single_record_batch.to_pylist()[0] + + assert "_inserted_at" not in written_csv_row + assert written_csv_row == expected_csv + + assert inserted_ats_seen == [record_batch.column("_inserted_at")[-1].as_py()] + + +@pytest.mark.parametrize( + "record_batch", + TEST_RECORD_BATCHES, +) +@pytest.mark.asyncio +async def test_parquet_writer_writes_record_batches(record_batch): + """Test record batches are written as valid Parquet.""" + in_memory_file_obj = io.BytesIO() + inserted_ats_seen = [] + + async def store_in_memory_on_flush( + batch_export_file, records_since_last_flush, bytes_since_last_flush, last_inserted_at, is_last + ): + in_memory_file_obj.write(batch_export_file.read()) + 
inserted_ats_seen.append(last_inserted_at) + + schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"] + + writer = ParquetBatchExportWriter( + max_bytes=1, + flush_callable=store_in_memory_on_flush, + schema=record_batch.select(schema_columns).schema, + ) + + record_batch = record_batch.sort_by("_inserted_at") + async with writer.open_temporary_file(): + await writer.write_record_batch(record_batch) + + written_parquet = pq.read_table(in_memory_file_obj) + + for index, written_row_as_dict in enumerate(written_parquet.to_pylist()): + single_record_batch = record_batch.slice(offset=index, length=1) + expected_row_as_dict = single_record_batch.select(schema_columns).to_pylist()[0] + + assert "_inserted_at" not in written_row_as_dict + assert written_row_as_dict == expected_row_as_dict + + # NOTE: Parquet gets flushed twice due to the extra flush at the end for footer bytes, so our mock function + # will see this value twice. + assert inserted_ats_seen == [ + record_batch.column("_inserted_at")[-1].as_py(), + record_batch.column("_inserted_at")[-1].as_py(), + ] + + +@pytest.mark.parametrize( + "record_batch", + TEST_RECORD_BATCHES, +) +@pytest.mark.asyncio +async def test_writing_out_of_scope_of_temporary_file_raises(record_batch): + """Test attempting a write out of temporary file scope raises a `ValueError`.""" + + async def do_nothing(*args, **kwargs): + pass + + schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"] + writer = ParquetBatchExportWriter( + max_bytes=10, + flush_callable=do_nothing, + schema=record_batch.select(schema_columns).schema, + ) + + async with writer.open_temporary_file(): + pass + + with pytest.raises(ValueError, match="Batch export file is closed"): + await writer.write_record_batch(record_batch) + + +@pytest.mark.parametrize( + "record_batch", + TEST_RECORD_BATCHES, +) +@pytest.mark.asyncio +async def test_flushing_parquet_writer_resets_underlying_file(record_batch): + """Test flushing a writer resets underlying file.""" + flush_counter = 0 + + async def track_flushes(*args, **kwargs): + nonlocal flush_counter + flush_counter += 1 + + schema_columns = [column_name for column_name in record_batch.column_names if column_name != "_inserted_at"] + writer = ParquetBatchExportWriter( + max_bytes=10000000, + flush_callable=track_flushes, + schema=record_batch.select(schema_columns).schema, + ) + + async with writer.open_temporary_file(): + await writer.write_record_batch(record_batch) + + assert writer.batch_export_file.tell() > 0 + assert writer.bytes_since_last_flush > 0 + assert writer.bytes_since_last_flush == writer.batch_export_file.bytes_since_last_reset + assert writer.records_since_last_flush == record_batch.num_rows + + await writer.flush(dt.datetime.now()) + + assert flush_counter == 1 + assert writer.batch_export_file.tell() == 0 + assert writer.bytes_since_last_flush == 0 + assert writer.bytes_since_last_flush == writer.batch_export_file.bytes_since_last_reset + assert writer.records_since_last_flush == 0 + + assert flush_counter == 2 diff --git a/posthog/warehouse/models/table.py b/posthog/warehouse/models/table.py index 91c6f61709d6e..23cc5a7ce9541 100644 --- a/posthog/warehouse/models/table.py +++ b/posthog/warehouse/models/table.py @@ -1,4 +1,4 @@ -from typing import Dict +from typing import Dict, Optional from django.db import models from posthog.client import sync_execute @@ -175,6 +175,17 @@ def hogql_definition(self) -> S3Table: structure=", 
".join(structure), ) + def get_clickhouse_column_type(self, column_name: str) -> Optional[str]: + clickhouse_type = self.columns.get(column_name, None) + + if isinstance(clickhouse_type, dict) and self.columns[column_name].get("clickhouse"): + clickhouse_type = self.columns[column_name].get("clickhouse") + + if clickhouse_type.startswith("Nullable("): + clickhouse_type = clickhouse_type.replace("Nullable(", "")[:-1] + + return clickhouse_type + def _safe_expose_ch_error(self, err): err = wrap_query_error(err) for key, value in ExtractErrors.items():