diff --git a/contents/docs/experiments/data-warehouse.mdx b/contents/docs/experiments/data-warehouse.mdx new file mode 100644 index 000000000000..351e32b2535b --- /dev/null +++ b/contents/docs/experiments/data-warehouse.mdx @@ -0,0 +1,59 @@ +--- +title: Using data warehouse tables in experiments +--- + +> 🚧 **Note:** Data warehouse integration is currently considered in `beta`. We are keen to gather as much feedback as possible so if you try this out please let us know. You can email [daniel.b@posthog.com](mailto:daniel.@posthog.com), send feedback via the [in-app support panel](https://us.posthog.com#panel=support%3Afeedback%3Aexperiments%3Alow), or use one of our other [support options](/docs/support-options). + +Evaluating experiment metrics always depends on events. They rely on something happening at a certain point in time. If one of your [data warehouse](/docs/data-warehouse) tables includes event-like data, you can use it as a primary or secondary metric for your trends experiment. + +To use a data warehouse table with an experiment, you'll first need to join the 'events' table to your data warehouse table: + +1. Navigate to the [Data warehouse](https://us.posthog.com/data-warehouse) tab and click on **Add join** from the triple dot menu next to your table. +2. Join the `events` table to your data warehouse table: + a. Under **Source Table Key**, specify a column that holds the value of the `distinct_id` present for the `$feature_flag_called` event. + b. Check **Optimize for Experiments** to ensure only the most recent matching event is joined to your table. + c. Under **Source Timestamp Key**, specify a column that represents the timestamp of the table row. It will be compared with the event timestamp to determine the most recent `$feature_flag_called` event for the row. + + + + + +Once you've joined the `events` table to your data warehouse table, you can use the data warehouse table in the experiment goal or as a secondary metric: + +1. When picking an experiment goal, click on the **Data warehouse tables** category and select your table. +2. Specify the columns in your data warehouse table that represent the unique ID, the distinct ID, and the timestamp. + + + +When you select a data warehouse table as an experiment goal or secondary metric, PostHog sees it as an 'events-like' table and is thus able to use it to calculate results. + + + +The PostHog uses the join to the `events` table to determine which variant a given data warehouse table row is associated with. For trends experiments, the most recent `$feature_flag_called` event before the row represents their active variant assignment: + +``` +Data Warehouse: timestamp=2024-01-03 12:00, distinct_id=user1 +Events: + - 2024-01-02 12:00: (user1, variant='control') <- This event will be joined + - 2024-01-03 18:00: (user1, variant='test') <- Ignored (occurs after data warehouse timestamp) +``` diff --git a/src/navs/index.js b/src/navs/index.js index d7e8d51c8468..3c5862ba0d23 100644 --- a/src/navs/index.js +++ b/src/navs/index.js @@ -2643,6 +2643,16 @@ export const docsMenu = { icon: 'IconPeople', color: 'orange', }, + { + name: 'Data warehouse', + url: '/docs/experiments/data-warehouse', + icon: 'IconDatabase', + color: 'blue', + badge: { + title: 'Beta', + className: 'uppercase !bg-blue/10 !text-blue !dark:text-white !dark:bg-blue/50', + }, + }, ], }, {