From 16d6c09fdafb33f5f08c30cefe0d8232f4ac169e Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Sat, 30 Nov 2024 04:09:52 -0800 Subject: [PATCH] Use stored fields in the actual JOIN --- .../test_experiment_trends_query_runner.py | 22 +++++++++---------- posthog/warehouse/models/join.py | 15 ++++++++----- 2 files changed, 21 insertions(+), 16 deletions(-) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index 40814d3afec5c..a0cd5c03c527e 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -137,7 +137,7 @@ def create_data_warehouse_table_with_payments(self): ) distinct_id = pa.array(["user_control_0", "user_test_1", "user_test_2", "user_test_3", "user_extra"]) amount = pa.array([100, 50, 75, 80, 90]) - names = ["id", "timestamp", "distinct_id", "amount"] + names = ["id", "dw_timestamp", "dw_distinct_id", "amount"] pq.write_to_dataset( pa.Table.from_arrays([id, timestamp, distinct_id, amount], names=names), @@ -163,8 +163,8 @@ def create_data_warehouse_table_with_payments(self): team=self.team, columns={ "id": "String", - "timestamp": "DateTime64(3, 'UTC')", - "distinct_id": "String", + "dw_timestamp": "DateTime64(3, 'UTC')", + "dw_distinct_id": "String", "amount": "Int64", }, credential=credential, @@ -173,11 +173,11 @@ def create_data_warehouse_table_with_payments(self): DataWarehouseJoin.objects.create( team=self.team, source_table_name=table_name, - source_table_key="distinct_id", + source_table_key="dw_distinct_id", joining_table_name="events", joining_table_key="distinct_id", field_name="events", - configuration={"experiments_optimized": True}, + configuration={"experiments_optimized": True, "experiments_timestamp_field": "dw_timestamp"}, ) return table_name @@ -504,10 +504,10 @@ def test_query_runner_with_data_warehouse_series(self): series=[ DataWarehouseNode( id=table_name, - distinct_id_field="distinct_id", - id_field="distinct_id", + distinct_id_field="dw_distinct_id", + id_field="id", table_name=table_name, - timestamp_field="timestamp", + timestamp_field="dw_timestamp", ) ] ) @@ -597,10 +597,10 @@ def test_query_runner_with_invalid_data_warehouse_table_name(self): series=[ DataWarehouseNode( id=table_name, - distinct_id_field="distinct_id", - id_field="distinct_id", + distinct_id_field="dw_distinct_id", + id_field="id", table_name=table_name, - timestamp_field="timestamp", + timestamp_field="dw_timestamp", ) ] ) diff --git a/posthog/warehouse/models/join.py b/posthog/warehouse/models/join.py index 407d2ce910339..71c954745c503 100644 --- a/posthog/warehouse/models/join.py +++ b/posthog/warehouse/models/join.py @@ -97,14 +97,19 @@ def _join_function( return _join_function - def join_function_for_experiments( - self, override_source_table_key: Optional[str] = None, override_joining_table_key: Optional[str] = None - ): + def join_function_for_experiments(self): def _join_function_for_experiments( join_to_add: LazyJoinToAdd, context: HogQLContext, node: SelectQuery, ): + if not self.configuration.get("experiments_optimized"): + raise ResolutionError("experiments_optimized is not enabled for this join") + + timestamp_field = self.configuration.get("experiments_timestamp_field") + if not timestamp_field: + raise ResolutionError("experiments_timestamp_field is not set for this join") + return ast.JoinExpr( table=ast.SelectQuery( select=[ @@ -147,7 +152,7 @@ def _join_function_for_experiments( left=ast.Field( chain=[ join_to_add.from_table, - "distinct_id", + self.source_table_key, ] ), op=ast.CompareOperationOp.Eq, @@ -157,7 +162,7 @@ def _join_function_for_experiments( left=ast.Field( chain=[ join_to_add.from_table, - "timestamp", + timestamp_field, ] ), op=ast.CompareOperationOp.GtEq,