diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index dd8ffc8a377a9..69ebf975fe312 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -53,6 +53,7 @@ from posthog.hogql.database.schema.person_distinct_ids import ( PersonDistinctIdsTable, RawPersonDistinctIdsTable, + join_data_warehouse_experiment_table_with_person_distinct_ids_table, ) from posthog.hogql.database.schema.persons import ( PersonsTable, @@ -457,6 +458,14 @@ def define_mappings(warehouse: dict[str, Table], get_table: Callable): ), ) + if "events" in join.joining_table_name and join.configuration.get("experiments_optimized"): + source_table.fields["pdi"] = LazyJoin( + from_field=from_field, + join_table=PersonDistinctIdsTable(), + join_function=join_data_warehouse_experiment_table_with_person_distinct_ids_table, + ) + source_table.fields["person"] = FieldTraverser(chain=["pdi", "person"]) + if join.source_table_name == "persons": person_field = database.events.fields["person"] if isinstance(person_field, ast.FieldTraverser): diff --git a/posthog/hogql/database/schema/person_distinct_ids.py b/posthog/hogql/database/schema/person_distinct_ids.py index 9ebfd9e17fde1..2fecbae4f5960 100644 --- a/posthog/hogql/database/schema/person_distinct_ids.py +++ b/posthog/hogql/database/schema/person_distinct_ids.py @@ -67,6 +67,29 @@ def join_with_person_distinct_ids_table( return join_expr +def join_data_warehouse_experiment_table_with_person_distinct_ids_table( + join_to_add: LazyJoinToAdd, + context: HogQLContext, + node: SelectQuery, +): + from posthog.hogql import ast + + if not join_to_add.fields_accessed: + raise ResolutionError("No fields requested from person_distinct_ids") + join_expr = ast.JoinExpr(table=select_from_person_distinct_ids_table(join_to_add.fields_accessed)) + join_expr.join_type = "LEFT JOIN" + join_expr.alias = join_to_add.to_table + join_expr.constraint = ast.JoinConstraint( + expr=ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=ast.Field(chain=[join_to_add.from_table, *join_to_add.lazy_join.from_field]), + right=ast.Field(chain=[join_to_add.to_table, "distinct_id"]), + ), + constraint_type="ON", + ) + return join_expr + + class RawPersonDistinctIdsTable(Table): fields: dict[str, FieldOrTable] = { **PERSON_DISTINCT_IDS_FIELDS, diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index 9ed6636bc2324..9198c2abaef99 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -246,15 +246,6 @@ def create_data_warehouse_table_with_usage(self): field_name="events", configuration={"experiments_optimized": True, "experiments_timestamp_key": "ds"}, ) - - DataWarehouseJoin.objects.create( - team=self.team, - source_table_name=table_name, - source_table_key="userid", - joining_table_name="persons", - joining_table_key="properties.$user_id", - field_name="person", - ) return table_name @freeze_time("2020-01-01T12:00:00Z") @@ -869,8 +860,9 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) _create_person( team=self.team, - distinct_ids=["internal_test_1"], - properties={"email": "internal_test_1@posthog.com", "$user_id": "internal_test_1"}, + uuid="018f14b8-6cf3-7ffd-80bb-5ef1a9e4d328", + distinct_ids=["018f14b8-6cf3-7ffd-80bb-5ef1a9e4d328", "internal_test_1"], + properties={"email": "internal_test_1@posthog.com"}, ) _create_event( @@ -946,7 +938,7 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) ) # Assert the expected join condition in the clickhouse SQL - expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_12)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_13)s, 6, %(hogql_val_14)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_15)s, 6, %(hogql_val_16)s))))) AS e__events ON" + expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_11)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_12)s, 6, %(hogql_val_13)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_14)s, 6, %(hogql_val_15)s))))) AS e__events ON" self.assertIn( expected_join_condition, str(response.clickhouse),