From 7ee81e3ee5f363251775cac28245eb7f88ac1e0b Mon Sep 17 00:00:00 2001 From: Eric Duong Date: Thu, 2 May 2024 16:43:31 -0400 Subject: [PATCH] chore(data-warehouse): clean up logic for person prop filtering (#22043) * clean up logic for person prop filtering * add test * typing * Update query snapshots * Update query snapshots --------- Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com> --- posthog/hogql/property.py | 6 ++-- posthog/hogql/transforms/property_types.py | 36 +++++++++++++------ .../__snapshots__/test_property_types.ambr | 17 +++++++++ .../transforms/test/test_property_types.py | 35 ++++++++++++++++++ posthog/models/property/property.py | 1 + 5 files changed, 83 insertions(+), 12 deletions(-) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index d7bde5e7e4318..530bb197104da 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -146,12 +146,14 @@ def property_to_expr( raise NotImplementedError(f"The '{property.type}' property filter does not work in '{scope}' scope") operator = cast(Optional[PropertyOperator], property.operator) or PropertyOperator.exact value = property.value + if property.type == "person" and scope != "person": chain = ["person", "properties"] elif property.type == "data_warehouse_person_property": - if isinstance(property.value, str): - table, value = property.value.split(": ") + if isinstance(property.key, str): + table, key = property.key.split(": ") chain = ["person", table] + property.key = key else: raise NotImplementedError("Data warehouse person property filter value must be a string") elif property.type == "group": diff --git a/posthog/hogql/transforms/property_types.py b/posthog/hogql/transforms/property_types.py index 5627980fa0dfc..4a4f98be80094 100644 --- a/posthog/hogql/transforms/property_types.py +++ b/posthog/hogql/transforms/property_types.py @@ -2,11 +2,17 @@ from posthog.hogql import ast from posthog.hogql.context import HogQLContext -from posthog.hogql.database.models import DateTimeDatabaseField +from posthog.hogql.database.models import ( + DateTimeDatabaseField, + BooleanDatabaseField, + IntegerDatabaseField, + FloatDatabaseField, +) from posthog.hogql.escape_sql import escape_hogql_identifier from posthog.hogql.visitor import CloningVisitor, TraversingVisitor from posthog.models.property import PropertyName, TableColumn from posthog.schema import PersonsOnEventsMode +from posthog.hogql.database.s3_table import S3Table def resolve_property_types(node: ast.Expr, context: HogQLContext) -> ast.Expr: @@ -56,15 +62,6 @@ def resolve_property_types(node: ast.Expr, context: HogQLContext) -> ast.Expr: {f"{group_id}_{name}": property_type for name, property_type in group_property_values if property_type} ) - # swap them out - if ( - len(event_properties) == 0 - and len(person_properties) == 0 - and len(group_properties) == 0 - and not property_finder.found_timestamps - ): - return node - timezone = context.database.get_timezone() if context and context.database else "UTC" property_swapper = PropertySwapper( timezone=timezone, @@ -148,6 +145,22 @@ def visit_field(self, node: ast.Field): ), ) + if isinstance(node.type.table_type, ast.LazyJoinType) and isinstance( + node.type.table_type.lazy_join.join_table, S3Table + ): + field = node.chain[-1] + field_type = node.type.table_type.lazy_join.join_table.fields.get(str(field), None) + prop_type = "String" + + if isinstance(field_type, IntegerDatabaseField) or isinstance(field_type, FloatDatabaseField): + prop_type = "Float" + if isinstance(field_type, DateTimeDatabaseField): + prop_type = "DateTime" + if isinstance(field_type, BooleanDatabaseField): + prop_type = "Boolean" + + return self._field_type_to_property_call(node, prop_type) + type = node.type if isinstance(type, ast.PropertyType) and type.field_type.name == "properties" and len(type.chain) == 1: property_name = str(type.chain[0]) @@ -200,6 +213,9 @@ def _convert_string_property_to_type( field_type = "Float" if posthog_field_type == "Numeric" else posthog_field_type or "String" self._add_property_notice(node, property_type, field_type) + return self._field_type_to_property_call(node, field_type) + + def _field_type_to_property_call(self, node: ast.Field, field_type: str): if field_type == "DateTime": return ast.Call(name="toDateTime", args=[node]) if field_type == "Float": diff --git a/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr b/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr index 4fa50d0dc5f36..f302f8c4bfa2c 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_property_types.ambr @@ -1,4 +1,21 @@ # serializer version: 1 +# name: TestPropertyTypes.test_data_warehouse_person_property_types + ''' + + SELECT persons__extended_properties.string_prop AS string_prop, accurateCastOrNull(persons__extended_properties.int_prop, %(hogql_val_6)s) AS int_prop, transform(persons__extended_properties.bool_prop, %(hogql_val_7)s, %(hogql_val_8)s, NULL) AS bool_prop + FROM ( + SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS persons___properties___email, person.id AS id + FROM person + WHERE equals(person.team_id, 420) + GROUP BY person.id + HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0) + SETTINGS optimize_aggregation_in_order=1) AS persons LEFT JOIN ( + SELECT extended_properties.string_prop AS string_prop, extended_properties.int_prop AS int_prop, extended_properties.bool_prop AS bool_prop, string_prop AS persons__extended_properties___string_prop + FROM s3(%(hogql_val_1_sensitive)s, %(hogql_val_4_sensitive)s, %(hogql_val_5_sensitive)s, %(hogql_val_2)s, %(hogql_val_3)s) AS extended_properties) AS persons__extended_properties ON equals(persons.persons___properties___email, persons__extended_properties.persons__extended_properties___string_prop) + WHERE ifNull(equals(bool_prop, true), 0) + LIMIT 10000 + ''' +# --- # name: TestPropertyTypes.test_group_property_types ''' diff --git a/posthog/hogql/transforms/test/test_property_types.py b/posthog/hogql/transforms/test/test_property_types.py index 7992c46a0ce6e..bf1a517cc7751 100644 --- a/posthog/hogql/transforms/test/test_property_types.py +++ b/posthog/hogql/transforms/test/test_property_types.py @@ -11,6 +11,8 @@ from posthog.models.group.util import create_group from posthog.test.base import BaseTest +from posthog.warehouse.models import DataWarehouseTable, DataWarehouseJoin, DataWarehouseCredential + class TestPropertyTypes(BaseTest): snapshot: Any @@ -113,6 +115,39 @@ def test_group_property_types(self): printed = self._print_select("select organization.properties.inty from events") assert printed == self.snapshot + @pytest.mark.usefixtures("unittest_snapshot") + @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) + def test_data_warehouse_person_property_types(self): + credential = DataWarehouseCredential.objects.create( + team=self.team, access_key="_accesskey", access_secret="_secret" + ) + DataWarehouseTable.objects.create( + team=self.team, + name="extended_properties", + columns={ + "string_prop": {"hogql": "StringDatabaseField", "clickhouse": "Nullable(String)"}, + "int_prop": {"hogql": "IntegerDatabaseField", "clickhouse": "Nullable(Int64)"}, + "bool_prop": {"hogql": "BooleanDatabaseField", "clickhouse": "Nullable(Bool)"}, + }, + credential=credential, + url_pattern="", + ) + + DataWarehouseJoin.objects.create( + team=self.team, + source_table_name="persons", + source_table_key="properties.email", + joining_table_name="extended_properties", + joining_table_key="string_prop", + field_name="extended_properties", + ) + + printed = self._print_select( + "select persons.extended_properties.string_prop, persons.extended_properties.int_prop, persons.extended_properties.bool_prop AS bool_prop from persons WHERE bool_prop = true" + ) + + assert printed == self.snapshot + def _print_select(self, select: str): expr = parse_select(select) query = print_ast( diff --git a/posthog/models/property/property.py b/posthog/models/property/property.py index 74ef611e257a3..7185306b8ccb2 100644 --- a/posthog/models/property/property.py +++ b/posthog/models/property/property.py @@ -89,6 +89,7 @@ class BehavioralPropertyType(str, Enum): "event": ["key", "value"], "person": ["key", "value"], "data_warehouse": ["key", "value"], + "data_warehouse_person_property": ["key", "value"], "cohort": ["key", "value"], "element": ["key", "value"], "static-cohort": ["key", "value"],