Skip to content

Commit

Permalink
chore(data-warehouse): clean up logic for person prop filtering (#22043)
Browse files Browse the repository at this point in the history
* clean up logic for person prop filtering

* add test

* typing

* Update query snapshots

* Update query snapshots

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
EDsCODE and github-actions[bot] authored May 2, 2024
1 parent e5dcaf1 commit 7ee81e3
Show file tree
Hide file tree
Showing 5 changed files with 83 additions and 12 deletions.
6 changes: 4 additions & 2 deletions posthog/hogql/property.py
Original file line number Diff line number Diff line change
Expand Up @@ -146,12 +146,14 @@ def property_to_expr(
raise NotImplementedError(f"The '{property.type}' property filter does not work in '{scope}' scope")
operator = cast(Optional[PropertyOperator], property.operator) or PropertyOperator.exact
value = property.value

if property.type == "person" and scope != "person":
chain = ["person", "properties"]
elif property.type == "data_warehouse_person_property":
if isinstance(property.value, str):
table, value = property.value.split(": ")
if isinstance(property.key, str):
table, key = property.key.split(": ")
chain = ["person", table]
property.key = key
else:
raise NotImplementedError("Data warehouse person property filter value must be a string")
elif property.type == "group":
Expand Down
36 changes: 26 additions & 10 deletions posthog/hogql/transforms/property_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,17 @@

from posthog.hogql import ast
from posthog.hogql.context import HogQLContext
from posthog.hogql.database.models import DateTimeDatabaseField
from posthog.hogql.database.models import (
DateTimeDatabaseField,
BooleanDatabaseField,
IntegerDatabaseField,
FloatDatabaseField,
)
from posthog.hogql.escape_sql import escape_hogql_identifier
from posthog.hogql.visitor import CloningVisitor, TraversingVisitor
from posthog.models.property import PropertyName, TableColumn
from posthog.schema import PersonsOnEventsMode
from posthog.hogql.database.s3_table import S3Table


def resolve_property_types(node: ast.Expr, context: HogQLContext) -> ast.Expr:
Expand Down Expand Up @@ -56,15 +62,6 @@ def resolve_property_types(node: ast.Expr, context: HogQLContext) -> ast.Expr:
{f"{group_id}_{name}": property_type for name, property_type in group_property_values if property_type}
)

# swap them out
if (
len(event_properties) == 0
and len(person_properties) == 0
and len(group_properties) == 0
and not property_finder.found_timestamps
):
return node

timezone = context.database.get_timezone() if context and context.database else "UTC"
property_swapper = PropertySwapper(
timezone=timezone,
Expand Down Expand Up @@ -148,6 +145,22 @@ def visit_field(self, node: ast.Field):
),
)

if isinstance(node.type.table_type, ast.LazyJoinType) and isinstance(
node.type.table_type.lazy_join.join_table, S3Table
):
field = node.chain[-1]
field_type = node.type.table_type.lazy_join.join_table.fields.get(str(field), None)
prop_type = "String"

if isinstance(field_type, IntegerDatabaseField) or isinstance(field_type, FloatDatabaseField):
prop_type = "Float"
if isinstance(field_type, DateTimeDatabaseField):
prop_type = "DateTime"
if isinstance(field_type, BooleanDatabaseField):
prop_type = "Boolean"

return self._field_type_to_property_call(node, prop_type)

type = node.type
if isinstance(type, ast.PropertyType) and type.field_type.name == "properties" and len(type.chain) == 1:
property_name = str(type.chain[0])
Expand Down Expand Up @@ -200,6 +213,9 @@ def _convert_string_property_to_type(
field_type = "Float" if posthog_field_type == "Numeric" else posthog_field_type or "String"
self._add_property_notice(node, property_type, field_type)

return self._field_type_to_property_call(node, field_type)

def _field_type_to_property_call(self, node: ast.Field, field_type: str):
if field_type == "DateTime":
return ast.Call(name="toDateTime", args=[node])
if field_type == "Float":
Expand Down
Original file line number Diff line number Diff line change
@@ -1,4 +1,21 @@
# serializer version: 1
# name: TestPropertyTypes.test_data_warehouse_person_property_types
'''

SELECT persons__extended_properties.string_prop AS string_prop, accurateCastOrNull(persons__extended_properties.int_prop, %(hogql_val_6)s) AS int_prop, transform(persons__extended_properties.bool_prop, %(hogql_val_7)s, %(hogql_val_8)s, NULL) AS bool_prop
FROM (
SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, %(hogql_val_0)s), ''), 'null'), '^"|"$', ''), person.version) AS persons___properties___email, person.id AS id
FROM person
WHERE equals(person.team_id, 420)
GROUP BY person.id
HAVING ifNull(equals(argMax(person.is_deleted, person.version), 0), 0)
SETTINGS optimize_aggregation_in_order=1) AS persons LEFT JOIN (
SELECT extended_properties.string_prop AS string_prop, extended_properties.int_prop AS int_prop, extended_properties.bool_prop AS bool_prop, string_prop AS persons__extended_properties___string_prop
FROM s3(%(hogql_val_1_sensitive)s, %(hogql_val_4_sensitive)s, %(hogql_val_5_sensitive)s, %(hogql_val_2)s, %(hogql_val_3)s) AS extended_properties) AS persons__extended_properties ON equals(persons.persons___properties___email, persons__extended_properties.persons__extended_properties___string_prop)
WHERE ifNull(equals(bool_prop, true), 0)
LIMIT 10000
'''
# ---
# name: TestPropertyTypes.test_group_property_types
'''

Expand Down
35 changes: 35 additions & 0 deletions posthog/hogql/transforms/test/test_property_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,8 @@
from posthog.models.group.util import create_group
from posthog.test.base import BaseTest

from posthog.warehouse.models import DataWarehouseTable, DataWarehouseJoin, DataWarehouseCredential


class TestPropertyTypes(BaseTest):
snapshot: Any
Expand Down Expand Up @@ -113,6 +115,39 @@ def test_group_property_types(self):
printed = self._print_select("select organization.properties.inty from events")
assert printed == self.snapshot

@pytest.mark.usefixtures("unittest_snapshot")
@override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False)
def test_data_warehouse_person_property_types(self):
credential = DataWarehouseCredential.objects.create(
team=self.team, access_key="_accesskey", access_secret="_secret"
)
DataWarehouseTable.objects.create(
team=self.team,
name="extended_properties",
columns={
"string_prop": {"hogql": "StringDatabaseField", "clickhouse": "Nullable(String)"},
"int_prop": {"hogql": "IntegerDatabaseField", "clickhouse": "Nullable(Int64)"},
"bool_prop": {"hogql": "BooleanDatabaseField", "clickhouse": "Nullable(Bool)"},
},
credential=credential,
url_pattern="",
)

DataWarehouseJoin.objects.create(
team=self.team,
source_table_name="persons",
source_table_key="properties.email",
joining_table_name="extended_properties",
joining_table_key="string_prop",
field_name="extended_properties",
)

printed = self._print_select(
"select persons.extended_properties.string_prop, persons.extended_properties.int_prop, persons.extended_properties.bool_prop AS bool_prop from persons WHERE bool_prop = true"
)

assert printed == self.snapshot

def _print_select(self, select: str):
expr = parse_select(select)
query = print_ast(
Expand Down
1 change: 1 addition & 0 deletions posthog/models/property/property.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class BehavioralPropertyType(str, Enum):
"event": ["key", "value"],
"person": ["key", "value"],
"data_warehouse": ["key", "value"],
"data_warehouse_person_property": ["key", "value"],
"cohort": ["key", "value"],
"element": ["key", "value"],
"static-cohort": ["key", "value"],
Expand Down

0 comments on commit 7ee81e3

Please sign in to comment.