From 967eb552a838648be3200e4b81ac981d6fd59422 Mon Sep 17 00:00:00 2001 From: timgl Date: Wed, 24 Jul 2024 01:29:31 +0100 Subject: [PATCH] revert: perf: use elements chain materialization (#23934) --- ee/clickhouse/materialized_columns/columns.py | 1 - mypy-baseline.txt | 3 + posthog/api/test/test_action.py | 4 + .../test/__snapshots__/test_schema.ambr | 20 +- posthog/hogql/database/schema/events.py | 5 - .../test/__snapshots__/test_database.ambr | 84 +-- posthog/hogql/property.py | 513 ++++++-------- .../test/__snapshots__/test_resolver.ambr | 652 ------------------ posthog/hogql/test/test_property.py | 115 ++- .../test/__snapshots__/test_funnel.ambr | 4 +- posthog/models/event/sql.py | 12 +- posthog/test/base.py | 5 - 12 files changed, 285 insertions(+), 1133 deletions(-) diff --git a/ee/clickhouse/materialized_columns/columns.py b/ee/clickhouse/materialized_columns/columns.py index 135f27b392190..3dc21ccde59f6 100644 --- a/ee/clickhouse/materialized_columns/columns.py +++ b/ee/clickhouse/materialized_columns/columns.py @@ -44,7 +44,6 @@ def get_materialized_columns( WHERE database = %(database)s AND table = %(table)s AND comment LIKE '%%column_materializer::%%' - AND comment not LIKE '%%column_materializer::elements_chain::%%' """, {"database": CLICKHOUSE_DATABASE, "table": table}, ) diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 503f30e14b414..7ba31e2481702 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -256,6 +256,9 @@ posthog/models/property/util.py:0: error: Argument 1 to "append" of "list" has i posthog/models/property/util.py:0: error: Argument 1 to "append" of "list" has incompatible type "str | int"; expected "str" [arg-type] posthog/models/property/util.py:0: error: Argument 1 to "append" of "list" has incompatible type "str | int"; expected "str" [arg-type] posthog/queries/trends/util.py:0: error: Argument 1 to "translate_hogql" has incompatible type "str | None"; expected "str" [arg-type] +posthog/hogql/property.py:0: error: Argument "chain" to "Field" has incompatible type "list[str]"; expected "list[str | int]" [arg-type] +posthog/hogql/property.py:0: note: "List" is invariant -- see https://mypy.readthedocs.io/en/stable/common_issues.html#variance +posthog/hogql/property.py:0: note: Consider using "Sequence" instead, which is covariant posthog/hogql/property.py:0: error: Incompatible type for lookup 'id': (got "str | int | list[str]", expected "str | int") [misc] posthog/hogql/property.py:0: error: Incompatible type for lookup 'pk': (got "str | float", expected "str | int") [misc] posthog/hogql/filters.py:0: error: Incompatible default for argument "team" (default has type "None", argument has type "Team") [assignment] diff --git a/posthog/api/test/test_action.py b/posthog/api/test/test_action.py index 3e4a0c4f56d87..2d162ff872ae1 100644 --- a/posthog/api/test/test_action.py +++ b/posthog/api/test/test_action.py @@ -136,7 +136,9 @@ def test_update_action(self, patch_capture, *args): action = Action.objects.create( name="user signed up", team=self.team, steps_json=[{"event": "$autocapture", "text": "sign me up!"}] ) + action.refresh_bytecode() action.save() + previous_bytecode = action.bytecode response = self.client.patch( f"/api/projects/{self.team.id}/actions/{action.pk}/", @@ -196,6 +198,8 @@ def test_update_action(self, patch_capture, *args): action.refresh_from_db() assert action.name == "user signed up 2" + assert previous_bytecode != action.bytecode + # Assert analytics are sent patch_capture.assert_called_with( user, diff --git a/posthog/clickhouse/test/__snapshots__/test_schema.ambr b/posthog/clickhouse/test/__snapshots__/test_schema.ambr index 69407e9000939..344ac139affa4 100644 --- a/posthog/clickhouse/test/__snapshots__/test_schema.ambr +++ b/posthog/clickhouse/test/__snapshots__/test_schema.ambr @@ -595,10 +595,6 @@ , $group_4 VARCHAR COMMENT 'column_materializer::$group_4' , $window_id VARCHAR COMMENT 'column_materializer::$window_id' , $session_id VARCHAR COMMENT 'column_materializer::$session_id' - , elements_chain_href String COMMENT 'column_materializer::elements_chain::href' - , elements_chain_texts Array(String) COMMENT 'column_materializer::elements_chain::texts' - , elements_chain_ids Array(String) COMMENT 'column_materializer::elements_chain::ids' - , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) COMMENT 'column_materializer::elements_chain::elements' , _timestamp DateTime @@ -2177,10 +2173,6 @@ , $group_4 VARCHAR MATERIALIZED replaceRegexpAll(JSONExtractRaw(properties, '$group_4'), '^"|"$', '') COMMENT 'column_materializer::$group_4' , $window_id VARCHAR MATERIALIZED replaceRegexpAll(JSONExtractRaw(properties, '$window_id'), '^"|"$', '') COMMENT 'column_materializer::$window_id' , $session_id VARCHAR MATERIALIZED replaceRegexpAll(JSONExtractRaw(properties, '$session_id'), '^"|"$', '') COMMENT 'column_materializer::$session_id' - , elements_chain_href String MATERIALIZED extract(elements_chain, '(?::|")href="(.*?)"') - , elements_chain_texts Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")text="(.*?)"')) - , elements_chain_ids Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")attr_id="(.*?)"')) - , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?:^|;)(a|button|form|input|select|textarea|label)(?:\.|$|:)')) , INDEX `minmax_$group_0` `$group_0` TYPE minmax GRANULARITY 1 , INDEX `minmax_$group_1` `$group_1` TYPE minmax GRANULARITY 1 , INDEX `minmax_$group_2` `$group_2` TYPE minmax GRANULARITY 1 @@ -2188,6 +2180,10 @@ , INDEX `minmax_$group_4` `$group_4` TYPE minmax GRANULARITY 1 , INDEX `minmax_$window_id` `$window_id` TYPE minmax GRANULARITY 1 , INDEX `minmax_$session_id` `$session_id` TYPE minmax GRANULARITY 1 + , elements_chain_href String MATERIALIZED extract(elements_chain, '(?::|")href="(.*?)"') + , elements_chain_texts Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")text="(.*?)"')) + , elements_chain_ids Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")attr_id="(.*?)"')) + , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?:^|;)(a|button|form|input|select|textarea|label)(?:\.|$|:)')) , _timestamp DateTime @@ -3271,10 +3267,6 @@ , $group_4 VARCHAR MATERIALIZED replaceRegexpAll(JSONExtractRaw(properties, '$group_4'), '^"|"$', '') COMMENT 'column_materializer::$group_4' , $window_id VARCHAR MATERIALIZED replaceRegexpAll(JSONExtractRaw(properties, '$window_id'), '^"|"$', '') COMMENT 'column_materializer::$window_id' , $session_id VARCHAR MATERIALIZED replaceRegexpAll(JSONExtractRaw(properties, '$session_id'), '^"|"$', '') COMMENT 'column_materializer::$session_id' - , elements_chain_href String MATERIALIZED extract(elements_chain, '(?::|")href="(.*?)"') - , elements_chain_texts Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")text="(.*?)"')) - , elements_chain_ids Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")attr_id="(.*?)"')) - , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?:^|;)(a|button|form|input|select|textarea|label)(?:\.|$|:)')) , INDEX `minmax_$group_0` `$group_0` TYPE minmax GRANULARITY 1 , INDEX `minmax_$group_1` `$group_1` TYPE minmax GRANULARITY 1 , INDEX `minmax_$group_2` `$group_2` TYPE minmax GRANULARITY 1 @@ -3282,6 +3274,10 @@ , INDEX `minmax_$group_4` `$group_4` TYPE minmax GRANULARITY 1 , INDEX `minmax_$window_id` `$window_id` TYPE minmax GRANULARITY 1 , INDEX `minmax_$session_id` `$session_id` TYPE minmax GRANULARITY 1 + , elements_chain_href String MATERIALIZED extract(elements_chain, '(?::|")href="(.*?)"') + , elements_chain_texts Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")text="(.*?)"')) + , elements_chain_ids Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|")attr_id="(.*?)"')) + , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?:^|;)(a|button|form|input|select|textarea|label)(?:\.|$|:)')) , _timestamp DateTime diff --git a/posthog/hogql/database/schema/events.py b/posthog/hogql/database/schema/events.py index a0225bd334b2d..1a489e5a9d90f 100644 --- a/posthog/hogql/database/schema/events.py +++ b/posthog/hogql/database/schema/events.py @@ -3,7 +3,6 @@ StringDatabaseField, DateTimeDatabaseField, StringJSONDatabaseField, - StringArrayDatabaseField, IntegerDatabaseField, Table, LazyJoin, @@ -115,10 +114,6 @@ class EventsTable(Table): join_table=SessionsTableV1(), join_function=join_events_table_to_sessions_table, ), - "elements_chain_href": StringDatabaseField(name="elements_chain_href"), - "elements_chain_texts": StringArrayDatabaseField(name="elements_chain_texts"), - "elements_chain_ids": StringArrayDatabaseField(name="elements_chain_ids"), - "elements_chain_elements": StringArrayDatabaseField(name="elements_chain_elements"), } def to_printed_clickhouse(self, context): diff --git a/posthog/hogql/database/test/__snapshots__/test_database.ambr b/posthog/hogql/database/test/__snapshots__/test_database.ambr index 77a2452922e7e..ce03813aa1004 100644 --- a/posthog/hogql/database/test/__snapshots__/test_database.ambr +++ b/posthog/hogql/database/test/__snapshots__/test_database.ambr @@ -377,42 +377,6 @@ "schema_valid": true, "table": "sessions", "type": "lazy_table" - }, - "elements_chain_href": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_href", - "name": "elements_chain_href", - "schema_valid": true, - "table": null, - "type": "string" - }, - "elements_chain_texts": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_texts", - "name": "elements_chain_texts", - "schema_valid": true, - "table": null, - "type": "array" - }, - "elements_chain_ids": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_ids", - "name": "elements_chain_ids", - "schema_valid": true, - "table": null, - "type": "array" - }, - "elements_chain_elements": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_elements", - "name": "elements_chain_elements", - "schema_valid": true, - "table": null, - "type": "array" } }, "id": "events", @@ -748,11 +712,7 @@ "group_3", "$group_4", "group_4", - "session", - "elements_chain_href", - "elements_chain_texts", - "elements_chain_ids", - "elements_chain_elements" + "session" ], "hogql_value": "events", "name": "events", @@ -1734,42 +1694,6 @@ "schema_valid": true, "table": "sessions", "type": "lazy_table" - }, - "elements_chain_href": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_href", - "name": "elements_chain_href", - "schema_valid": true, - "table": null, - "type": "string" - }, - "elements_chain_texts": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_texts", - "name": "elements_chain_texts", - "schema_valid": true, - "table": null, - "type": "array" - }, - "elements_chain_ids": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_ids", - "name": "elements_chain_ids", - "schema_valid": true, - "table": null, - "type": "array" - }, - "elements_chain_elements": { - "chain": null, - "fields": null, - "hogql_value": "elements_chain_elements", - "name": "elements_chain_elements", - "schema_valid": true, - "table": null, - "type": "array" } }, "id": "events", @@ -2105,11 +2029,7 @@ "group_3", "$group_4", "group_4", - "session", - "elements_chain_href", - "elements_chain_texts", - "elements_chain_ids", - "elements_chain_elements" + "session" ], "hogql_value": "events", "name": "events", diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 560a694fbf564..52236b64fb6dd 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -1,4 +1,5 @@ -from typing import Literal, Optional, cast +import re +from typing import Literal, Optional, Union, cast from pydantic import BaseModel @@ -21,28 +22,16 @@ Team, ) from posthog.models.event import Selector -from posthog.models.element import Element -from posthog.models.property import PropertyGroup, ValueT +from posthog.models.property import PropertyGroup from posthog.models.property.util import build_selector_regex from posthog.models.property_definition import PropertyType from posthog.schema import ( + EmptyPropertyFilter, FilterLogicalOperator, PropertyGroupFilter, PropertyGroupFilterValue, PropertyOperator, RetentionEntity, - EventPropertyFilter, - PersonPropertyFilter, - ElementPropertyFilter, - SessionPropertyFilter, - CohortPropertyFilter, - RecordingPropertyFilter, - GroupPropertyFilter, - FeaturePropertyFilter, - HogQLPropertyFilter, - EmptyPropertyFilter, - DataWarehousePropertyFilter, - DataWarehousePersonPropertyFilter, ) from posthog.warehouse.models import DataWarehouseJoin, DataWarehouseSavedQuery, DataWarehouseTable from posthog.utils import get_from_dict_or_attr @@ -78,194 +67,8 @@ def visit_call(self, node: ast.Call): self.visit(arg) -def _handle_bool_values(value: ValueT, field: ast.Field, property: Property, team: Team) -> ValueT | bool: - if value != "true" and value != "false": - return value - if property.type == "person": - property_types = PropertyDefinition.objects.filter( - team=team, - name=property.key, - type=PropertyDefinition.Type.PERSON, - ) - elif property.type == "group": - property_types = PropertyDefinition.objects.filter( - team=team, - name=property.key, - type=PropertyDefinition.Type.GROUP, - group_type_index=property.group_type_index, - ) - elif property.type == "data_warehouse_person_property": - key = field.chain[-2] - - # TODO: pass id of table item being filtered on instead of searching through joins - current_join: DataWarehouseJoin | None = ( - DataWarehouseJoin.objects.filter(Q(deleted__isnull=True) | Q(deleted=False)) - .filter(team=team, source_table_name="persons", field_name=key) - .first() - ) - - if not current_join: - raise Exception(f"Could not find join for key {key}") - - prop_type = None - - maybe_view = ( - DataWarehouseSavedQuery.objects.filter(Q(deleted__isnull=True) | Q(deleted=False)) - .filter(team=team, name=current_join.joining_table_name) - .first() - ) - - if maybe_view: - prop_type_dict = maybe_view.columns.get(property.key, None) - prop_type = prop_type_dict.get("hogql") - - maybe_table = ( - DataWarehouseTable.objects.filter(Q(deleted__isnull=True) | Q(deleted=False)) - .filter(team=team, name=current_join.joining_table_name) - .first() - ) - - if maybe_table: - prop_type_dict = maybe_table.columns.get(property.key, None) - prop_type = prop_type_dict.get("hogql") - - if not maybe_view and not maybe_table: - raise Exception(f"Could not find table or view for key {key}") - - if prop_type == "BooleanDatabaseField": - if value == "true": - value = True - if value == "false": - value = False - - return value - - else: - property_types = PropertyDefinition.objects.filter( - team=team, - name=property.key, - type=PropertyDefinition.Type.EVENT, - ) - property_type = property_types[0].property_type if len(property_types) > 0 else None - - if property_type == PropertyType.Boolean: - if value == "true": - return True - if value == "false": - return False - return value - - -def _field_to_compare_op( - field: ast.Field, value: ValueT, operator: PropertyOperator, property: Property, is_json_field: bool, team: Team -) -> ast.Expr: - if operator == PropertyOperator.IS_SET: - return ast.CompareOperation( - op=ast.CompareOperationOp.NotEq, - left=field, - right=ast.Constant(value=None), - ) - elif operator == PropertyOperator.IS_NOT_SET: - return ast.Or( - exprs=[ - ast.CompareOperation( - op=ast.CompareOperationOp.Eq, - left=field, - right=ast.Constant(value=None), - ) - ] - + ( - [ - ast.Not( - expr=ast.Call( - name="JSONHas", - args=[ast.Field(chain=field.chain[:-1]), ast.Constant(value=property.key)], - ) - ) - ] - if is_json_field - else [] - ) - ) - elif operator == PropertyOperator.ICONTAINS: - return ast.CompareOperation( - op=ast.CompareOperationOp.ILike, - left=field, - right=ast.Constant(value=f"%{value}%"), - ) - elif operator == PropertyOperator.NOT_ICONTAINS: - return ast.CompareOperation( - op=ast.CompareOperationOp.NotILike, - left=field, - right=ast.Constant(value=f"%{value}%"), - ) - elif operator == PropertyOperator.REGEX: - return ast.Call( - name="ifNull", - args=[ - ast.Call(name="match", args=[ast.Call(name="toString", args=[field]), ast.Constant(value=value)]), - ast.Constant(value=0), - ], - ) - elif operator == PropertyOperator.NOT_REGEX: - return ast.Call( - name="ifNull", - args=[ - ast.Call( - name="not", - args=[ - ast.Call( - name="match", args=[ast.Call(name="toString", args=[field]), ast.Constant(value=value)] - ) - ], - ), - ast.Constant(value=1), - ], - ) - elif operator == PropertyOperator.EXACT or operator == PropertyOperator.IS_DATE_EXACT: - return ast.CompareOperation( - op=ast.CompareOperationOp.Eq, - left=field, - right=ast.Constant(value=_handle_bool_values(value, field, property, team)), - ) - elif operator == PropertyOperator.IS_NOT: - return ast.CompareOperation( - op=ast.CompareOperationOp.NotEq, - left=field, - right=ast.Constant(value=_handle_bool_values(value, field, property, team)), - ) - elif operator == PropertyOperator.LT or operator == PropertyOperator.IS_DATE_BEFORE: - return ast.CompareOperation(op=ast.CompareOperationOp.Lt, left=field, right=ast.Constant(value=value)) - elif operator == PropertyOperator.GT or operator == PropertyOperator.IS_DATE_AFTER: - return ast.CompareOperation(op=ast.CompareOperationOp.Gt, left=field, right=ast.Constant(value=value)) - elif operator == PropertyOperator.LTE: - return ast.CompareOperation(op=ast.CompareOperationOp.LtEq, left=field, right=ast.Constant(value=value)) - elif operator == PropertyOperator.GTE: - return ast.CompareOperation(op=ast.CompareOperationOp.GtEq, left=field, right=ast.Constant(value=value)) - else: - raise NotImplementedError(f"PropertyOperator {operator} not implemented") - - def property_to_expr( - property: list - | dict - | PropertyGroup - | PropertyGroupFilter - | PropertyGroupFilterValue - | Property - | ast.Expr - | EventPropertyFilter - | PersonPropertyFilter - | ElementPropertyFilter - | SessionPropertyFilter - | CohortPropertyFilter - | RecordingPropertyFilter - | GroupPropertyFilter - | FeaturePropertyFilter - | HogQLPropertyFilter - | EmptyPropertyFilter - | DataWarehousePropertyFilter - | DataWarehousePersonPropertyFilter, + property: Union[BaseModel, PropertyGroup, Property, dict, list, ast.Expr], team: Team, scope: Literal["event", "person", "session", "replay", "replay_entity", "replay_pdi"] = "event", ) -> ast.Expr: @@ -367,11 +170,16 @@ def property_to_expr( else: chain = ["properties"] + if property.type == "session": + properties_field = None + else: + properties_field = ast.Field(chain=chain) + field = ast.Field(chain=[*chain, property.key]) if isinstance(value, list): if len(value) == 0: - return ast.Constant(value=1) + return ast.Constant(value=True) elif len(value) == 1: value = value[0] else: @@ -398,14 +206,164 @@ def property_to_expr( return ast.And(exprs=exprs) return ast.Or(exprs=exprs) - return _field_to_compare_op( - field=field, - value=value, - operator=operator, - team=team, - property=property, - is_json_field=property.type != "session", - ) + if operator == PropertyOperator.IS_SET: + return ast.CompareOperation( + op=ast.CompareOperationOp.NotEq, + left=field, + right=ast.Constant(value=None), + ) + elif operator == PropertyOperator.IS_NOT_SET: + return ast.Or( + exprs=[ + ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=field, + right=ast.Constant(value=None), + ) + ] + + ( + [] + if not properties_field or properties_field == field + else [ + ast.Not( + expr=ast.Call( + name="JSONHas", + args=[properties_field, ast.Constant(value=property.key)], + ) + ) + ] + ) + ) + elif operator == PropertyOperator.ICONTAINS: + return ast.CompareOperation( + op=ast.CompareOperationOp.ILike, + left=field, + right=ast.Constant(value=f"%{value}%"), + ) + elif operator == PropertyOperator.NOT_ICONTAINS: + return ast.CompareOperation( + op=ast.CompareOperationOp.NotILike, + left=field, + right=ast.Constant(value=f"%{value}%"), + ) + elif operator == PropertyOperator.REGEX: + return ast.Call( + name="ifNull", + args=[ + ast.Call(name="match", args=[ast.Call(name="toString", args=[field]), ast.Constant(value=value)]), + ast.Constant(value=0), + ], + ) + elif operator == PropertyOperator.NOT_REGEX: + return ast.Call( + name="ifNull", + args=[ + ast.Call( + name="not", + args=[ + ast.Call( + name="match", args=[ast.Call(name="toString", args=[field]), ast.Constant(value=value)] + ) + ], + ), + ast.Constant(value=1), + ], + ) + elif operator == PropertyOperator.EXACT or operator == PropertyOperator.IS_DATE_EXACT: + op = ast.CompareOperationOp.Eq + elif operator == PropertyOperator.IS_NOT: + op = ast.CompareOperationOp.NotEq + elif operator == PropertyOperator.LT or operator == PropertyOperator.IS_DATE_BEFORE: + op = ast.CompareOperationOp.Lt + elif operator == PropertyOperator.GT or operator == PropertyOperator.IS_DATE_AFTER: + op = ast.CompareOperationOp.Gt + elif operator == PropertyOperator.LTE: + op = ast.CompareOperationOp.LtEq + elif operator == PropertyOperator.GTE: + op = ast.CompareOperationOp.GtEq + else: + raise NotImplementedError(f"PropertyOperator {operator} not implemented") + + # For Boolean and untyped properties, treat "true" and "false" as boolean values + if ( + (op == ast.CompareOperationOp.Eq or op == ast.CompareOperationOp.NotEq) + and team is not None + and (value == "true" or value == "false") + ): + if property.type == "person": + property_types = PropertyDefinition.objects.filter( + team=team, + name=property.key, + type=PropertyDefinition.Type.PERSON, + ) + elif property.type == "group": + property_types = PropertyDefinition.objects.filter( + team=team, + name=property.key, + type=PropertyDefinition.Type.GROUP, + group_type_index=property.group_type_index, + ) + elif property.type == "data_warehouse_person_property": + key = chain[-1] + + # TODO: pass id of table item being filtered on instead of searching through joins + current_join: DataWarehouseJoin | None = ( + DataWarehouseJoin.objects.filter(Q(deleted__isnull=True) | Q(deleted=False)) + .filter(team=team, source_table_name="persons", field_name=key) + .first() + ) + + if not current_join: + raise Exception(f"Could not find join for key {key}") + + prop_type = None + + maybe_view = ( + DataWarehouseSavedQuery.objects.filter(Q(deleted__isnull=True) | Q(deleted=False)) + .filter(team=team, name=current_join.joining_table_name) + .first() + ) + + if maybe_view: + prop_type_dict = maybe_view.columns.get(property.key, None) + prop_type = prop_type_dict.get("hogql") + + maybe_table = ( + DataWarehouseTable.objects.filter(Q(deleted__isnull=True) | Q(deleted=False)) + .filter(team=team, name=current_join.joining_table_name) + .first() + ) + + if maybe_table: + prop_type_dict = maybe_table.columns.get(property.key, None) + prop_type = prop_type_dict.get("hogql") + + if not maybe_view and not maybe_table: + raise Exception(f"Could not find table or view for key {key}") + + if prop_type == "BooleanDatabaseField": + if value == "true": + value = True + if value == "false": + value = False + + return ast.CompareOperation(op=op, left=field, right=ast.Constant(value=value)) + + else: + property_types = PropertyDefinition.objects.filter( + team=team, + name=property.key, + type=PropertyDefinition.Type.EVENT, + ) + property_type = property_types[0].property_type if len(property_types) > 0 else None + + if property_type == PropertyType.Boolean: + if value == "true": + value = True + if value == "false": + value = False + + return ast.CompareOperation(op=op, left=field, right=ast.Constant(value=value)) elif property.type == "element": if scope == "person": @@ -449,34 +407,10 @@ def property_to_expr( return expr if property.key == "href": - return parse_expr( - "arrayExists(href -> {compare}, elements_chain_hrefs)", - { - "compare": _field_to_compare_op( - field=ast.Field(chain=["href"]), - value=value, - operator=operator, - team=team, - property=property, - is_json_field=False, - ) - }, - ) + return element_chain_key_filter("href", str(value), operator) if property.key == "text": - return parse_expr( - "arrayExists(text -> {compare}, elements_chain_texts)", - { - "compare": _field_to_compare_op( - field=ast.Field(chain=["text"]), - value=value, - operator=operator, - team=team, - property=property, - is_json_field=False, - ) - }, - ) + return element_chain_key_filter("text", str(value), operator) raise NotImplementedError(f"property_to_expr for type element not implemented for key {property.key}") elif property.type == "cohort" or property.type == "static-cohort" or property.type == "precalculated-cohort": @@ -515,49 +449,21 @@ def action_to_expr(action: Action) -> ast.Expr: exprs.append(tag_name_to_expr(step.tag_name)) if step.href is not None: if step.href_matching == "regex": - exprs.append( - ast.CompareOperation( - op=ast.CompareOperationOp.Regex, - left=ast.Field(chain=["elements_chain_href"]), - right=ast.Constant(value=step.href), - ) - ) + operator = PropertyOperator.REGEX elif step.href_matching == "contains": - exprs.append( - ast.CompareOperation( - op=ast.CompareOperationOp.ILike, - left=ast.Field(chain=["elements_chain_href"]), - right=ast.Constant(value=f"%{step.href}%"), - ) - ) + operator = PropertyOperator.ICONTAINS else: - exprs.append( - ast.CompareOperation( - op=ast.CompareOperationOp.Eq, - left=ast.Field(chain=["elements_chain_href"]), - right=ast.Constant(value=step.href), - ) - ) + operator = PropertyOperator.EXACT + exprs.append(element_chain_key_filter("href", step.href, operator)) if step.text is not None: - value = step.text if step.text_matching == "regex": - match = ast.CompareOperationOp.Regex + operator = PropertyOperator.REGEX elif step.text_matching == "contains": - match = ast.CompareOperationOp.ILike - value = f"%{value}%" + operator = PropertyOperator.ICONTAINS else: - match = ast.CompareOperationOp.Eq - - exprs.append( - parse_expr( - "arrayExists(x -> {match}, elements_chain_texts)", - { - "match": ast.CompareOperation( - op=match, left=ast.Field(chain=["x"]), right=ast.Constant(value=value) - ) - }, - ) - ) + operator = PropertyOperator.EXACT + exprs.append(element_chain_key_filter("text", step.text, operator)) + if step.url: if step.url_matching == "exact": expr = parse_expr( @@ -606,38 +512,45 @@ def entity_to_expr(entity: RetentionEntity) -> ast.Expr: ) +def element_chain_key_filter(key: str, text: str, operator: PropertyOperator): + escaped = text.replace('"', r"\"") + if operator == PropertyOperator.IS_SET or operator == PropertyOperator.IS_NOT_SET: + value = r'[^"]+' + elif operator == PropertyOperator.ICONTAINS or operator == PropertyOperator.NOT_ICONTAINS: + value = rf'[^"]*{re.escape(escaped)}[^"]*' + elif operator == PropertyOperator.REGEX or operator == PropertyOperator.NOT_REGEX: + value = escaped + elif operator == PropertyOperator.EXACT or operator == PropertyOperator.IS_NOT: + value = re.escape(escaped) + else: + raise NotImplementedError(f"element_href_to_expr not implemented for operator {operator}") + + regex = f'({key}="{value}")' + if operator == PropertyOperator.ICONTAINS or operator == PropertyOperator.NOT_ICONTAINS: + expr = parse_expr("elements_chain =~* {regex}", {"regex": ast.Constant(value=str(regex))}) + else: + expr = parse_expr("elements_chain =~ {regex}", {"regex": ast.Constant(value=str(regex))}) + + if ( + operator == PropertyOperator.IS_NOT_SET + or operator == PropertyOperator.NOT_ICONTAINS + or operator == PropertyOperator.IS_NOT + or operator == PropertyOperator.NOT_REGEX + ): + expr = ast.Call(name="not", args=[expr]) + return expr + + def tag_name_to_expr(tag_name: str): regex = rf"(^|;){tag_name}(\.|$|;|:)" expr = parse_expr("elements_chain =~ {regex}", {"regex": ast.Constant(value=str(regex))}) return expr -def selector_to_expr(selector_string: str): - selector = Selector(selector_string, escape_slashes=False) - exprs = [] - regex = build_selector_regex(selector) - exprs.append(parse_expr("elements_chain =~ {regex}", {"regex": ast.Constant(value=regex)})) - - useful_elements: list[ast.Expr] = [] - for part in selector.parts: - if "tag_name" in part.data: - if part.data["tag_name"] in Element.USEFUL_ELEMENTS: - useful_elements.append(ast.Constant(value=part.data["tag_name"])) - - if "attr_id" in part.data: - exprs.append( - parse_expr( - "indexOf(elements_chain_ids, {value}) > 0", {"value": ast.Constant(value=part.data["attr_id"])} - ) - ) - if len(useful_elements) > 0: - exprs.append( - parse_expr("hasAll(elements_chain_elements, {value})", {"value": ast.Array(exprs=useful_elements)}) - ) - - if len(exprs) == 1: - return exprs[0] - return ast.And(exprs=exprs) +def selector_to_expr(selector: str): + regex = build_selector_regex(Selector(selector, escape_slashes=False)) + expr = parse_expr("elements_chain =~ {regex}", {"regex": ast.Constant(value=regex)}) + return expr def get_property_type(property): diff --git a/posthog/hogql/test/__snapshots__/test_resolver.ambr b/posthog/hogql/test/__snapshots__/test_resolver.ambr index 9907d46ee7f89..d7d042252c55d 100644 --- a/posthog/hogql/test/__snapshots__/test_resolver.ambr +++ b/posthog/hogql/test/__snapshots__/test_resolver.ambr @@ -32,10 +32,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -124,34 +120,6 @@ table_type: } }, - elements_chain_elements: { - alias: "elements_chain_elements" - type: { - name: "elements_chain_elements" - table_type: - } - }, - elements_chain_href: { - alias: "elements_chain_href" - type: { - name: "elements_chain_href" - table_type: - } - }, - elements_chain_ids: { - alias: "elements_chain_ids" - type: { - name: "elements_chain_ids" - table_type: - } - }, - elements_chain_texts: { - alias: "elements_chain_texts" - type: { - name: "elements_chain_texts" - table_type: - } - }, event: { alias: "event" type: { @@ -414,74 +382,6 @@ alias: "$group_4" type: } - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: { - name: "elements_chain_href" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_href" - type: - } - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: { - name: "elements_chain_texts" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_texts" - type: - } - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: { - name: "elements_chain_ids" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_ids" - type: - } - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: { - name: "elements_chain_elements" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_elements" - type: - } } ] select_from: { @@ -640,50 +540,6 @@ } hidden: True type: - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: - } - hidden: True - type: - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: - } - hidden: True - type: - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: - } - hidden: True - type: - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: - } - hidden: True - type: } ] select_from: { @@ -715,10 +571,6 @@ created_at: , distinct_id: , elements_chain: , - elements_chain_elements: , - elements_chain_href: , - elements_chain_ids: , - elements_chain_texts: , event: , properties: , timestamp: , @@ -755,10 +607,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -1011,74 +859,6 @@ alias: "$group_4" type: } - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: { - name: "elements_chain_href" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_href" - type: - } - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: { - name: "elements_chain_texts" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_texts" - type: - } - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: { - name: "elements_chain_ids" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_ids" - type: - } - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: { - name: "elements_chain_elements" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_elements" - type: - } } ] select_from: { @@ -1104,10 +884,6 @@ created_at: , distinct_id: , elements_chain: , - elements_chain_elements: , - elements_chain_href: , - elements_chain_ids: , - elements_chain_texts: , event: , properties: , timestamp: , @@ -1154,10 +930,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -1246,34 +1018,6 @@ table_type: } }, - elements_chain_elements: { - alias: "elements_chain_elements" - type: { - name: "elements_chain_elements" - table_type: - } - }, - elements_chain_href: { - alias: "elements_chain_href" - type: { - name: "elements_chain_href" - table_type: - } - }, - elements_chain_ids: { - alias: "elements_chain_ids" - type: { - name: "elements_chain_ids" - table_type: - } - }, - elements_chain_texts: { - alias: "elements_chain_texts" - type: { - name: "elements_chain_texts" - table_type: - } - }, event: { alias: "event" type: { @@ -1536,74 +1280,6 @@ alias: "$group_4" type: } - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: { - name: "elements_chain_href" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_href" - type: - } - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: { - name: "elements_chain_texts" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_texts" - type: - } - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: { - name: "elements_chain_ids" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_ids" - type: - } - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: { - name: "elements_chain_elements" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_elements" - type: - } } ] select_from: { @@ -1764,50 +1440,6 @@ } hidden: True type: - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: - } - hidden: True - type: - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: - } - hidden: True - type: - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: - } - hidden: True - type: - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: - } - hidden: True - type: } ] select_from: { @@ -1844,10 +1476,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -2099,74 +1727,6 @@ alias: "$group_4" type: } - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: { - name: "elements_chain_href" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_href" - type: - } - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: { - name: "elements_chain_texts" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_texts" - type: - } - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: { - name: "elements_chain_ids" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_ids" - type: - } - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: { - name: "elements_chain_elements" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_elements" - type: - } } ] select_from: { @@ -2192,10 +1752,6 @@ created_at: , distinct_id: , elements_chain: , - elements_chain_elements: , - elements_chain_href: , - elements_chain_ids: , - elements_chain_texts: , event: , properties: , timestamp: , @@ -2233,10 +1789,6 @@ created_at: , distinct_id: , elements_chain: , - elements_chain_elements: , - elements_chain_href: , - elements_chain_ids: , - elements_chain_texts: , event: , properties: , timestamp: , @@ -2485,10 +2037,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -2740,74 +2288,6 @@ alias: "$group_4" type: } - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: { - name: "elements_chain_href" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_href" - type: - } - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: { - name: "elements_chain_texts" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_texts" - type: - } - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: { - name: "elements_chain_ids" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_ids" - type: - } - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: { - name: "elements_chain_elements" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_elements" - type: - } } ] select_from: { @@ -2833,10 +2313,6 @@ created_at: , distinct_id: , elements_chain: , - elements_chain_elements: , - elements_chain_href: , - elements_chain_ids: , - elements_chain_texts: , event: , properties: , timestamp: , @@ -2877,10 +2353,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -3133,74 +2605,6 @@ alias: "$group_4" type: } - }, - { - alias: "elements_chain_href" - expr: { - chain: [ - "elements_chain_href" - ] - type: { - name: "elements_chain_href" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_href" - type: - } - }, - { - alias: "elements_chain_texts" - expr: { - chain: [ - "elements_chain_texts" - ] - type: { - name: "elements_chain_texts" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_texts" - type: - } - }, - { - alias: "elements_chain_ids" - expr: { - chain: [ - "elements_chain_ids" - ] - type: { - name: "elements_chain_ids" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_ids" - type: - } - }, - { - alias: "elements_chain_elements" - expr: { - chain: [ - "elements_chain_elements" - ] - type: { - name: "elements_chain_elements" - table_type: - } - } - hidden: True - type: { - alias: "elements_chain_elements" - type: - } } ] select_from: { @@ -3227,10 +2631,6 @@ created_at: , distinct_id: , elements_chain: , - elements_chain_elements: , - elements_chain_href: , - elements_chain_ids: , - elements_chain_texts: , event: , properties: , timestamp: , @@ -3271,10 +2671,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -3374,10 +2770,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -3757,10 +3149,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -3894,10 +3282,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -4035,10 +3419,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -4212,10 +3592,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -4394,10 +3770,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -4529,10 +3901,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -4665,10 +4033,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -4788,10 +4152,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -4998,10 +4358,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -5096,10 +4452,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, @@ -5206,10 +4558,6 @@ created_at: {}, distinct_id: {}, elements_chain: {}, - elements_chain_elements: {}, - elements_chain_href: {}, - elements_chain_ids: {}, - elements_chain_texts: {}, event: {}, goe_0: {}, goe_1: {}, diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py index 359401b52fcda..19b48e652808b 100644 --- a/posthog/hogql/test/test_property.py +++ b/posthog/hogql/test/test_property.py @@ -6,6 +6,7 @@ from posthog.hogql.parser import parse_expr from posthog.hogql.property import ( action_to_expr, + element_chain_key_filter, has_aggregation, property_to_expr, selector_to_expr, @@ -22,7 +23,7 @@ ) from posthog.models.property import PropertyGroup from posthog.models.property_definition import PropertyType -from posthog.schema import HogQLPropertyFilter, RetentionEntity, EmptyPropertyFilter +from posthog.schema import HogQLPropertyFilter, PropertyOperator, RetentionEntity, EmptyPropertyFilter from posthog.test.base import BaseTest from posthog.warehouse.models import DataWarehouseTable, DataWarehouseJoin, DataWarehouseCredential @@ -340,7 +341,7 @@ def test_property_to_expr_element(self): "operator": "exact", } ), - self._parse_expr("arrayExists(href -> href = 'href-text.', elements_chain_hrefs)"), + clear_locations(element_chain_key_filter("href", "href-text.", PropertyOperator.EXACT)), ) self.assertEqual( self._property_to_expr( @@ -351,9 +352,7 @@ def test_property_to_expr_element(self): "operator": "regex", } ), - self._parse_expr( - "arrayExists(text -> ifNull(match(toString(text), 'text-text.'), false), elements_chain_texts)" - ), + clear_locations(element_chain_key_filter("text", "text-text.", PropertyOperator.REGEX)), ) def test_property_groups(self): @@ -471,14 +470,7 @@ def test_selector_to_expr(self): self.assertEqual( self._selector_to_expr("a[href='boo']"), clear_locations( - parse_expr( - "{regex} and hasAll(elements_chain_elements, ['a'])", - { - "regex": elements_chain_match( - '(^|;)a.*?href="boo".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' - ) - }, - ) + elements_chain_match('(^|;)a.*?href="boo".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))') ), ) self.assertEqual( @@ -490,26 +482,14 @@ def test_selector_to_expr(self): self.assertEqual( self._selector_to_expr("#withid"), clear_locations( - parse_expr( - """{regex} and indexOf(elements_chain_ids, 'withid') > 0""", - { - "regex": elements_chain_match( - '(^|;).*?attr_id="withid".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' - ) - }, - ) + elements_chain_match('(^|;).*?attr_id="withid".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))') ), ) self.assertEqual( self._selector_to_expr("#with-dashed-id"), clear_locations( - parse_expr( - """{regex} and indexOf(elements_chain_ids, 'with-dashed-id') > 0""", - { - "regex": elements_chain_match( - '(^|;).*?attr_id="with\\-dashed\\-id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' - ) - }, + elements_chain_match( + '(^|;).*?attr_id="with\\-dashed\\-id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' ) ), ) @@ -520,17 +500,46 @@ def test_selector_to_expr(self): self.assertEqual( self._selector_to_expr("#with\\slashed\\id"), clear_locations( - parse_expr( - "{regex} and indexOf(elements_chain_ids, 'with\\\\slashed\\\\id') > 0", - { - "regex": elements_chain_match( - '(^|;).*?attr_id="with\\\\slashed\\\\id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' - ) - }, + elements_chain_match( + '(^|;).*?attr_id="with\\\\slashed\\\\id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' ) ), ) + def test_elements_chain_key_filter(self): + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.IS_SET)), + clear_locations(elements_chain_match('(href="[^"]+")')), + ) + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.IS_NOT_SET)), + clear_locations(not_call(elements_chain_match('(href="[^"]+")'))), + ) + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.ICONTAINS)), + clear_locations(elements_chain_imatch('(href="[^"]*boo\\.\\.[^"]*")')), + ) + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.NOT_ICONTAINS)), + clear_locations(not_call(elements_chain_imatch('(href="[^"]*boo\\.\\.[^"]*")'))), + ) + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.REGEX)), + clear_locations(elements_chain_match('(href="boo..")')), + ) + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.NOT_REGEX)), + clear_locations(not_call(elements_chain_match('(href="boo..")'))), + ) + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.EXACT)), + clear_locations(elements_chain_match('(href="boo\\.\\.")')), + ) + self.assertEqual( + clear_locations(element_chain_key_filter("href", "boo..", PropertyOperator.IS_NOT)), + clear_locations(not_call(elements_chain_match('(href="boo\\.\\.")'))), + ) + def test_action_to_expr(self): action1 = Action.objects.create( team=self.team, @@ -538,27 +547,19 @@ def test_action_to_expr(self): { "event": "$autocapture", "selector": "a.nav-link.active", + "tag_name": "a", } ], ) self.assertEqual( clear_locations(action_to_expr(action1)), self._parse_expr( - "event = '$autocapture' and {regex1}", + "event = '$autocapture' and elements_chain =~ {regex1} and elements_chain =~ {regex2}", { - "regex1": ast.And( - exprs=[ - self._parse_expr( - "elements_chain =~ {regex}", - { - "regex": ast.Constant( - value='(^|;)a.*?\\.active\\..*?nav\\-link([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' - ) - }, - ), - self._parse_expr("hasAll(elements_chain_elements, ['a'])"), - ] + "regex1": ast.Constant( + value='(^|;)a.*?\\.active\\..*?nav\\-link([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))' ), + "regex2": ast.Constant(value="(^|;)a(\\.|$|;|:)"), }, ), ) @@ -610,24 +611,6 @@ def test_action_to_expr(self): self._parse_expr("event = '$pageview' OR true"), # All events just resolve to "true" ) - action5 = Action.objects.create( - team=self.team, - steps_json=[{"event": "$autocapture", "href": "https://example4.com", "href_matching": "regex"}], - ) - self.assertEqual( - clear_locations(action_to_expr(action5)), - self._parse_expr("event = '$autocapture' and elements_chain_href =~ 'https://example4.com'"), - ) - - action6 = Action.objects.create( - team=self.team, - steps_json=[{"event": "$autocapture", "text": "blabla", "text_matching": "regex"}], - ) - self.assertEqual( - clear_locations(action_to_expr(action6)), - self._parse_expr("event = '$autocapture' and arrayExists(x -> x =~ 'blabla', elements_chain_texts)"), - ) - def test_cohort_filter_static(self): cohort = Cohort.objects.create( team=self.team, diff --git a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr index d733159ba5ee0..9822999809a62 100644 --- a/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr +++ b/posthog/hogql_queries/insights/funnels/test/__snapshots__/test_funnel.ambr @@ -317,9 +317,9 @@ if(not(empty(e__override.distinct_id)), e__override.person_id, e.person_id) AS aggregation_target, if(equals(e.event, 'user signed up'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)button(\\.|$|;|:)'), arrayExists(x -> ifNull(equals(x, 'Pay $10'), 0), e.elements_chain_texts)), 1, 0) AS step_1, + if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)button(\\.|$|;|:)'), match(e.elements_chain, '(text="Pay\\ \\$10")')), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1, - if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)a(\\.|$|;|:)'), equals(e.elements_chain_href, '/movie')), 1, 0) AS step_2, + if(and(equals(e.event, '$autocapture'), match(e.elements_chain, '(^|;)a(\\.|$|;|:)'), match(e.elements_chain, '(href="/movie")')), 1, 0) AS step_2, if(ifNull(equals(step_2, 1), 0), timestamp, NULL) AS latest_2 FROM events AS e LEFT OUTER JOIN diff --git a/posthog/models/event/sql.py b/posthog/models/event/sql.py index 8219bece6572b..33e7d2a2c3062 100644 --- a/posthog/models/event/sql.py +++ b/posthog/models/event/sql.py @@ -65,10 +65,6 @@ , $group_4 VARCHAR MATERIALIZED {trim_quotes_expr("JSONExtractRaw(properties, '$group_4')")} COMMENT 'column_materializer::$group_4' , $window_id VARCHAR MATERIALIZED {trim_quotes_expr("JSONExtractRaw(properties, '$window_id')")} COMMENT 'column_materializer::$window_id' , $session_id VARCHAR MATERIALIZED {trim_quotes_expr("JSONExtractRaw(properties, '$session_id')")} COMMENT 'column_materializer::$session_id' - , elements_chain_href String MATERIALIZED extract(elements_chain, '(?::|\")href="(.*?)"') - , elements_chain_texts Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|\")text="(.*?)"')) - , elements_chain_ids Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|\")attr_id="(.*?)"')) - , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?:^|;)(a|button|form|input|select|textarea|label)(?:\\.|$|:)')) , INDEX `minmax_$group_0` `$group_0` TYPE minmax GRANULARITY 1 , INDEX `minmax_$group_1` `$group_1` TYPE minmax GRANULARITY 1 , INDEX `minmax_$group_2` `$group_2` TYPE minmax GRANULARITY 1 @@ -76,6 +72,10 @@ , INDEX `minmax_$group_4` `$group_4` TYPE minmax GRANULARITY 1 , INDEX `minmax_$window_id` `$window_id` TYPE minmax GRANULARITY 1 , INDEX `minmax_$session_id` `$session_id` TYPE minmax GRANULARITY 1 + , elements_chain_href String MATERIALIZED extract(elements_chain, '(?::|\")href="(.*?)"') + , elements_chain_texts Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|\")text="(.*?)"')) + , elements_chain_ids Array(String) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?::|\")attr_id="(.*?)"')) + , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) MATERIALIZED arrayDistinct(extractAll(elements_chain, '(?:^|;)(a|button|form|input|select|textarea|label)(?:\\.|$|:)')) """ EVENTS_TABLE_PROXY_MATERIALIZED_COLUMNS = """ @@ -86,10 +86,6 @@ , $group_4 VARCHAR COMMENT 'column_materializer::$group_4' , $window_id VARCHAR COMMENT 'column_materializer::$window_id' , $session_id VARCHAR COMMENT 'column_materializer::$session_id' - , elements_chain_href String COMMENT 'column_materializer::elements_chain::href' - , elements_chain_texts Array(String) COMMENT 'column_materializer::elements_chain::texts' - , elements_chain_ids Array(String) COMMENT 'column_materializer::elements_chain::ids' - , elements_chain_elements Array(Enum('a', 'button', 'form', 'input', 'select', 'textarea', 'label')) COMMENT 'column_materializer::elements_chain::elements' """ EVENTS_DATA_TABLE_ENGINE = lambda: ReplacingMergeTree( diff --git a/posthog/test/base.py b/posthog/test/base.py index 6f23e9d8fbc05..6d1d608da930a 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -5,7 +5,6 @@ import threading import time import uuid -import unittest from collections.abc import Callable, Generator from contextlib import contextmanager from functools import wraps @@ -101,10 +100,6 @@ persons_ordering_int: int = 1 -# Expand string diffs -unittest.util._MAX_LENGTH = 2000 # type: ignore - - def _setup_test_data(klass): klass.organization = Organization.objects.create(name=klass.CONFIG_ORGANIZATION_NAME) klass.project, klass.team = Project.objects.create_with_team(