From 455030350516aec830dda5e821eea993f5c33b2f Mon Sep 17 00:00:00 2001 From: Robbie Date: Wed, 18 Oct 2023 08:14:41 +0100 Subject: [PATCH] fix(hogql): Fix boolean is not set (#18040) --- posthog/hogql/property.py | 8 +- posthog/hogql/test/test_property.py | 4 +- .../insights/test/test_events_query.py | 105 ++++++++++++++++++ posthog/hogql_queries/query_runner.py | 2 +- 4 files changed, 115 insertions(+), 4 deletions(-) create mode 100644 posthog/hogql_queries/insights/test/test_events_query.py diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index 293005bce98224..c0341461e1293c 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -130,11 +130,17 @@ def property_to_expr( chain = ["person", "properties"] if property.type == "person" and scope != "person" else ["properties"] field = ast.Field(chain=chain + [property.key]) + properties_field = ast.Field(chain=chain) if operator == PropertyOperator.is_set: return ast.CompareOperation(op=ast.CompareOperationOp.NotEq, left=field, right=ast.Constant(value=None)) elif operator == PropertyOperator.is_not_set: - return ast.CompareOperation(op=ast.CompareOperationOp.Eq, left=field, right=ast.Constant(value=None)) + return ast.Or( + exprs=[ + ast.CompareOperation(op=ast.CompareOperationOp.Eq, left=field, right=ast.Constant(value=None)), + ast.Not(expr=ast.Call(name="JSONHas", args=[properties_field, ast.Constant(value=property.key)])), + ] + ) elif operator == PropertyOperator.icontains: return ast.CompareOperation( op=ast.CompareOperationOp.ILike, diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py index d56a74b59ba705..1e575898056453 100644 --- a/posthog/hogql/test/test_property.py +++ b/posthog/hogql/test/test_property.py @@ -63,11 +63,11 @@ def test_property_to_expr_event(self): ) self.assertEqual( self._property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "is_set"}), - self._parse_expr("properties.a is not null"), + self._parse_expr("properties.a != NULL"), ) self.assertEqual( + self._parse_expr("properties.a = NULL OR (NOT JSONHas(properties, 'a'))"), self._property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "is_not_set"}), - self._parse_expr("properties.a is null"), ) self.assertEqual( self._property_to_expr({"type": "event", "key": "a", "value": "b", "operator": "exact"}), diff --git a/posthog/hogql_queries/insights/test/test_events_query.py b/posthog/hogql_queries/insights/test/test_events_query.py new file mode 100644 index 00000000000000..707891d424a419 --- /dev/null +++ b/posthog/hogql_queries/insights/test/test_events_query.py @@ -0,0 +1,105 @@ +from typing import Tuple, Any + +from freezegun import freeze_time + +from posthog.hogql_queries.events_query_runner import EventsQueryRunner +from posthog.schema import ( + EventsQuery, + EventPropertyFilter, + PropertyOperator, +) +from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person + + +class TestEventsQueryRunner(ClickhouseTestMixin, APIBaseTest): + maxDiff = None + + def _create_events(self, data: list[Tuple[str, str, Any]], event="$pageview"): + person_result = [] + for distinct_id, timestamp, event_properties in data: + with freeze_time(timestamp): + person_result.append( + _create_person( + team_id=self.team.pk, + distinct_ids=[distinct_id], + properties={ + "name": distinct_id, + }, + ) + ) + _create_event( + team=self.team, + event=event, + distinct_id=distinct_id, + timestamp=timestamp, + properties=event_properties, + ) + return person_result + + def _create_boolean_field_test_events(self): + self._create_events( + data=[ + ( + "p_true", + "2020-01-11T12:00:01Z", + {"boolean_field": True}, + ), + ( + "p_false", + "2020-01-11T12:00:02Z", + {"boolean_field": False}, + ), + ( + "p_notset", + "2020-01-11T12:00:04Z", + {}, + ), + ( + "p_null", + "2020-01-11T12:00:04Z", + {"boolean_field": None}, + ), + ] + ) + + def _run_boolean_field_query(self, filter: EventPropertyFilter): + with freeze_time("2020-01-11T12:01:00"): + query = EventsQuery( + after="-24h", + event="$pageview", + kind="EventsQuery", + orderBy=["timestamp ASC"], + select=["*"], + properties=[filter], + ) + + runner = EventsQueryRunner(query=query, team=self.team) + return runner.run().results + + def test_is_not_set_boolean(self): + # see https://github.com/PostHog/posthog/issues/18030 + self._create_boolean_field_test_events() + results = self._run_boolean_field_query( + EventPropertyFilter( + type="event", + key="boolean_field", + operator=PropertyOperator.is_not_set, + value=PropertyOperator.is_not_set, + ) + ) + + self.assertEqual({"p_notset", "p_null"}, set(row[0]["distinct_id"] for row in results)) + + def test_is_set_boolean(self): + self._create_boolean_field_test_events() + + results = self._run_boolean_field_query( + EventPropertyFilter( + type="event", + key="boolean_field", + operator=PropertyOperator.is_set, + value=PropertyOperator.is_set, + ) + ) + + self.assertEqual({"p_true", "p_false"}, set(row[0]["distinct_id"] for row in results)) diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 49f7ca369fdc5b..d89202883607ae 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -48,7 +48,7 @@ class QueryResponse(BaseModel, Generic[DataT]): ) results: DataT timings: Optional[List[QueryTiming]] = None - types: Optional[List[Tuple[str, str]]] = None + types: Optional[List[Union[Tuple[str, str], str]]] = None columns: Optional[List[str]] = None hogql: Optional[str] = None hasMore: Optional[bool] = None