From 77e0185f1a5968689a81b2f16f5d5fb15c33db24 Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Mon, 6 Nov 2023 13:20:31 +0000 Subject: [PATCH 1/3] Revert "revert: "fix(flags): Do the expected for numeric comparisons" (#18413)" This reverts commit 30789e6e86fed70d7ecabcd80af99f56e6f940c2. --- ee/clickhouse/models/test/test_filters.py | 36 ++ posthog/models/feature_flag/flag_matching.py | 43 +- posthog/models/filters/test/test_filter.py | 88 ++-- posthog/queries/base.py | 56 ++- posthog/queries/test/test_base.py | 21 +- .../test/__snapshots__/test_feature_flag.ambr | 52 ++ posthog/test/base.py | 7 +- posthog/test/test_feature_flag.py | 443 ++++++++++++++++++ 8 files changed, 693 insertions(+), 53 deletions(-) diff --git a/ee/clickhouse/models/test/test_filters.py b/ee/clickhouse/models/test/test_filters.py index 26ff79c565c4e..def3ca4493fe1 100644 --- a/ee/clickhouse/models/test/test_filters.py +++ b/ee/clickhouse/models/test/test_filters.py @@ -610,6 +610,42 @@ def test_numerical(self): events = _filter_events(filter, self.team) self.assertEqual(events[0]["id"], event1_uuid) + def test_numerical_person_properties(self): + _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4}) + _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5}) + _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6}) + + filter = Filter( + data={ + "properties": [ + { + "type": "person", + "key": "$a_number", + "value": 4, + "operator": "gt", + } + ] + } + ) + self.assertEqual(len(_filter_persons(filter, self.team)), 2) + + filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]}) + self.assertEqual(len(_filter_persons(filter, self.team)), 1) + + filter = Filter( + data={ + "properties": [ + { + "type": "person", + "key": "$a_number", + "value": 6, + "operator": "lt", + } + ] + } + ) + self.assertEqual(len(_filter_persons(filter, self.team)), 2) + def test_contains(self): _create_event(team=self.team, distinct_id="test", event="$pageview") event2_uuid = _create_event( diff --git a/posthog/models/feature_flag/flag_matching.py b/posthog/models/feature_flag/flag_matching.py index fe57b171204e8..df9a83df7b682 100644 --- a/posthog/models/feature_flag/flag_matching.py +++ b/posthog/models/feature_flag/flag_matching.py @@ -10,7 +10,7 @@ from django.db import DatabaseError, IntegrityError, OperationalError from django.db.models.expressions import ExpressionWrapper, RawSQL from django.db.models.fields import BooleanField -from django.db.models import Q +from django.db.models import Q, Func, F, CharField from django.db.models.query import QuerySet from sentry_sdk.api import capture_exception, start_span from posthog.metrics import LABEL_TEAM_ID @@ -396,6 +396,13 @@ def condition_eval(key, condition): annotate_query = True nonlocal person_query + property_list = Filter(data=condition).property_groups.flat + properties_with_math_operators = [ + key_and_field_for_property(prop) + for prop in property_list + if prop.operator in ["gt", "lt", "gte", "lte"] + ] + if len(condition.get("properties", {})) > 0: # Feature Flags don't support OR filtering yet target_properties = self.property_value_overrides @@ -404,8 +411,9 @@ def condition_eval(key, condition): self.cache.group_type_index_to_name[feature_flag.aggregation_group_type_index], {}, ) + expr = properties_to_Q( - Filter(data=condition).property_groups.flat, + property_list, override_property_values=target_properties, cohorts_cache=self.cohorts_cache, using_database=DATABASE_FOR_FLAG_MATCHING, @@ -428,13 +436,24 @@ def condition_eval(key, condition): if annotate_query: if feature_flag.aggregation_group_type_index is None: + # :TRICKY: Flag matching depends on type of property when doing >, <, >=, <= comparisons. + # This requires a generated field to query in Q objects, which sadly don't allow inlining fields, + # hence we need to annotate the query here, even though these annotations are used much deeper, + # in properties_to_q, in empty_or_null_with_value_q + # These need to come in before the expr so they're available to use inside the expr. + # Same holds for the group queries below. + type_property_annotations = { + prop_key: Func(F(prop_field), function="JSONB_TYPEOF", output_field=CharField()) + for prop_key, prop_field in properties_with_math_operators + } person_query = person_query.annotate( + **type_property_annotations, **{ key: ExpressionWrapper( expr if expr else RawSQL("true", []), output_field=BooleanField(), - ) - } + ), + }, ) person_fields.append(key) else: @@ -445,13 +464,18 @@ def condition_eval(key, condition): group_query, group_fields, ) = group_query_per_group_type_mapping[feature_flag.aggregation_group_type_index] + type_property_annotations = { + prop_key: Func(F(prop_field), function="JSONB_TYPEOF", output_field=CharField()) + for prop_key, prop_field in properties_with_math_operators + } group_query = group_query.annotate( + **type_property_annotations, **{ key: ExpressionWrapper( expr if expr else RawSQL("true", []), output_field=BooleanField(), ) - } + }, ) group_fields.append(key) group_query_per_group_type_mapping[feature_flag.aggregation_group_type_index] = ( @@ -881,3 +905,12 @@ def parse_exception_for_error_message(err: Exception): reason = "query_wait_timeout" return reason + + +def key_and_field_for_property(property: Property) -> Tuple[str, str]: + column = "group_properties" if property.type == "group" else "properties" + key = property.key + return ( + f"{column}_{key}_type", + f"{column}__{key}", + ) diff --git a/posthog/models/filters/test/test_filter.py b/posthog/models/filters/test/test_filter.py index 84b70bbd4d837..72ad460b91084 100644 --- a/posthog/models/filters/test/test_filter.py +++ b/posthog/models/filters/test/test_filter.py @@ -2,7 +2,7 @@ import json from typing import Any, Callable, Dict, List, Optional, cast -from django.db.models import Q +from django.db.models import Q, Func, F, CharField from posthog.constants import FILTER_TEST_ACCOUNTS from posthog.models import Cohort, Filter, Person, Team @@ -219,42 +219,6 @@ def test_incomplete_data(self): ) self.assertListEqual(filter.property_groups.values, []) - def test_numerical_person_properties(self): - person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4}) - person_factory(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5}) - person_factory(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6}) - - filter = Filter( - data={ - "properties": [ - { - "type": "person", - "key": "$a_number", - "value": 4, - "operator": "gt", - } - ] - } - ) - self.assertEqual(len(filter_persons(filter, self.team)), 2) - - filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]}) - self.assertEqual(len(filter_persons(filter, self.team)), 1) - - filter = Filter( - data={ - "properties": [ - { - "type": "person", - "key": "$a_number", - "value": 6, - "operator": "lt", - } - ] - } - ) - self.assertEqual(len(filter_persons(filter, self.team)), 2) - def test_contains_persons(self): person_factory( team_id=self.team.pk, @@ -819,6 +783,56 @@ def _filter_with_date_range( return Filter(data=data) + def test_numerical_person_properties(self): + _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4}) + _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5}) + _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6}) + _create_person(team_id=self.team.pk, distinct_ids=["p4"], properties={"$a_number": 14}) + + flush_persons_and_events() + + def filter_persons_with_annotation(filter: Filter, team: Team): + persons = Person.objects.annotate( + **{ + "properties_$a_number_type": Func( + F("properties__$a_number"), function="JSONB_TYPEOF", output_field=CharField() + ) + } + ).filter(properties_to_Q(filter.property_groups.flat)) + persons = persons.filter(team_id=team.pk) + return [str(uuid) for uuid in persons.values_list("uuid", flat=True)] + + filter = Filter( + data={ + "properties": [ + { + "type": "person", + "key": "$a_number", + "value": "4", + "operator": "gt", + } + ] + } + ) + self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 3) + + filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]}) + self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 1) + + filter = Filter( + data={ + "properties": [ + { + "type": "person", + "key": "$a_number", + "value": 6, + "operator": "lt", + } + ] + } + ) + self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 2) + def filter_persons_with_property_group( filter: Filter, team: Team, property_overrides: Dict[str, Any] = {} diff --git a/posthog/queries/base.py b/posthog/queries/base.py index 65ccd651d2c3c..00269a384b37f 100644 --- a/posthog/queries/base.py +++ b/posthog/queries/base.py @@ -12,7 +12,7 @@ ) from dateutil import parser -from django.db.models import Exists, OuterRef, Q +from django.db.models import Exists, OuterRef, Q, Value from rest_framework.exceptions import ValidationError from posthog.constants import PropertyOperatorType @@ -29,10 +29,10 @@ from posthog.queries.util import convert_to_datetime_aware from posthog.utils import get_compare_period_dates, is_valid_regex -F = TypeVar("F", Filter, PathFilter) +FilterType = TypeVar("FilterType", Filter, PathFilter) -def determine_compared_filter(filter: F) -> F: +def determine_compared_filter(filter: FilterType) -> FilterType: if not filter.date_to or not filter.date_from: raise ValidationError("You need date_from and date_to to compare") date_from, date_to = get_compare_period_dates( @@ -142,8 +142,34 @@ def compute_exact_match(value: ValueT, override_value: Any) -> bool: except re.error: return False - if operator == "gt": - return type(override_value) == type(value) and override_value > value + if operator in ("gt", "gte", "lt", "lte"): + # :TRICKY: We adjust comparison based on the override value passed in, + # to make sure we handle both numeric and string comparisons appropriately. + def compare(lhs, rhs, operator): + if operator == "gt": + return lhs > rhs + elif operator == "gte": + return lhs >= rhs + elif operator == "lt": + return lhs < rhs + elif operator == "lte": + return lhs <= rhs + else: + raise ValueError(f"Invalid operator: {operator}") + + parsed_value = None + try: + parsed_value = float(value) # type: ignore + except Exception: + pass + + if parsed_value is not None: + if isinstance(override_value, str): + return compare(override_value, str(value), operator) + else: + return compare(override_value, parsed_value, operator) + else: + return compare(str(override_value), str(value), operator) if operator == "gte": return type(override_value) == type(value) and override_value >= value @@ -207,7 +233,25 @@ def empty_or_null_with_value_q( f"{column}__{key}", value_as_coerced_to_number ) else: - target_filter = Q(**{f"{column}__{key}__{operator}": value}) + if isinstance(value, list): + raise TypeError(f"empty_or_null_with_value_q: Operator {operator} does not support list values") + + parsed_value = None + if operator in ("gt", "gte", "lt", "lte"): + try: + parsed_value = float(value) + except (ValueError, TypeError): + pass + + if parsed_value is not None: + # When we can coerce given value to a number, check whether the value in DB is a number + # and do a numeric comparison. Otherwise, do a string comparison. + target_filter = Q( + Q(**{f"{column}__{key}__{operator}": str(value), f"{column}_{key}_type": Value("string")}) + | Q(**{f"{column}__{key}__{operator}": parsed_value, f"{column}_{key}_type": Value("number")}) + ) + else: + target_filter = Q(**{f"{column}__{key}__{operator}": value}) query_filter = Q(target_filter & Q(**{f"{column}__has_key": key}) & ~Q(**{f"{column}__{key}": None})) diff --git a/posthog/queries/test/test_base.py b/posthog/queries/test/test_base.py index bccc9ca60a53e..91076d31aa4ff 100644 --- a/posthog/queries/test/test_base.py +++ b/posthog/queries/test/test_base.py @@ -154,7 +154,8 @@ def test_match_properties_math_operators(self): self.assertFalse(match_property(property_a, {"key": 0})) self.assertFalse(match_property(property_a, {"key": -1})) - self.assertFalse(match_property(property_a, {"key": "23"})) + # now we handle type mismatches so this should be true + self.assertTrue(match_property(property_a, {"key": "23"})) property_b = Property(key="key", value=1, operator="lt") self.assertTrue(match_property(property_b, {"key": 0})) @@ -171,16 +172,32 @@ def test_match_properties_math_operators(self): self.assertFalse(match_property(property_c, {"key": 0})) self.assertFalse(match_property(property_c, {"key": -1})) - self.assertFalse(match_property(property_c, {"key": "3"})) + # now we handle type mismatches so this should be true + self.assertTrue(match_property(property_c, {"key": "3"})) property_d = Property(key="key", value="43", operator="lt") self.assertTrue(match_property(property_d, {"key": "41"})) self.assertTrue(match_property(property_d, {"key": "42"})) + self.assertTrue(match_property(property_d, {"key": 42})) self.assertFalse(match_property(property_d, {"key": "43"})) self.assertFalse(match_property(property_d, {"key": "44"})) self.assertFalse(match_property(property_d, {"key": 44})) + property_e = Property(key="key", value="30", operator="lt") + self.assertTrue(match_property(property_e, {"key": "29"})) + + # depending on the type of override, we adjust type comparison + self.assertTrue(match_property(property_e, {"key": "100"})) + self.assertFalse(match_property(property_e, {"key": 100})) + + property_f = Property(key="key", value="123aloha", operator="gt") + self.assertFalse(match_property(property_f, {"key": "123"})) + self.assertFalse(match_property(property_f, {"key": 122})) + + # this turns into a string comparison + self.assertTrue(match_property(property_f, {"key": 129})) + def test_match_property_date_operators(self): property_a = Property(key="key", value="2022-05-01", operator="is_date_before") self.assertTrue(match_property(property_a, {"key": "2022-03-01"})) diff --git a/posthog/test/__snapshots__/test_feature_flag.ambr b/posthog/test/__snapshots__/test_feature_flag.ambr index f7a912fe972b7..a4200a05ffd09 100644 --- a/posthog/test/__snapshots__/test_feature_flag.ambr +++ b/posthog/test/__snapshots__/test_feature_flag.ambr @@ -345,6 +345,58 @@ AND "posthog_group"."group_type_index" = 2) ' --- +# name: TestFeatureFlagMatcher.test_numeric_operator_with_groups_and_person_flags + ' + SELECT "posthog_grouptypemapping"."id", + "posthog_grouptypemapping"."team_id", + "posthog_grouptypemapping"."group_type", + "posthog_grouptypemapping"."group_type_index", + "posthog_grouptypemapping"."name_singular", + "posthog_grouptypemapping"."name_plural" + FROM "posthog_grouptypemapping" + WHERE "posthog_grouptypemapping"."team_id" = 2 + ' +--- +# name: TestFeatureFlagMatcher.test_numeric_operator_with_groups_and_person_flags.1 + ' + SELECT (((("posthog_person"."properties" -> 'number') >= '"20"' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'number')) = 'string') + OR (("posthog_person"."properties" -> 'number') >= '20.0' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'number')) = 'number')) + AND "posthog_person"."properties" ? 'number' + AND NOT (("posthog_person"."properties" -> 'number') = 'null')) AS "flag_X_condition_0" + FROM "posthog_person" + INNER JOIN "posthog_persondistinctid" ON ("posthog_person"."id" = "posthog_persondistinctid"."person_id") + WHERE ("posthog_persondistinctid"."distinct_id" = '307' + AND "posthog_persondistinctid"."team_id" = 2 + AND "posthog_person"."team_id" = 2) + ' +--- +# name: TestFeatureFlagMatcher.test_numeric_operator_with_groups_and_person_flags.2 + ' + SELECT (((("posthog_group"."group_properties" -> 'number') > '"100"' + AND JSONB_TYPEOF(("posthog_group"."group_properties" -> 'number')) = 'string') + OR (("posthog_group"."group_properties" -> 'number') > '100.0' + AND JSONB_TYPEOF(("posthog_group"."group_properties" -> 'number')) = 'number')) + AND "posthog_group"."group_properties" ? 'number' + AND NOT (("posthog_group"."group_properties" -> 'number') = 'null')) AS "flag_X_condition_0" + FROM "posthog_group" + WHERE ("posthog_group"."team_id" = 2 + AND "posthog_group"."group_key" = 'foo' + AND "posthog_group"."group_type_index" = 0) + ' +--- +# name: TestFeatureFlagMatcher.test_numeric_operator_with_groups_and_person_flags.3 + ' + SELECT (("posthog_group"."group_properties" -> 'number') > '"100b2c"' + AND "posthog_group"."group_properties" ? 'number' + AND NOT (("posthog_group"."group_properties" -> 'number') = 'null')) AS "flag_X_condition_0" + FROM "posthog_group" + WHERE ("posthog_group"."team_id" = 2 + AND "posthog_group"."group_key" = 'foo-project' + AND "posthog_group"."group_type_index" = 1) + ' +--- # name: TestFeatureFlagMatcher.test_super_condition_matches_string ' SELECT ((("posthog_person"."properties" -> 'is_enabled') = 'true' diff --git a/posthog/test/base.py b/posthog/test/base.py index 105285fa3073e..3cd830a68a49d 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -460,13 +460,14 @@ def assertQueryMatchesSnapshot(self, query, params=None, replace_all_numbers=Fal if replace_all_numbers: query = re.sub(r"(\"?) = \d+", r"\1 = 2", query) query = re.sub(r"(\"?) IN \(\d+(, \d+)*\)", r"\1 IN (1, 2, 3, 4, 5 /* ... */)", query) - # feature flag conditions use primary keys as columns in queries, so replace those too - query = re.sub(r"flag_\d+_condition", r"flag_X_condition", query) - query = re.sub(r"flag_\d+_super_condition", r"flag_X_super_condition", query) else: query = re.sub(r"(team|cohort)_id(\"?) = \d+", r"\1_id\2 = 2", query) query = re.sub(r"\d+ as (team|cohort)_id(\"?)", r"2 as \1_id\2", query) + # feature flag conditions use primary keys as columns in queries, so replace those always + query = re.sub(r"flag_\d+_condition", r"flag_X_condition", query) + query = re.sub(r"flag_\d+_super_condition", r"flag_X_super_condition", query) + # hog ql checks team ids differently query = re.sub( r"equals\(([^.]+\.)?team_id?, \d+\)", diff --git a/posthog/test/test_feature_flag.py b/posthog/test/test_feature_flag.py index dbcda391f1f22..83bfe2583a482 100644 --- a/posthog/test/test_feature_flag.py +++ b/posthog/test/test_feature_flag.py @@ -4139,6 +4139,449 @@ def create_groups(self): def create_feature_flag(self, key="beta-feature", **kwargs): return FeatureFlag.objects.create(team=self.team, name="Beta feature", key=key, created_by=self.user, **kwargs) + def test_numeric_operator(self): + Person.objects.create( + team=self.team, + distinct_ids=["307"], + properties={"number": 30, "string_number": "30", "version": "1.24"}, + ) + + feature_flag1 = self.create_feature_flag( + key="random1", + filters={ + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "100", + "operator": "gt", + "type": "person", + }, + ] + } + ] + }, + ) + + feature_flag2 = self.create_feature_flag( + key="random2", + filters={ + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "100b2c", + "operator": "gt", + "type": "person", + }, + ] + } + ] + }, + ) + + feature_flag3 = self.create_feature_flag( + key="random3", + filters={ + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "3.1x00b2c", + "operator": "gt", + "type": "person", + }, + ] + } + ] + }, + ) + + feature_flag4 = self.create_feature_flag( + key="random4", + filters={ + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "20", + "operator": "gt", + "type": "person", + }, + ] + } + ] + }, + ) + + feature_flag5 = self.create_feature_flag( + key="random5", + filters={ + "groups": [ + { + "properties": [ + { + "key": "version", + "value": "1.05", + "operator": "gt", + "type": "person", + }, + ] + }, + { + "properties": [ + { + "key": "version", + "value": "1.15", + "operator": "gt", + "type": "person", + }, + ] + }, + { + "properties": [ + { + "key": "version", + "value": "1.1200", + "operator": "gt", + "type": "person", + }, + ] + }, + ] + }, + ) + + feature_flag6 = self.create_feature_flag( + key="random6", + filters={ + "groups": [ + { + "properties": [ + { + "key": "version", + "value": "1.206.0", + "operator": "lt", + "type": "person", + }, + ] + } + ] + }, + ) + + self.assertEqual( + self.match_flag(feature_flag1, "307"), + FeatureFlagMatch(False, None, FeatureFlagMatchReason.NO_CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag2, "307"), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag3, "307"), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag4, "307"), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + + # even though we can parse as a number, only do string comparison + self.assertEqual( + self.match_flag(feature_flag5, "307"), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag6, "307"), + FeatureFlagMatch(False, None, FeatureFlagMatchReason.NO_CONDITION_MATCH, 0), + ) + + def test_numeric_operator_with_groups_and_person_flags(self): + Person.objects.create( + team=self.team, + distinct_ids=["307"], + properties={"number": 30, "string_number": "30", "version": "1.24"}, + ) + GroupTypeMapping.objects.create(team=self.team, group_type="organization", group_type_index=0) + GroupTypeMapping.objects.create(team=self.team, group_type="project", group_type_index=1) + + Group.objects.create( + team=self.team, + group_type_index=0, + group_key="foo", + group_properties={"name": "foo.inc", "number": 50, "string_number": "50"}, + version=1, + ) + Group.objects.create( + team=self.team, + group_type_index=1, + group_key="foo-project", + group_properties={"name": "foo-project", "number": 20, "string_number": "20"}, + version=1, + ) + + feature_flag1 = self.create_feature_flag( + key="random1", + filters={ + "aggregation_group_type_index": 0, + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "100", + "operator": "gt", + "group_type_index": 0, + "type": "group", + }, + ] + } + ], + }, + ) + + feature_flag2 = self.create_feature_flag( + key="random2", + filters={ + "aggregation_group_type_index": 1, + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "100b2c", + "operator": "gt", + "group_type_index": 1, + "type": "group", + }, + ] + } + ], + }, + ) + + feature_flag3 = self.create_feature_flag( + key="random3", + filters={ + "aggregation_group_type_index": 0, + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "3.1x00b2c", + "operator": "gte", + "type": "person", + "group_type_index": 0, + "type": "group", + }, + ] + } + ], + }, + ) + + feature_flag4 = self.create_feature_flag( + key="random4", + filters={ + "aggregation_group_type_index": 0, + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "20", + "operator": "gt", + "group_type_index": 0, + "type": "group", + }, + ] + } + ], + }, + ) + + feature_flag4_person = self.create_feature_flag( + key="random4_person", + filters={ + "groups": [ + { + "properties": [ + { + "key": "number", + "value": "20", + "operator": "gte", + "type": "person", + }, + ] + } + ] + }, + ) + + feature_flag5 = self.create_feature_flag( + key="random5", + filters={ + "groups": [ + { + "properties": [ + { + "key": "version", + "value": "1.05", + "operator": "gt", + "type": "person", + }, + ] + }, + { + "properties": [ + { + "key": "version", + "value": "1.15", + "operator": "gt", + "type": "person", + }, + ] + }, + { + "properties": [ + { + "key": "version", + "value": "1.1200", + "operator": "gte", + "type": "person", + }, + ] + }, + ] + }, + ) + + feature_flag6 = self.create_feature_flag( + key="random6", + filters={ + "aggregation_group_type_index": 0, + "groups": [ + { + "properties": [ + { + "key": "version", + "value": "1.206.0", + "operator": "lt", + "group_type_index": 0, + "type": "group", + }, + ] + } + ], + }, + ) + + self.assertEqual( + self.match_flag(feature_flag1, "307", groups={"organization": "foo", "project": "foo-project"}), + FeatureFlagMatch(False, None, FeatureFlagMatchReason.NO_CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag2, "307", groups={"organization": "foo", "project": "foo-project"}), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag3, "307", groups={"organization": "foo", "project": "foo-project"}), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag4, "307", groups={"organization": "foo", "project": "foo-project"}), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + + # even though we can parse as a number, only do string comparison + self.assertEqual( + self.match_flag(feature_flag5, "307", groups={"organization": "foo", "project": "foo-project"}), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag(feature_flag6, "307", groups={"organization": "foo", "project": "foo-project"}), + FeatureFlagMatch(False, None, FeatureFlagMatchReason.NO_CONDITION_MATCH, 0), + ) + + # Make sure clashes on property name doesn't affect computation + with snapshot_postgres_queries_context(self, replace_all_numbers=False): + self.assertEqual( + FeatureFlagMatcher( + [feature_flag1, feature_flag2, feature_flag4_person], + "307", + groups={"organization": "foo", "project": "foo-project"}, + ).get_matches()[1], + { + "random1": { + "condition_index": 0, + "reason": FeatureFlagMatchReason.NO_CONDITION_MATCH, + }, + "random2": { + "condition_index": 0, + "reason": FeatureFlagMatchReason.CONDITION_MATCH, + }, + "random4_person": { + "condition_index": 0, + "reason": FeatureFlagMatchReason.CONDITION_MATCH, + }, + }, + ) + + # handle overrides in group properties + self.assertEqual( + self.match_flag( + feature_flag1, + "307", + groups={"organization": "foo", "project": "foo-project"}, + group_property_value_overrides={"organization": {"number": 200}, "project": {"number": 1}}, + ), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + + # string '30' > string '100' (lexicographically) + self.assertEqual( + self.match_flag( + feature_flag1, + "307", + groups={"organization": "foo", "project": "foo-project"}, + group_property_value_overrides={"organization": {"number": "30"}, "project": {"number": 1}}, + ), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag( + feature_flag1, + "307", + groups={"organization": "foo", "project": "foo-project"}, + group_property_value_overrides={"organization": {"number": "01323"}, "project": {"number": 1}}, + ), + FeatureFlagMatch(False, None, FeatureFlagMatchReason.NO_CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag( + feature_flag1, + "307", + groups={"organization": "foo", "project": "foo-project"}, + group_property_value_overrides={"organization": {"number": 0}, "project": {"number": 1}}, + ), + FeatureFlagMatch(False, None, FeatureFlagMatchReason.NO_CONDITION_MATCH, 0), + ) + self.assertEqual( + self.match_flag( + feature_flag2, + "307", + groups={"organization": "foo", "project": "foo-project"}, + group_property_value_overrides={"organization": {"number": "0"}, "project": {"number": 19.999999}}, + ), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + class TestFeatureFlagHashKeyOverrides(BaseTest, QueryMatchingTest): person: Person From e02352c7b6a615d30b6f853a9175ea79f1e13656 Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Mon, 6 Nov 2023 15:33:54 +0000 Subject: [PATCH 2/3] fix issues, add more tests --- posthog/models/feature_flag/flag_matching.py | 31 +++- .../test/__snapshots__/test_filter.ambr | 30 ++++ posthog/models/filters/test/test_filter.py | 61 +++++++ posthog/queries/base.py | 22 +-- posthog/queries/test/test_base.py | 39 +++++ .../test/__snapshots__/test_feature_flag.ambr | 56 ++++++ posthog/test/test_feature_flag.py | 161 ++++++++++++++++++ 7 files changed, 377 insertions(+), 23 deletions(-) diff --git a/posthog/models/feature_flag/flag_matching.py b/posthog/models/feature_flag/flag_matching.py index df9a83df7b682..189e9801ceed8 100644 --- a/posthog/models/feature_flag/flag_matching.py +++ b/posthog/models/feature_flag/flag_matching.py @@ -3,7 +3,7 @@ from enum import Enum import time import structlog -from typing import Dict, List, Optional, Tuple, Union +from typing import Dict, List, Optional, Tuple, Union, cast from prometheus_client import Counter from django.conf import settings @@ -397,11 +397,9 @@ def condition_eval(key, condition): nonlocal person_query property_list = Filter(data=condition).property_groups.flat - properties_with_math_operators = [ - key_and_field_for_property(prop) - for prop in property_list - if prop.operator in ["gt", "lt", "gte", "lte"] - ] + properties_with_math_operators = get_all_properties_with_math_operators( + property_list, self.cohorts_cache + ) if len(condition.get("properties", {})) > 0: # Feature Flags don't support OR filtering yet @@ -914,3 +912,24 @@ def key_and_field_for_property(property: Property) -> Tuple[str, str]: f"{column}_{key}_type", f"{column}__{key}", ) + + +def get_all_properties_with_math_operators( + properties: List[Property], cohorts_cache: Dict[int, Cohort] +) -> List[Tuple[str, str]]: + all_keys_and_fields = [] + + for prop in properties: + if prop.type == "cohort": + cohort_id = int(cast(Union[str, int], prop.value)) + if cohorts_cache.get(cohort_id) is None: + cohorts_cache[cohort_id] = Cohort.objects.using(DATABASE_FOR_FLAG_MATCHING).get(pk=cohort_id) + cohort = cohorts_cache[cohort_id] + if cohort: + all_keys_and_fields.extend( + get_all_properties_with_math_operators(cohort.properties.flat, cohorts_cache) + ) + elif prop.operator in ["gt", "lt", "gte", "lte"] and prop.type in ("person", "group"): + all_keys_and_fields.append(key_and_field_for_property(prop)) + + return all_keys_and_fields diff --git a/posthog/models/filters/test/__snapshots__/test_filter.ambr b/posthog/models/filters/test/__snapshots__/test_filter.ambr index 2b65b1a00ce6f..961af14048237 100644 --- a/posthog/models/filters/test/__snapshots__/test_filter.ambr +++ b/posthog/models/filters/test/__snapshots__/test_filter.ambr @@ -306,6 +306,36 @@ ORDER BY "posthog_persondistinctid"."id" ASC ' --- +# name: TestDjangoPropertiesToQ.test_icontains_with_array_value + ' + SELECT "posthog_person"."uuid" + FROM "posthog_person" + WHERE (UPPER(("posthog_person"."properties" ->> '$key')::text) LIKE UPPER('%[''red'']%') + AND "posthog_person"."properties" ? '$key' + AND NOT (("posthog_person"."properties" -> '$key') = 'null') + AND "posthog_person"."team_id" = 2) + ' +--- +# name: TestDjangoPropertiesToQ.test_icontains_with_array_value.1 + ' + SELECT "posthog_person"."uuid" + FROM "posthog_person" + WHERE (UPPER(("posthog_person"."properties" ->> '$key')::text) LIKE UPPER('%red%') + AND "posthog_person"."properties" ? '$key' + AND NOT (("posthog_person"."properties" -> '$key') = 'null') + AND "posthog_person"."team_id" = 2) + ' +--- +# name: TestDjangoPropertiesToQ.test_icontains_with_array_value.2 + ' + SELECT "posthog_person"."uuid" + FROM "posthog_person" + WHERE (("posthog_person"."properties" -> '$key') > '["2"]' + AND "posthog_person"."properties" ? '$key' + AND NOT (("posthog_person"."properties" -> '$key') = 'null') + AND "posthog_person"."team_id" = 2) + ' +--- # name: TestDjangoPropertyGroupToQ.test_property_group_to_q_with_cohorts ' SELECT "posthog_cohort"."id", diff --git a/posthog/models/filters/test/test_filter.py b/posthog/models/filters/test/test_filter.py index 72ad460b91084..b2e209969b28c 100644 --- a/posthog/models/filters/test/test_filter.py +++ b/posthog/models/filters/test/test_filter.py @@ -833,6 +833,67 @@ def filter_persons_with_annotation(filter: Filter, team: Team): ) self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 2) + @snapshot_postgres_queries + def test_icontains_with_array_value(self): + _create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$key": "red-123"}) + _create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$key": "blue-123"}) + _create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"$key": 6}) + + flush_persons_and_events() + + def filter_persons_with_annotation(filter: Filter, team: Team): + persons = Person.objects.annotate( + **{ + "properties_$key_type": Func( + F("properties__$key"), function="JSONB_TYPEOF", output_field=CharField() + ) + } + ).filter(properties_to_Q(filter.property_groups.flat)) + persons = persons.filter(team_id=team.pk) + return [str(uuid) for uuid in persons.values_list("uuid", flat=True)] + + filter = Filter( + data={ + "properties": [ + { + "type": "person", + "key": "$key", + "value": ["red"], + "operator": "icontains", + } + ] + } + ) + self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 0) + + filter = Filter( + data={ + "properties": [ + { + "type": "person", + "key": "$key", + "value": "red", + "operator": "icontains", + } + ] + } + ) + self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 1) + + filter = Filter( + data={ + "properties": [ + { + "type": "person", + "key": "$key", + "value": ["2"], + "operator": "gt", + } + ] + } + ) + self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 0) + def filter_persons_with_property_group( filter: Filter, team: Team, property_overrides: Dict[str, Any] = {} diff --git a/posthog/queries/base.py b/posthog/queries/base.py index 00269a384b37f..e6b0a93f5811c 100644 --- a/posthog/queries/base.py +++ b/posthog/queries/base.py @@ -20,7 +20,6 @@ from posthog.models.filters.filter import Filter from posthog.models.filters.path_filter import PathFilter from posthog.models.property import ( - CLICKHOUSE_ONLY_PROPERTY_TYPES, Property, PropertyGroup, ) @@ -163,7 +162,7 @@ def compare(lhs, rhs, operator): except Exception: pass - if parsed_value is not None: + if parsed_value is not None and override_value is not None: if isinstance(override_value, str): return compare(override_value, str(value), operator) else: @@ -171,15 +170,6 @@ def compare(lhs, rhs, operator): else: return compare(str(override_value), str(value), operator) - if operator == "gte": - return type(override_value) == type(value) and override_value >= value - - if operator == "lt": - return type(override_value) == type(value) and override_value < value - - if operator == "lte": - return type(override_value) == type(value) and override_value <= value - if operator in ["is_date_before", "is_date_after"]: try: parsed_date = parser.parse(str(value)) @@ -233,14 +223,12 @@ def empty_or_null_with_value_q( f"{column}__{key}", value_as_coerced_to_number ) else: - if isinstance(value, list): - raise TypeError(f"empty_or_null_with_value_q: Operator {operator} does not support list values") - parsed_value = None if operator in ("gt", "gte", "lt", "lte"): try: - parsed_value = float(value) - except (ValueError, TypeError): + # try to parse even if arrays can't be parsed, the catch will handle it + parsed_value = float(value) # type: ignore + except Exception: pass if parsed_value is not None: @@ -273,7 +261,7 @@ def property_to_Q( cohorts_cache: Optional[Dict[int, Cohort]] = None, using_database: str = "default", ) -> Q: - if property.type in CLICKHOUSE_ONLY_PROPERTY_TYPES: + if property.type not in ["person", "group", "cohort"]: raise ValueError(f"property_to_Q: type is not supported: {repr(property.type)}") value = property._parse_value(property.value) diff --git a/posthog/queries/test/test_base.py b/posthog/queries/test/test_base.py index 91076d31aa4ff..de09d2f1ed3ce 100644 --- a/posthog/queries/test/test_base.py +++ b/posthog/queries/test/test_base.py @@ -244,3 +244,42 @@ def test_match_property_date_operators(self): self.assertTrue(match_property(property_d, {"key": "2022-04-05 12:34:11 CET"})) self.assertFalse(match_property(property_d, {"key": "2022-04-05 12:34:13 CET"})) + + def test_none_property_value_with_all_operators(self): + property_a = Property(key="key", value="none", operator="is_not") + self.assertFalse(match_property(property_a, {"key": None})) + self.assertTrue(match_property(property_a, {"key": "non"})) + + property_b = Property(key="key", value=None, operator="is_set") + self.assertTrue(match_property(property_b, {"key": None})) + + property_c = Property(key="key", value="no", operator="icontains") + self.assertTrue(match_property(property_c, {"key": None})) + self.assertFalse(match_property(property_c, {"key": "smh"})) + + property_d = Property(key="key", value="No", operator="regex") + self.assertTrue(match_property(property_d, {"key": None})) + + property_d_lower_case = Property(key="key", value="no", operator="regex") + self.assertFalse(match_property(property_d_lower_case, {"key": None})) + + property_e = Property(key="key", value=1, operator="gt") + self.assertTrue(match_property(property_e, {"key": None})) + + property_f = Property(key="key", value=1, operator="lt") + self.assertFalse(match_property(property_f, {"key": None})) + + property_g = Property(key="key", value="xyz", operator="gte") + self.assertFalse(match_property(property_g, {"key": None})) + + property_h = Property(key="key", value="Oo", operator="lte") + self.assertTrue(match_property(property_h, {"key": None})) + + property_i = Property(key="key", value="2022-05-01", operator="is_date_before") + self.assertFalse(match_property(property_i, {"key": None})) + + property_j = Property(key="key", value="2022-05-01", operator="is_date_after") + self.assertFalse(match_property(property_j, {"key": None})) + + property_k = Property(key="key", value="2022-05-01", operator="is_date_before") + self.assertFalse(match_property(property_k, {"key": "random"})) diff --git a/posthog/test/__snapshots__/test_feature_flag.ambr b/posthog/test/__snapshots__/test_feature_flag.ambr index a4200a05ffd09..d303d7bd80145 100644 --- a/posthog/test/__snapshots__/test_feature_flag.ambr +++ b/posthog/test/__snapshots__/test_feature_flag.ambr @@ -345,6 +345,62 @@ AND "posthog_group"."group_type_index" = 2) ' --- +# name: TestFeatureFlagMatcher.test_numeric_operator_with_cohorts_and_nested_cohorts + ' + SELECT "posthog_cohort"."id", + "posthog_cohort"."name", + "posthog_cohort"."description", + "posthog_cohort"."team_id", + "posthog_cohort"."deleted", + "posthog_cohort"."filters", + "posthog_cohort"."version", + "posthog_cohort"."pending_version", + "posthog_cohort"."count", + "posthog_cohort"."created_by_id", + "posthog_cohort"."created_at", + "posthog_cohort"."is_calculating", + "posthog_cohort"."last_calculation", + "posthog_cohort"."errors_calculating", + "posthog_cohort"."is_static", + "posthog_cohort"."groups" + FROM "posthog_cohort" + WHERE (NOT "posthog_cohort"."deleted" + AND "posthog_cohort"."team_id" = 2) + ' +--- +# name: TestFeatureFlagMatcher.test_numeric_operator_with_cohorts_and_nested_cohorts.1 + ' + SELECT (((("posthog_person"."properties" -> 'number') > '"100"' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'number')) = 'string') + OR (("posthog_person"."properties" -> 'number') > '100.0' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'number')) = 'number')) + AND "posthog_person"."properties" ? 'number' + AND NOT (("posthog_person"."properties" -> 'number') = 'null')) AS "flag_X_condition_0", + (((("posthog_person"."properties" -> 'version') > '"1.05"' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'version')) = 'string') + OR (("posthog_person"."properties" -> 'version') > '1.05' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'version')) = 'number')) + AND "posthog_person"."properties" ? 'version' + AND NOT (("posthog_person"."properties" -> 'version') = 'null')) AS "flag_X_condition_0", + (((("posthog_person"."properties" -> 'number') < '"31"' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'number')) = 'string') + OR (("posthog_person"."properties" -> 'number') < '31.0' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'number')) = 'number')) + AND "posthog_person"."properties" ? 'number' + AND NOT (("posthog_person"."properties" -> 'number') = 'null') + AND ((("posthog_person"."properties" -> 'nested_prop') > '"20"' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'nested_prop')) = 'string') + OR (("posthog_person"."properties" -> 'nested_prop') > '20.0' + AND JSONB_TYPEOF(("posthog_person"."properties" -> 'nested_prop')) = 'number')) + AND "posthog_person"."properties" ? 'nested_prop' + AND NOT (("posthog_person"."properties" -> 'nested_prop') = 'null')) AS "flag_X_condition_0" + FROM "posthog_person" + INNER JOIN "posthog_persondistinctid" ON ("posthog_person"."id" = "posthog_persondistinctid"."person_id") + WHERE ("posthog_persondistinctid"."distinct_id" = '307' + AND "posthog_persondistinctid"."team_id" = 2 + AND "posthog_person"."team_id" = 2) + ' +--- # name: TestFeatureFlagMatcher.test_numeric_operator_with_groups_and_person_flags ' SELECT "posthog_grouptypemapping"."id", diff --git a/posthog/test/test_feature_flag.py b/posthog/test/test_feature_flag.py index 83bfe2583a482..c77fabc41744d 100644 --- a/posthog/test/test_feature_flag.py +++ b/posthog/test/test_feature_flag.py @@ -4582,6 +4582,167 @@ def test_numeric_operator_with_groups_and_person_flags(self): FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), ) + def test_numeric_operator_with_cohorts_and_nested_cohorts(self): + Person.objects.create( + team=self.team, + distinct_ids=["307"], + properties={"number": 30, "string_number": "30", "version": "1.24", "nested_prop": 21}, + ) + cohort1 = Cohort.objects.create( + team=self.team, + groups=[ + { + "properties": [ + { + "key": "number", + "value": "100", + "type": "person", + "operator": "gt", + } + ] + } + ], + ) + feature_flag1 = self.create_feature_flag( + key="random1", + filters={ + "groups": [ + { + "properties": [ + { + "key": "id", + "value": cohort1.pk, + "type": "cohort", + }, + ] + } + ] + }, + ) + + self.assertEqual( + self.match_flag(feature_flag1, "307"), + FeatureFlagMatch(False, None, FeatureFlagMatchReason.NO_CONDITION_MATCH, 0), + ) + + cohort2 = Cohort.objects.create( + team=self.team, + groups=[ + { + "properties": [ + { + "key": "version", + "value": "1.05", + "operator": "gt", + "type": "person", + }, + ] + } + ], + ) + feature_flag2 = self.create_feature_flag( + key="random2", + filters={ + "groups": [ + { + "properties": [ + { + "key": "id", + "value": cohort2.pk, + "type": "cohort", + }, + ] + } + ] + }, + ) + + self.assertEqual( + self.match_flag(feature_flag2, "307"), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + + cohort_nest = Cohort.objects.create( + team=self.team, + groups=[ + { + "properties": [ + { + "key": "nested_prop", + "value": "20", + "operator": "gt", + "type": "person", + }, + ] + } + ], + ) + + cohort3 = Cohort.objects.create( + team=self.team, + groups=[ + { + "properties": [ + { + "key": "number", + "value": "31", + "operator": "lt", + "type": "person", + }, + { + "key": "id", + "value": str(cohort_nest.pk), + "type": "cohort", + }, + ] + } + ], + ) + feature_flag3 = self.create_feature_flag( + key="random3", + filters={ + "groups": [ + { + "properties": [ + { + "key": "id", + "value": str(cohort3.pk), + "type": "cohort", + }, + ] + } + ] + }, + ) + + self.assertEqual( + self.match_flag(feature_flag3, "307"), + FeatureFlagMatch(True, None, FeatureFlagMatchReason.CONDITION_MATCH, 0), + ) + + # Make sure clashes on property name doesn't affect computation + with snapshot_postgres_queries_context(self, replace_all_numbers=False): + self.assertEqual( + FeatureFlagMatcher( + [feature_flag1, feature_flag2, feature_flag3], + "307", + ).get_matches()[1], + { + "random1": { + "condition_index": 0, + "reason": FeatureFlagMatchReason.NO_CONDITION_MATCH, + }, + "random2": { + "condition_index": 0, + "reason": FeatureFlagMatchReason.CONDITION_MATCH, + }, + "random3": { + "condition_index": 0, + "reason": FeatureFlagMatchReason.CONDITION_MATCH, + }, + }, + ) + class TestFeatureFlagHashKeyOverrides(BaseTest, QueryMatchingTest): person: Person From 90dc0731bbff4b793238b9792dcadad1f2c3658e Mon Sep 17 00:00:00 2001 From: Neil Kakkar Date: Tue, 7 Nov 2023 10:50:25 +0000 Subject: [PATCH 3/3] fix --- posthog/queries/base.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/posthog/queries/base.py b/posthog/queries/base.py index e6b0a93f5811c..1279fe088dda7 100644 --- a/posthog/queries/base.py +++ b/posthog/queries/base.py @@ -261,7 +261,8 @@ def property_to_Q( cohorts_cache: Optional[Dict[int, Cohort]] = None, using_database: str = "default", ) -> Q: - if property.type not in ["person", "group", "cohort"]: + if property.type not in ["person", "group", "cohort", "event"]: + # We need to support event type for backwards compatibility, even though it's treated as a person property type raise ValueError(f"property_to_Q: type is not supported: {repr(property.type)}") value = property._parse_value(property.value)