Skip to content

Commit

Permalink
fix(flags): Do the expected for numeric comparisons - FINAL FINAL edi…
Browse files Browse the repository at this point in the history
…tion (#18443)
  • Loading branch information
neilkakkar authored Nov 8, 2023
1 parent f6ce07f commit aa6bac9
Show file tree
Hide file tree
Showing 9 changed files with 1,324 additions and 65 deletions.
36 changes: 36 additions & 0 deletions ee/clickhouse/models/test/test_filters.py
Original file line number Diff line number Diff line change
Expand Up @@ -610,6 +610,42 @@ def test_numerical(self):
events = _filter_events(filter, self.team)
self.assertEqual(events[0]["id"], event1_uuid)

def test_numerical_person_properties(self):
_create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4})
_create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5})
_create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6})

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$a_number",
"value": 4,
"operator": "gt",
}
]
}
)
self.assertEqual(len(_filter_persons(filter, self.team)), 2)

filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]})
self.assertEqual(len(_filter_persons(filter, self.team)), 1)

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$a_number",
"value": 6,
"operator": "lt",
}
]
}
)
self.assertEqual(len(_filter_persons(filter, self.team)), 2)

def test_contains(self):
_create_event(team=self.team, distinct_id="test", event="$pageview")
event2_uuid = _create_event(
Expand Down
68 changes: 61 additions & 7 deletions posthog/models/feature_flag/flag_matching.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,14 +3,14 @@
from enum import Enum
import time
import structlog
from typing import Dict, List, Optional, Tuple, Union
from typing import Dict, List, Optional, Tuple, Union, cast

from prometheus_client import Counter
from django.conf import settings
from django.db import DatabaseError, IntegrityError, OperationalError
from django.db.models.expressions import ExpressionWrapper, RawSQL
from django.db.models.fields import BooleanField
from django.db.models import Q
from django.db.models import Q, Func, F, CharField
from django.db.models.query import QuerySet
from sentry_sdk.api import capture_exception, start_span
from posthog.metrics import LABEL_TEAM_ID
Expand All @@ -24,7 +24,7 @@
from posthog.models.property.property import Property
from posthog.models.cohort import Cohort
from posthog.models.utils import execute_with_timeout
from posthog.queries.base import match_property, properties_to_Q
from posthog.queries.base import match_property, properties_to_Q, sanitize_property_key
from posthog.database_healthcheck import (
postgres_healthcheck,
DATABASE_FOR_FLAG_MATCHING,
Expand Down Expand Up @@ -396,6 +396,11 @@ def condition_eval(key, condition):
annotate_query = True
nonlocal person_query

property_list = Filter(data=condition).property_groups.flat
properties_with_math_operators = get_all_properties_with_math_operators(
property_list, self.cohorts_cache
)

if len(condition.get("properties", {})) > 0:
# Feature Flags don't support OR filtering yet
target_properties = self.property_value_overrides
Expand All @@ -404,8 +409,9 @@ def condition_eval(key, condition):
self.cache.group_type_index_to_name[feature_flag.aggregation_group_type_index],
{},
)

expr = properties_to_Q(
Filter(data=condition).property_groups.flat,
property_list,
override_property_values=target_properties,
cohorts_cache=self.cohorts_cache,
using_database=DATABASE_FOR_FLAG_MATCHING,
Expand All @@ -428,13 +434,24 @@ def condition_eval(key, condition):

if annotate_query:
if feature_flag.aggregation_group_type_index is None:
# :TRICKY: Flag matching depends on type of property when doing >, <, >=, <= comparisons.
# This requires a generated field to query in Q objects, which sadly don't allow inlining fields,
# hence we need to annotate the query here, even though these annotations are used much deeper,
# in properties_to_q, in empty_or_null_with_value_q
# These need to come in before the expr so they're available to use inside the expr.
# Same holds for the group queries below.
type_property_annotations = {
prop_key: Func(F(prop_field), function="JSONB_TYPEOF", output_field=CharField())
for prop_key, prop_field in properties_with_math_operators
}
person_query = person_query.annotate(
**type_property_annotations,
**{
key: ExpressionWrapper(
expr if expr else RawSQL("true", []),
output_field=BooleanField(),
)
}
),
},
)
person_fields.append(key)
else:
Expand All @@ -445,13 +462,18 @@ def condition_eval(key, condition):
group_query,
group_fields,
) = group_query_per_group_type_mapping[feature_flag.aggregation_group_type_index]
type_property_annotations = {
prop_key: Func(F(prop_field), function="JSONB_TYPEOF", output_field=CharField())
for prop_key, prop_field in properties_with_math_operators
}
group_query = group_query.annotate(
**type_property_annotations,
**{
key: ExpressionWrapper(
expr if expr else RawSQL("true", []),
output_field=BooleanField(),
)
}
},
)
group_fields.append(key)
group_query_per_group_type_mapping[feature_flag.aggregation_group_type_index] = (
Expand Down Expand Up @@ -881,3 +903,35 @@ def parse_exception_for_error_message(err: Exception):
reason = "query_wait_timeout"

return reason


def key_and_field_for_property(property: Property) -> Tuple[str, str]:
column = "group_properties" if property.type == "group" else "properties"
key = property.key
sanitized_key = sanitize_property_key(key)

return (
f"{column}_{sanitized_key}_type",
f"{column}__{key}",
)


def get_all_properties_with_math_operators(
properties: List[Property], cohorts_cache: Dict[int, Cohort]
) -> List[Tuple[str, str]]:
all_keys_and_fields = []

for prop in properties:
if prop.type == "cohort":
cohort_id = int(cast(Union[str, int], prop.value))
if cohorts_cache.get(cohort_id) is None:
cohorts_cache[cohort_id] = Cohort.objects.using(DATABASE_FOR_FLAG_MATCHING).get(pk=cohort_id)
cohort = cohorts_cache[cohort_id]
if cohort:
all_keys_and_fields.extend(
get_all_properties_with_math_operators(cohort.properties.flat, cohorts_cache)
)
elif prop.operator in ["gt", "lt", "gte", "lte"] and prop.type in ("person", "group"):
all_keys_and_fields.append(key_and_field_for_property(prop))

return all_keys_and_fields
30 changes: 30 additions & 0 deletions posthog/models/filters/test/__snapshots__/test_filter.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,36 @@
ORDER BY "posthog_persondistinctid"."id" ASC
'
---
# name: TestDjangoPropertiesToQ.test_icontains_with_array_value
'
SELECT "posthog_person"."uuid"
FROM "posthog_person"
WHERE (UPPER(("posthog_person"."properties" ->> '$key')::text) LIKE UPPER('%[''red'']%')
AND "posthog_person"."properties" ? '$key'
AND NOT (("posthog_person"."properties" -> '$key') = 'null')
AND "posthog_person"."team_id" = 2)
'
---
# name: TestDjangoPropertiesToQ.test_icontains_with_array_value.1
'
SELECT "posthog_person"."uuid"
FROM "posthog_person"
WHERE (UPPER(("posthog_person"."properties" ->> '$key')::text) LIKE UPPER('%red%')
AND "posthog_person"."properties" ? '$key'
AND NOT (("posthog_person"."properties" -> '$key') = 'null')
AND "posthog_person"."team_id" = 2)
'
---
# name: TestDjangoPropertiesToQ.test_icontains_with_array_value.2
'
SELECT "posthog_person"."uuid"
FROM "posthog_person"
WHERE (("posthog_person"."properties" -> '$key') > '["2"]'
AND "posthog_person"."properties" ? '$key'
AND NOT (("posthog_person"."properties" -> '$key') = 'null')
AND "posthog_person"."team_id" = 2)
'
---
# name: TestDjangoPropertyGroupToQ.test_property_group_to_q_with_cohorts
'
SELECT "posthog_cohort"."id",
Expand Down
149 changes: 112 additions & 37 deletions posthog/models/filters/test/test_filter.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import json
from typing import Any, Callable, Dict, List, Optional, cast

from django.db.models import Q
from django.db.models import Q, Func, F, CharField

from posthog.constants import FILTER_TEST_ACCOUNTS
from posthog.models import Cohort, Filter, Person, Team
Expand Down Expand Up @@ -219,42 +219,6 @@ def test_incomplete_data(self):
)
self.assertListEqual(filter.property_groups.values, [])

def test_numerical_person_properties(self):
person_factory(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4})
person_factory(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5})
person_factory(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6})

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$a_number",
"value": 4,
"operator": "gt",
}
]
}
)
self.assertEqual(len(filter_persons(filter, self.team)), 2)

filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]})
self.assertEqual(len(filter_persons(filter, self.team)), 1)

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$a_number",
"value": 6,
"operator": "lt",
}
]
}
)
self.assertEqual(len(filter_persons(filter, self.team)), 2)

def test_contains_persons(self):
person_factory(
team_id=self.team.pk,
Expand Down Expand Up @@ -819,6 +783,117 @@ def _filter_with_date_range(

return Filter(data=data)

def test_numerical_person_properties(self):
_create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$a_number": 4})
_create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$a_number": 5})
_create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"$a_number": 6})
_create_person(team_id=self.team.pk, distinct_ids=["p4"], properties={"$a_number": 14})

flush_persons_and_events()

def filter_persons_with_annotation(filter: Filter, team: Team):
persons = Person.objects.annotate(
**{
"properties_anumber_27b11200b8ed4fb_type": Func(
F("properties__$a_number"), function="JSONB_TYPEOF", output_field=CharField()
)
}
).filter(properties_to_Q(filter.property_groups.flat))
persons = persons.filter(team_id=team.pk)
return [str(uuid) for uuid in persons.values_list("uuid", flat=True)]

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$a_number",
"value": "4",
"operator": "gt",
}
]
}
)
self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 3)

filter = Filter(data={"properties": [{"type": "person", "key": "$a_number", "value": 5}]})
self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 1)

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$a_number",
"value": 6,
"operator": "lt",
}
]
}
)
self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 2)

@snapshot_postgres_queries
def test_icontains_with_array_value(self):
_create_person(team_id=self.team.pk, distinct_ids=["p1"], properties={"$key": "red-123"})
_create_person(team_id=self.team.pk, distinct_ids=["p2"], properties={"$key": "blue-123"})
_create_person(team_id=self.team.pk, distinct_ids=["p3"], properties={"$key": 6})

flush_persons_and_events()

def filter_persons_with_annotation(filter: Filter, team: Team):
persons = Person.objects.annotate(
**{
"properties_$key_type": Func(
F("properties__$key"), function="JSONB_TYPEOF", output_field=CharField()
)
}
).filter(properties_to_Q(filter.property_groups.flat))
persons = persons.filter(team_id=team.pk)
return [str(uuid) for uuid in persons.values_list("uuid", flat=True)]

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$key",
"value": ["red"],
"operator": "icontains",
}
]
}
)
self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 0)

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$key",
"value": "red",
"operator": "icontains",
}
]
}
)
self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 1)

filter = Filter(
data={
"properties": [
{
"type": "person",
"key": "$key",
"value": ["2"],
"operator": "gt",
}
]
}
)
self.assertEqual(len(filter_persons_with_annotation(filter, self.team)), 0)


def filter_persons_with_property_group(
filter: Filter, team: Team, property_overrides: Dict[str, Any] = {}
Expand Down
Loading

0 comments on commit aa6bac9

Please sign in to comment.