Skip to content

Commit

Permalink
fix(filters): Don't match on an invalid filter condition; if such a c…
Browse files Browse the repository at this point in the history
…ondition exists, fail the match (#23297)

* saving work

* cleaned up tests

* this test condition no longer passes

* Update query snapshots

* saving work

* cleaned it up

* better comments

* Update query snapshots

* Update query snapshots

* delete unused snapshot

* Update query snapshots

* Update query snapshots

* relevant test condition

* I think I covered everything

* accidental commit

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
dmarticus and github-actions[bot] authored Jul 1, 2024
1 parent 70549f8 commit 93f1261
Show file tree
Hide file tree
Showing 10 changed files with 482 additions and 354 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -227,97 +227,26 @@
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.1
'''
/* user_id:0 request:_snapshot_ */
SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value,
count(*) as count
FROM events e
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
GROUP BY value
ORDER BY count DESC, value DESC
LIMIT 26
OFFSET 0
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.2
'''
/* user_id:0 request:_snapshot_ */
SELECT countIf(steps = 1) step_1,
countIf(steps = 2) step_2,
avg(step_1_average_conversion_time_inner) step_1_average_conversion_time,
median(step_1_median_conversion_time_inner) step_1_median_conversion_time,
prop
FROM
(SELECT aggregation_target,
steps,
avg(step_1_conversion_time) step_1_average_conversion_time_inner,
median(step_1_conversion_time) step_1_median_conversion_time_inner ,
prop
FROM
(SELECT aggregation_target,
steps,
max(steps) over (PARTITION BY aggregation_target,
prop) as max_steps,
step_1_conversion_time ,
prop
FROM
(SELECT *,
if(latest_0 <= latest_1
AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps ,
if(isNotNull(latest_1)
AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time,
prop
FROM
(SELECT aggregation_target, timestamp, step_0,
latest_0,
step_1,
min(latest_1) over (PARTITION by aggregation_target,
prop
ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 ,
if(has([['test'], ['control']], prop), prop, ['Other']) as prop
FROM
(SELECT *,
if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop
FROM
(SELECT e.timestamp as timestamp,
pdi.person_id as aggregation_target,
pdi.person_id as person_id,
if(event = '$pageview', 1, 0) as step_0,
if(step_0 = 1, timestamp, null) as latest_0,
if(event = '$pageleave', 1, 0) as step_1,
if(step_1 = 1, timestamp, null) as latest_1,
array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic,
prop_basic as prop,
argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
AND distinct_id IN
(SELECT distinct_id
FROM events
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam') )
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['$pageleave', '$pageview']
AND toTimeZone(timestamp, 'Europe/Amsterdam') >= toDateTime('2020-01-01 14:20:21', 'Europe/Amsterdam')
AND toTimeZone(timestamp, 'Europe/Amsterdam') <= toDateTime('2020-01-06 10:00:00', 'Europe/Amsterdam')
AND (step_0 = 1
OR step_1 = 1) )))
WHERE step_0 = 1 ))
GROUP BY aggregation_target,
steps,
prop
HAVING steps = max_steps)
GROUP BY prop
/* celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
WHERE timestamp > date_sub(DAY, 3, now())
AND timestamp < now()
GROUP BY team_id
ORDER BY age;
'''
# ---
# name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_and_events_out_of_time_range_timezones.3
Expand Down
15 changes: 11 additions & 4 deletions posthog/api/feature_flag.py
Original file line number Diff line number Diff line change
Expand Up @@ -246,10 +246,17 @@ def properties_all_match(predicate):
detail=f"Invalid date value: {prop.value}", code="invalid_date"
)

# make sure regex and icontains properties have string values
if prop.operator in ["regex", "icontains", "not_regex", "not_icontains"] and not isinstance(
prop.value, str
):
# make sure regex, icontains, gte, lte, lt, and gt properties have string values
if prop.operator in [
"regex",
"icontains",
"not_regex",
"not_icontains",
"gte",
"lte",
"gt",
"lt",
] and not isinstance(prop.value, str):
raise serializers.ValidationError(
detail=f"Invalid value for operator {prop.operator}: {prop.value}", code="invalid_value"
)
Expand Down
20 changes: 0 additions & 20 deletions posthog/api/test/__snapshots__/test_feature_flag.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -1801,26 +1801,6 @@
LIMIT 100 SETTINGS optimize_aggregation_in_order = 1
'''
# ---
# name: TestFeatureFlag.test_creating_static_cohort.16
'''
/* user_id:0 request:_snapshot_ */
SELECT id
FROM person
INNER JOIN
(SELECT person_id
FROM person_static_cohort
WHERE team_id = 2
AND cohort_id = 2
GROUP BY person_id,
cohort_id,
team_id) cohort_persons ON cohort_persons.person_id = person.id
WHERE team_id = 2
GROUP BY id
HAVING max(is_deleted) = 0
ORDER BY argMax(person.created_at, version) DESC, id DESC
LIMIT 100 SETTINGS optimize_aggregation_in_order = 1
'''
# ---
# name: TestFeatureFlag.test_creating_static_cohort.2
'''
SELECT "posthog_organizationmembership"."id",
Expand Down
143 changes: 143 additions & 0 deletions posthog/api/test/test_decide.py
Original file line number Diff line number Diff line change
Expand Up @@ -2200,6 +2200,149 @@ def test_flag_with_regular_cohorts(self, *args):
self.assertEqual(response.json()["featureFlags"], {"cohort-flag": False})
self.assertEqual(response.json()["errorsWhileComputingFlags"], False)

def test_flag_with_invalid_cohort_filter_condition(self, *args):
self.team.app_urls = ["https://example.com"]
self.team.save()
self.client.logout()

person1_distinct_id = "example_id"
Person.objects.create(
team=self.team,
distinct_ids=[person1_distinct_id],
properties={"registration_ts": 1716447600},
)

# Create a cohort with an invalid filter condition (tis broken filter came from this issue: https://github.com/PostHog/posthog/issues/23213)
# The invalid condition is that the registration_ts property is compared against a list of values
# Since this filter must match everything, the flag should evaluate to False
cohort = Cohort.objects.create(
team=self.team,
filters={
"properties": {
"type": "OR",
"values": [
{
"type": "AND",
"values": [
# This is the valid condition
{
"key": "registration_ts",
"type": "person",
"value": "1716274800",
"operator": "gte",
},
# This is the invalid condition (lte operator comparing against a list of values)
{
"key": "registration_ts",
"type": "person",
"value": ["1716447600"],
"operator": "lte",
},
],
}
],
}
},
name="Test cohort",
)

# Create a feature flag that uses the cohort
FeatureFlag.objects.create(
team=self.team,
filters={
"groups": [
{
"properties": [
{
"key": "id",
"type": "cohort",
"value": cohort.pk,
}
],
}
]
},
name="This is a cohort-based flag",
key="cohort-flag",
created_by=self.user,
)

with self.assertNumQueries(5):
response = self._post_decide(api_version=3, distinct_id=person1_distinct_id)
self.assertEqual(response.json()["featureFlags"], {"cohort-flag": False})
self.assertEqual(response.json()["errorsWhileComputingFlags"], False)

def test_flag_with_invalid_but_safe_cohort_filter_condition(self, *args):
self.team.app_urls = ["https://example.com"]
self.team.save()
self.client.logout()

person1_distinct_id = "example_id"
Person.objects.create(
team=self.team,
distinct_ids=[person1_distinct_id],
properties={"registration_ts": 1716447600},
)

# Create a cohort with a safe OR filter that contains an invalid condition
# it should still evaluate the FeatureFlag to True
cohort = Cohort.objects.create(
team=self.team,
filters={
"properties": {
"type": "OR",
"values": [
{
"type": "OR",
"values": [
# This is the valid condition
{
"key": "registration_ts",
"type": "person",
"value": "1716274800",
"operator": "gte",
},
# This is the invalid condition (lte operator comparing against a list of values)
{
"key": "registration_ts",
"type": "person",
"value": ["1716447600"],
"operator": "lte",
},
],
}
],
}
},
name="Test cohort",
)

# Create a feature flag that uses the cohort
FeatureFlag.objects.create(
team=self.team,
filters={
"groups": [
{
"properties": [
{
"key": "id",
"type": "cohort",
"value": cohort.pk,
}
],
}
]
},
name="This is a cohort-based flag",
key="cohort-flag",
created_by=self.user,
)

with self.assertNumQueries(5):
response = self._post_decide(api_version=3, distinct_id=person1_distinct_id)
self.assertEqual(response.json()["featureFlags"], {"cohort-flag": True})
self.assertEqual(response.json()["errorsWhileComputingFlags"], False)

def test_flag_with_unknown_cohort(self, *args):
self.team.app_urls = ["https://example.com"]
self.team.save()
Expand Down
Loading

0 comments on commit 93f1261

Please sign in to comment.