Skip to content

Commit

Permalink
feat(cohorts): Allow adding property filters to events in cohorts (#2…
Browse files Browse the repository at this point in the history
  • Loading branch information
neilkakkar authored Apr 15, 2024
1 parent 7d483a3 commit de511ee
Show file tree
Hide file tree
Showing 29 changed files with 768 additions and 118 deletions.
1 change: 1 addition & 0 deletions .eslintrc.js
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,7 @@ module.exports = {
'error',
{
ignoreRestSiblings: true,
destructuredArrayIgnorePattern: '^_$',
},
],
'@typescript-eslint/prefer-ts-expect-error': 'error',
Expand Down
9 changes: 6 additions & 3 deletions ee/clickhouse/models/test/__snapshots__/test_cohort.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -84,7 +84,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 2 year
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_15_level_level_0_level_0_level_0_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_15_level_level_0_level_0_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -149,7 +150,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 2 year
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_17_level_level_0_level_0_level_0_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_17_level_level_0_level_0_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -238,7 +240,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 2 year
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_19_level_level_0_level_0_level_0_0,
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_19_level_level_0_level_0_level_0_0,
minIf(timestamp, event = 'signup') >= now() - INTERVAL 15 day
AND minIf(timestamp, event = 'signup') < now() as first_time_condition_19_level_level_0_level_1_level_0_level_0_level_0_0
FROM events e
Expand Down
94 changes: 80 additions & 14 deletions ee/clickhouse/queries/test/__snapshots__/test_cohort_query.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -7,10 +7,12 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 day
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0,
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0,
countIf(timestamp > now() - INTERVAL 2 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_0_level_1_0,
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_1_0,
minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123'
AND event = '$autocapture'))) >= now() - INTERVAL 2 week
AND minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123'
Expand Down Expand Up @@ -126,7 +128,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_0_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -167,7 +170,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -250,7 +254,8 @@
AND event_0_latest_1 <= event_0_latest_0 + INTERVAL 3 day, 2, 1)) = 2 AS steps_0,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$new_view') >= 1 AS performed_event_multiple_condition_None_level_level_0_level_1_0
AND event = '$new_view'
AND 1=1) >= 1 AS performed_event_multiple_condition_None_level_level_0_level_1_0
FROM
(SELECT person_id,
event,
Expand Down Expand Up @@ -297,7 +302,8 @@
AND event_0_latest_1 <= event_0_latest_0 + INTERVAL 3 day, 2, 1)) = 2 AS steps_0,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') >= 1 AS performed_event_multiple_condition_None_level_level_0_level_1_0
AND event = '$pageview'
AND 1=1) >= 1 AS performed_event_multiple_condition_None_level_level_0_level_1_0
FROM
(SELECT person_id,
event,
Expand Down Expand Up @@ -348,6 +354,58 @@
AND (performed_event_multiple_condition_None_level_level_0_level_1_0)))
'''
# ---
# name: TestCohortQuery.test_performed_event_with_event_filters
'''

SELECT behavior_query.person_id AS id
FROM
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview'
AND (has(['something'], replaceRegexpAll(JSONExtractRaw(properties, '$filter_prop'), '^"|"$', '')))) > 0 AS performed_event_condition_None_level_level_0_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['$pageview']
AND timestamp <= now()
AND timestamp >= now() - INTERVAL 1 week
GROUP BY person_id) behavior_query
WHERE 1 = 1
AND (((performed_event_condition_None_level_level_0_level_0_0)))
'''
# ---
# name: TestCohortQuery.test_performed_event_with_event_filters_and_explicit_date
'''

SELECT behavior_query.person_id AS id
FROM
(SELECT pdi.person_id AS person_id,
countIf(timestamp > '2024-04-02 13:01:01'
AND timestamp < now()
AND event = '$pageview'
AND (has(['something'], replaceRegexpAll(JSONExtractRaw(properties, '$filter_prop'), '^"|"$', '')))) > 0 AS performed_event_condition_None_level_level_0_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
argMax(person_id, version) as person_id
FROM person_distinct_id2
WHERE team_id = 2
GROUP BY distinct_id
HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id
WHERE team_id = 2
AND event IN ['$pageview']
GROUP BY person_id) behavior_query
WHERE 1 = 1
AND (((performed_event_condition_None_level_level_0_level_0_0)))
'''
# ---
# name: TestCohortQuery.test_person
'''

Expand All @@ -356,7 +414,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_0_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -393,7 +452,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_0_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -430,10 +490,12 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 day
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0,
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0,
countIf(timestamp > now() - INTERVAL 2 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_0_level_1_0,
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_1_0,
minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123'
AND event = '$autocapture'))) >= now() - INTERVAL 2 week
AND minIf(timestamp, ((replaceRegexpAll(JSONExtractRaw(properties, '$current_url'), '^"|"$', '') = 'https://posthog.com/feedback/123'
Expand Down Expand Up @@ -579,7 +641,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_1_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -619,7 +682,8 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_level_0_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down Expand Up @@ -668,10 +732,12 @@
(SELECT pdi.person_id AS person_id,
countIf(timestamp > now() - INTERVAL 7 day
AND timestamp < now()
AND event = '$new_view') > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0,
AND event = '$new_view'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_0_level_0_0,
countIf(timestamp > now() - INTERVAL 1 week
AND timestamp < now()
AND event = '$pageview') > 0 AS performed_event_condition_None_level_level_0_level_1_0
AND event = '$pageview'
AND 1=1) > 0 AS performed_event_condition_None_level_level_0_level_1_0
FROM events e
INNER JOIN
(SELECT distinct_id,
Expand Down
140 changes: 140 additions & 0 deletions ee/clickhouse/queries/test/test_cohort_query.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
from datetime import datetime, timedelta

from freezegun import freeze_time

from ee.clickhouse.queries.enterprise_cohort_query import check_negation_clause
from posthog.client import sync_execute
from posthog.constants import PropertyOperatorType
Expand Down Expand Up @@ -216,10 +218,76 @@ def test_performed_event(self):
{
"key": "$pageview",
"event_type": "events",
"explicit_datetime": "-1w",
"value": "performed_event",
"type": "behavioral",
}
],
}
}
)

q, params = CohortQuery(filter=filter, team=self.team).get_query()
res = sync_execute(q, {**params, **filter.hogql_context.values})

self.assertEqual([p1.uuid], [r[0] for r in res])

@snapshot_clickhouse_queries
@freeze_time("2024-04-05 13:01:01")
def test_performed_event_with_event_filters_and_explicit_date(self):
p1 = _create_person(
team_id=self.team.pk,
distinct_ids=["p1"],
properties={"name": "test", "email": "[email protected]"},
)
_create_event(
team=self.team,
event="$pageview",
properties={"$filter_prop": "something"},
distinct_id="p1",
timestamp=datetime.now() - timedelta(days=2),
)

_create_person(
team_id=self.team.pk,
distinct_ids=["p2"],
properties={"name": "test", "email": "[email protected]"},
)
_create_event(
team=self.team,
event="$pageview",
properties={},
distinct_id="p2",
timestamp=datetime.now() - timedelta(days=2),
)
_create_event(
team=self.team,
event="$pageview",
properties={"$filter_prop": "something"},
distinct_id="p2",
# rejected because explicit datetime is set to 3 days ago
timestamp=datetime.now() - timedelta(days=5),
)
flush_persons_and_events()

filter = Filter(
data={
"properties": {
"type": "AND",
"values": [
{
"key": "$pageview",
"event_type": "events",
"explicit_datetime": str(
datetime.now() - timedelta(days=3)
), # overrides time_value and time_interval
"time_value": 1,
"time_interval": "week",
"value": "performed_event",
"type": "behavioral",
"event_filters": [
{"key": "$filter_prop", "value": "something", "operator": "exact", "type": "event"}
],
}
],
}
Expand Down Expand Up @@ -292,6 +360,78 @@ def test_performed_event_multiple(self):

self.assertEqual([p1.uuid], [r[0] for r in res])

def test_performed_event_multiple_with_event_filters(self):
    """Check that `performed_event_multiple` honours `event_filters`.

    Person p1 gets two `$pageview` events that carry `$filter_prop = "something"`;
    person p2 gets two `$pageview` events with empty properties. The behavioral
    condition requires at least one `$pageview` in the last week that satisfies
    both an `exact` and an `icontains` filter on `$filter_prop`, and the test
    asserts that only p1 is returned.
    """
    event_ages_in_days = (2, 4)

    matching_person = _create_person(
        team_id=self.team.pk,
        distinct_ids=["p1"],
        properties={"name": "test", "email": "[email protected]"},
    )
    # p1: both events carry the property the cohort filters on.
    for days_ago in event_ages_in_days:
        _create_event(
            team=self.team,
            event="$pageview",
            properties={"$filter_prop": "something"},
            distinct_id="p1",
            timestamp=datetime.now() - timedelta(days=days_ago),
        )

    _create_person(
        team_id=self.team.pk,
        distinct_ids=["p2"],
        properties={"name": "test", "email": "[email protected]"},
    )
    # p2: same event name and timestamps, but no properties at all.
    for days_ago in event_ages_in_days:
        _create_event(
            team=self.team,
            event="$pageview",
            properties={},
            distinct_id="p2",
            timestamp=datetime.now() - timedelta(days=days_ago),
        )
    flush_persons_and_events()

    property_filters = [
        {"key": "$filter_prop", "value": "something", "operator": "exact", "type": "event"},
        {"key": "$filter_prop", "value": "some", "operator": "icontains", "type": "event"},
    ]
    behavioral_condition = {
        "key": "$pageview",
        "event_type": "events",
        "operator": "gte",
        "operator_value": 1,
        "time_value": 1,
        "time_interval": "week",
        "value": "performed_event_multiple",
        "type": "behavioral",
        "event_filters": property_filters,
    }
    cohort_filter = Filter(
        data={
            "properties": {
                "type": "AND",
                "values": [behavioral_condition],
            }
        }
    )

    query, query_params = CohortQuery(filter=cohort_filter, team=self.team).get_query()
    rows = sync_execute(query, {**query_params, **cohort_filter.hogql_context.values})

    returned_ids = [row[0] for row in rows]
    self.assertEqual([matching_person.uuid], returned_ids)

def test_performed_event_lte_1_times(self):
_create_person(
team_id=self.team.pk,
Expand Down
2 changes: 1 addition & 1 deletion ee/clickhouse/views/experiments.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@
from posthog.models.filters.filter import Filter
from posthog.utils import generate_cache_key, get_safe_cache

EXPERIMENT_RESULTS_CACHE_DEFAULT_TTL = 60 * 30 # 30 minutes
EXPERIMENT_RESULTS_CACHE_DEFAULT_TTL = 60 * 60 # 1 hour


def _calculate_experiment_results(experiment: Experiment, refresh: bool = False):
Expand Down
Loading

0 comments on commit de511ee

Please sign in to comment.