Skip to content

Commit

Permalink
fix(persons): no duplicate distinct_ids (#20152)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra authored Feb 6, 2024
1 parent 8a4c7d8 commit 337b7e7
Show file tree
Hide file tree
Showing 4 changed files with 95 additions and 5 deletions.
8 changes: 4 additions & 4 deletions posthog/hogql_queries/actor_strategies.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

from posthog.hogql import ast
from posthog.hogql.property import property_to_expr
from posthog.hogql.parser import parse_expr
from posthog.hogql_queries.insights.paginators import HogQLHasMorePaginator
from posthog.hogql_queries.utils.recordings import RecordingsHelper
from posthog.models import Team, Person, Group
Expand Down Expand Up @@ -88,10 +89,9 @@ def filter_conditions(self) -> List[ast.Expr]:
left=ast.Call(name="toString", args=[ast.Field(chain=["id"])]),
right=ast.Constant(value=f"%{self.query.search}%"),
),
ast.CompareOperation(
op=ast.CompareOperationOp.ILike,
left=ast.Field(chain=["pdi", "distinct_id"]),
right=ast.Constant(value=f"%{self.query.search}%"),
parse_expr(
"id in (select person_id from person_distinct_ids where ilike(distinct_id, {search}))",
{"search": ast.Constant(value=f"%{self.query.search}%")},
),
]
)
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -74,13 +74,14 @@ def run_query(self, query):
runner = RetentionQueryRunner(team=self.team, query=query)
return runner.calculate().model_dump()["results"]

def run_actors_query(self, interval, query, select=None):
def run_actors_query(self, interval, query, select=None, search=None):
query["kind"] = "RetentionQuery"
if not query.get("retentionFilter"):
query["retentionFilter"] = {}
runner = ActorsQueryRunner(
team=self.team,
query={
"search": search,
"select": ["person", "appearances", *(select or [])],
"orderBy": ["length(appearances) DESC", "actor_id"],
"source": {
Expand Down Expand Up @@ -874,6 +875,44 @@ def test_retention_people_in_period(self):
self.assertEqual(result[1][0]["id"], person1.uuid)
self.assertCountEqual(result[1][1], [0, 3, 4])

def test_retention_people_search(self):
_create_person(
team_id=self.team.pk,
distinct_ids=["person1", "alias1"],
properties={"email": "[email protected]"},
)
_create_person(
team_id=self.team.pk,
distinct_ids=["person2"],
properties={"email": "[email protected]"},
)

_create_events(
self.team,
[
("person1", _date(0)),
("person1", _date(1)),
("person1", _date(2)),
("person1", _date(5)),
("alias1", _date(5, 9)),
("person1", _date(6)),
("person2", _date(1)),
("person2", _date(2)),
("person2", _date(3)),
("person2", _date(6)),
("person2", _date(7)),
],
)

result = self.run_actors_query(
interval=2,
query={
"dateRange": {"date_to": _date(10, hour=6)},
},
search="test",
)
self.assertEqual(len(result), 2)

def test_retention_people_in_period_first_time(self):
p1, p2, p3, p4 = self._create_first_time_retention_events()
# even if set to hour 6 it should default to beginning of day and include all pageviews above
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# serializer version: 1
# name: TestActorsQueryRunner.test_persons_query_search_snapshot
'''

SELECT id, id, created_at, 1
FROM persons
WHERE or(ilike(properties.email, '%SEARCHSTRING%'), ilike(properties.name, '%SEARCHSTRING%'), ilike(toString(id), '%SEARCHSTRING%'), in(id, (
SELECT person_id
FROM person_distinct_ids
WHERE ilike(distinct_id, '%SEARCHSTRING%')))) ORDER BY created_at DESC
LIMIT 10000
'''
# ---
38 changes: 38 additions & 0 deletions posthog/hogql_queries/test/test_actors_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,7 @@
import pytest

from posthog.hogql import ast
from posthog.hogql.test.utils import pretty_print_in_tests
from posthog.hogql.visitor import clear_locations
from posthog.hogql_queries.actors_query_runner import ActorsQueryRunner
from posthog.models.utils import UUIDT
Expand Down Expand Up @@ -137,6 +140,11 @@ def test_persons_query_search_distinct_id(self):
runner = self._create_runner(ActorsQuery(search=f"id-{self.random_uuid}-9"))
self.assertEqual(len(runner.calculate().results), 1)

@pytest.mark.usefixtures("unittest_snapshot")
def test_persons_query_search_snapshot(self):
runner = self._create_runner(ActorsQuery(search="SEARCHSTRING"))
assert pretty_print_in_tests(runner.to_hogql(), self.team.pk) == self.snapshot

def test_persons_query_aggregation_select_having(self):
self.random_uuid = self._create_random_persons()
runner = self._create_runner(ActorsQuery(select=["properties.name", "count()"]))
Expand Down Expand Up @@ -227,3 +235,33 @@ def test_source_lifecycle_query(self):
runner = self._create_runner(query)
response = runner.calculate()
self.assertEqual(response.results, [[f"jacob4@{self.random_uuid}.posthog.com"]])

def test_persons_query_grouping(self):
random_uuid = f"RANDOM_TEST_ID::{UUIDT()}"
_create_person(
properties={
"email": f"jacob0@{random_uuid}.posthog.com",
"name": f"Mr Jacob {random_uuid}",
"random_uuid": random_uuid,
"index": 0,
},
team=self.team,
distinct_ids=[f"id-{random_uuid}-0", f"id-{random_uuid}-1"],
is_identified=True,
)
_create_event(
distinct_id=f"id-{random_uuid}-0",
event=f"clicky-0",
team=self.team,
)
_create_event(
distinct_id=f"id-{random_uuid}-1",
event=f"clicky-9",
team=self.team,
)
flush_persons_and_events()
runner = self._create_runner(ActorsQuery(search="posthog.com"))

response = runner.calculate()
# Should show a single person despite multiple distinct_ids
self.assertEqual(len(response.results), 1)

0 comments on commit 337b7e7

Please sign in to comment.