Skip to content

Commit

Permalink
feat: most used property values
Browse files Browse the repository at this point in the history
  • Loading branch information
skoob13 committed Dec 17, 2024
1 parent 219bad7 commit 4c070fe
Show file tree
Hide file tree
Showing 5 changed files with 151 additions and 19 deletions.
3 changes: 3 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -5630,6 +5630,9 @@
"$ref": "#/definitions/HogQLQueryModifiers",
"description": "Modifiers used when performing the query"
},
"property": {
"type": "string"
},
"response": {
"$ref": "#/definitions/EventTaxonomyQueryResponse"
}
Expand Down
1 change: 1 addition & 0 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2420,6 +2420,7 @@ export type EventTaxonomyResponse = EventTaxonomyItem[]
// Query node handled by the backend event taxonomy query runner.
export interface EventTaxonomyQuery extends DataNode<EventTaxonomyQueryResponse> {
kind: NodeKind.EventTaxonomyQuery
// Event name; the backend runner filters events with `event = <this value>`.
event: string
// Optional property key. When set, the backend runner returns a single row with
// sample values of this property only, instead of sampling all properties of the event.
property?: string
}

export type EventTaxonomyQueryResponse = AnalyticsQueryResponseBase<EventTaxonomyResponse>
Expand Down
80 changes: 61 additions & 19 deletions posthog/hogql_queries/ai/event_taxonomy_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ def calculate(self):
)

def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery:
query = parse_select(
"""
if not self.query.property:
return parse_select(
"""
SELECT
key,
-- Pick five latest distinct sample values.
Expand All @@ -64,10 +65,22 @@ def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery:
ORDER BY total_count DESC
LIMIT 500
""",
placeholders={"from_query": self._get_subquery(), "filter": self._get_omit_filter()},
)
placeholders={"from_query": self._get_subquery(), "filter": self._get_omit_filter()},
)

return query
return parse_select(
"""
SELECT
{const},
arraySlice(arrayDistinct(groupArray(value)), 1, 5) AS values,
count(DISTINCT value) AS total_count
FROM {from_query}
""",
placeholders={
"const": ast.Constant(value=self.query.property),
"from_query": self._get_subquery(),
},
)

def _get_omit_filter(self):
"""
Expand Down Expand Up @@ -107,21 +120,50 @@ def _get_omit_filter(self):

def _get_subquery_filter(self) -> ast.Expr:
    """
    Build the WHERE expression used by the events subquery.

    The filter always restricts rows to the last 30 days and to the event named
    in ``self.query.event``. When ``self.query.property`` is set, rows where that
    property is an empty string are excluded as well.

    Returns:
        An ``ast.And`` combining all of the conditions above.
    """
    conditions: list[ast.Expr] = [
        parse_expr("timestamp >= now() - INTERVAL 30 DAY"),
        ast.CompareOperation(
            left=ast.Field(chain=["event"]),
            right=ast.Constant(value=self.query.event),
            op=ast.CompareOperationOp.Eq,
        ),
    ]

    if self.query.property:
        # Only keep events that carry a non-empty value for the requested property.
        conditions.append(
            ast.CompareOperation(
                left=ast.Field(chain=["properties", self.query.property]),
                op=ast.CompareOperationOp.NotEq,
                right=ast.Constant(value=""),
            )
        )

    return ast.And(exprs=conditions)

def _get_subquery(self) -> ast.SelectQuery:
query = parse_select(
"""
if self.query.property:
query = parse_select(
"""
SELECT
{prop} as value,
count(*) AS count
FROM
events
WHERE
{filter}
GROUP BY
value
ORDER BY
count DESC
""",
placeholders={
"prop": ast.Field(chain=["properties", self.query.property]),
"filter": self._get_subquery_filter(),
},
)
else:
query = parse_select(
"""
SELECT
JSONExtractKeysAndValues(properties, 'String') as kv
FROM
Expand All @@ -130,7 +172,7 @@ def _get_subquery(self) -> ast.SelectQuery:
ORDER BY timestamp desc
LIMIT 100
""",
placeholders={"filter": self._get_subquery_filter()},
)
placeholders={"filter": self._get_subquery_filter()},
)

return cast(ast.SelectQuery, query)
85 changes: 85 additions & 0 deletions posthog/hogql_queries/ai/test/test_event_taxonomy_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,88 @@ def test_limit(self):

response = EventTaxonomyQueryRunner(team=self.team, query=EventTaxonomyQuery(event="event1")).calculate()
self.assertEqual(len(response.results), 500)

def test_property_taxonomy(self):
    """A property-scoped query yields one row listing that property's distinct values."""
    for distinct_id in ("person1", "person2"):
        _create_person(
            distinct_ids=[distinct_id],
            properties={"email": "[email protected]"},
            team=self.team,
        )

    # (distinct_id, $host value, number of events), in creation order.
    host_batches = (
        ("person1", "us.posthog.com", 1),
        ("person1", "posthog.com", 10),
        ("person2", "eu.posthog.com", 3),
    )
    for distinct_id, host, repeats in host_batches:
        for _ in range(repeats):
            _create_event(
                event="event1",
                distinct_id=distinct_id,
                properties={"$host": host},
                team=self.team,
            )

    taxonomy_query = EventTaxonomyQuery(event="event1", property="$host")
    response = EventTaxonomyQueryRunner(team=self.team, query=taxonomy_query).calculate()

    self.assertEqual(len(response.results), 1)
    row = response.results[0]
    self.assertEqual(row.property, "$host")
    # Expected values are listed from the most to the least frequent host in the fixture data.
    self.assertEqual(row.sample_values, ["posthog.com", "eu.posthog.com", "us.posthog.com"])
    self.assertEqual(row.sample_count, 3)

def test_property_taxonomy_filters(self):
    """Only events of the requested type that carry the requested property contribute values."""
    for distinct_id in ("person1", "person2"):
        _create_person(
            distinct_ids=[distinct_id],
            properties={"email": "[email protected]"},
            team=self.team,
        )

    # The single matching event: right event name and it has a $host value.
    _create_event(
        event="event1",
        distinct_id="person1",
        properties={"$host": "us.posthog.com", "$browser": "Chrome"},
        team=self.team,
    )

    # A different event name: has $host, but the test expects it not to be counted.
    for _ in range(10):
        _create_event(
            event="event2",
            distinct_id="person1",
            properties={"$host": "posthog.com", "prop": 10},
            team=self.team,
        )

    # Right event name but created without any properties.
    for _ in range(3):
        _create_event(
            event="event1",
            distinct_id="person2",
            team=self.team,
        )

    taxonomy_query = EventTaxonomyQuery(event="event1", property="$host")
    response = EventTaxonomyQueryRunner(team=self.team, query=taxonomy_query).calculate()

    self.assertEqual(len(response.results), 1)
    row = response.results[0]
    self.assertEqual(row.property, "$host")
    self.assertEqual(row.sample_values, ["us.posthog.com"])
    self.assertEqual(row.sample_count, 1)
1 change: 1 addition & 0 deletions posthog/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5582,6 +5582,7 @@ class EventTaxonomyQuery(BaseModel):
modifiers: Optional[HogQLQueryModifiers] = Field(
default=None, description="Modifiers used when performing the query"
)
property: Optional[str] = None
response: Optional[EventTaxonomyQueryResponse] = None


Expand Down

0 comments on commit 4c070fe

Please sign in to comment.