Skip to content

Commit

Permalink
feat(product-analytics): most used property values for the agent (#26985)
Browse files Browse the repository at this point in the history
  • Loading branch information
skoob13 authored Dec 18, 2024
1 parent 2f9e896 commit 3e95fdc
Show file tree
Hide file tree
Showing 5 changed files with 253 additions and 20 deletions.
6 changes: 6 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -5630,6 +5630,12 @@
"$ref": "#/definitions/HogQLQueryModifiers",
"description": "Modifiers used when performing the query"
},
"properties": {
"items": {
"type": "string"
},
"type": "array"
},
"response": {
"$ref": "#/definitions/EventTaxonomyQueryResponse"
}
Expand Down
1 change: 1 addition & 0 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2420,6 +2420,7 @@ export type EventTaxonomyResponse = EventTaxonomyItem[]
// Query node for the event taxonomy: property keys and sample values seen on a single event.
export interface EventTaxonomyQuery extends DataNode<EventTaxonomyQueryResponse> {
// Discriminator identifying this node as an event taxonomy query.
kind: NodeKind.EventTaxonomyQuery
// Name of the event whose properties are inspected (the runner filters on `event = <this value>`).
event: string
// Optional list of property names. When non-empty, the runner returns values only for
// these properties instead of sampling every property found on the event.
properties?: string[]
}

export type EventTaxonomyQueryResponse = AnalyticsQueryResponseBase<EventTaxonomyResponse>
Expand Down
105 changes: 85 additions & 20 deletions posthog/hogql_queries/ai/event_taxonomy_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ def calculate(self):
)

def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery:
query = parse_select(
"""
if not self.query.properties:
return parse_select(
"""
SELECT
key,
-- Pick five latest distinct sample values.
Expand All @@ -64,10 +65,23 @@ def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery:
ORDER BY total_count DESC
LIMIT 500
""",
placeholders={"from_query": self._get_subquery(), "filter": self._get_omit_filter()},
)
placeholders={"from_query": self._get_subquery(), "filter": self._get_omit_filter()},
)

return query
return parse_select(
"""
SELECT
key,
arraySlice(arrayDistinct(groupArray(value)), 1, 5) AS values,
count(DISTINCT value) AS total_count
FROM {from_query}
GROUP BY key
LIMIT 500
""",
placeholders={
"from_query": self._get_subquery(),
},
)

def _get_omit_filter(self):
"""
Expand Down Expand Up @@ -107,21 +121,72 @@ def _get_omit_filter(self):

def _get_subquery_filter(self) -> ast.Expr:
    """
    Build the WHERE expression applied to the `events` table in the subquery.

    The returned expression is an AND of:
    - a date restriction to the last 30 days,
    - an equality check of the `event` column against `self.query.event`,
    - and, only when `self.query.properties` is non-empty, an OR requiring that
      at least one of the requested properties is not equal to the empty string.

    Returns:
        An `ast.And` combining all of the conditions above.
    """
    conditions: list[ast.Expr] = [
        parse_expr("timestamp >= now() - INTERVAL 30 DAY"),
        ast.CompareOperation(
            left=ast.Field(chain=["event"]),
            right=ast.Constant(value=self.query.event),
            op=ast.CompareOperationOp.Eq,
        ),
    ]

    if self.query.properties:
        # An event qualifies as soon as any one of the requested properties is set,
        # so events carrying only a subset of them are still included.
        conditions.append(
            ast.Or(
                exprs=[
                    ast.CompareOperation(
                        left=ast.Field(chain=["properties", prop]),
                        op=ast.CompareOperationOp.NotEq,
                        right=ast.Constant(value=""),
                    )
                    for prop in self.query.properties
                ]
            )
        )

    return ast.And(exprs=conditions)

def _get_subquery(self) -> ast.SelectQuery:
query = parse_select(
"""
if self.query.properties:
query = parse_select(
"""
SELECT
key,
value,
count() as count
FROM (
SELECT
{props} as kv
FROM
events
WHERE {filter}
)
ARRAY JOIN kv.1 AS key, kv.2 AS value
WHERE value != ''
GROUP BY key, value
ORDER BY count DESC
""",
placeholders={
"props": ast.Array(
exprs=[
ast.Tuple(
exprs=[
ast.Constant(value=prop),
ast.Call(
name="JSONExtractString",
args=[ast.Field(chain=["properties"]), ast.Constant(value=prop)],
),
]
)
for prop in self.query.properties
]
),
"filter": self._get_subquery_filter(),
},
)
else:
query = parse_select(
"""
SELECT
JSONExtractKeysAndValues(properties, 'String') as kv
FROM
Expand All @@ -130,7 +195,7 @@ def _get_subquery(self) -> ast.SelectQuery:
ORDER BY timestamp desc
LIMIT 100
""",
placeholders={"filter": self._get_subquery_filter()},
)
placeholders={"filter": self._get_subquery_filter()},
)

return cast(ast.SelectQuery, query)
160 changes: 160 additions & 0 deletions posthog/hogql_queries/ai/test/test_event_taxonomy_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,163 @@ def test_limit(self):

response = EventTaxonomyQueryRunner(team=self.team, query=EventTaxonomyQuery(event="event1")).calculate()
self.assertEqual(len(response.results), 500)

def test_property_taxonomy_returns_unique_values_for_specified_property(self):
    """
    Requesting a single property returns one row whose sample values are the
    distinct values of that property, most frequently seen first.
    """
    for person_id in ("person1", "person2"):
        _create_person(
            distinct_ids=[person_id],
            properties={"email": "[email protected]"},
            team=self.team,
        )

    # (distinct_id, $host value, number of events), created in this order.
    host_fixtures = (
        ("person1", "us.posthog.com", 1),
        ("person1", "posthog.com", 10),
        ("person2", "eu.posthog.com", 3),
    )
    for person_id, host, occurrences in host_fixtures:
        for _ in range(occurrences):
            _create_event(
                event="event1",
                distinct_id=person_id,
                properties={"$host": host},
                team=self.team,
            )

    taxonomy_query = EventTaxonomyQuery(event="event1", properties=["$host"])
    response = EventTaxonomyQueryRunner(team=self.team, query=taxonomy_query).calculate()

    self.assertEqual(len(response.results), 1)
    host_row = response.results[0]
    self.assertEqual(host_row.property, "$host")
    self.assertEqual(host_row.sample_values, ["posthog.com", "eu.posthog.com", "us.posthog.com"])
    self.assertEqual(host_row.sample_count, 3)

def test_property_taxonomy_filters_events_by_event_name(self):
    """
    Only events matching the queried event name contribute property values:
    the `$host` values recorded on `event2` must not appear for `event1`.
    """
    for person_id in ("person1", "person2"):
        _create_person(
            distinct_ids=[person_id],
            properties={"email": "[email protected]"},
            team=self.team,
        )

    # The single matching event that carries the requested property.
    _create_event(
        event="event1",
        distinct_id="person1",
        properties={"$host": "us.posthog.com", "$browser": "Chrome"},
        team=self.team,
    )

    # Events under a different name; their `$host` must be ignored.
    for _ in range(10):
        _create_event(
            event="event2",
            distinct_id="person1",
            properties={"$host": "posthog.com", "prop": 10},
            team=self.team,
        )

    # Matching event name, but created without any properties.
    for _ in range(3):
        _create_event(
            event="event1",
            distinct_id="person2",
            team=self.team,
        )

    taxonomy_query = EventTaxonomyQuery(event="event1", properties=["$host"])
    response = EventTaxonomyQueryRunner(team=self.team, query=taxonomy_query).calculate()

    self.assertEqual(len(response.results), 1)
    host_row = response.results[0]
    self.assertEqual(host_row.property, "$host")
    self.assertEqual(host_row.sample_values, ["us.posthog.com"])
    self.assertEqual(host_row.sample_count, 1)

def test_property_taxonomy_handles_multiple_properties_in_query(self):
    """
    Requesting several properties returns one row per property, each with its
    own distinct sample values and distinct-value count.
    """
    _create_person(
        distinct_ids=["person1"],
        properties={"email": "[email protected]"},
        team=self.team,
    )
    _create_person(
        distinct_ids=["person2"],
        properties={"email": "[email protected]"},
        team=self.team,
    )

    _create_event(
        event="event1",
        distinct_id="person1",
        properties={"$host": "us.posthog.com", "$browser": "Chrome"},
        team=self.team,
    )

    for _ in range(5):
        _create_event(
            event="event1",
            distinct_id="person1",
            properties={"$host": "posthog.com", "prop": 10},
            team=self.team,
        )

    for _ in range(3):
        _create_event(
            event="event1",
            distinct_id="person2",
            team=self.team,
        )

    response = EventTaxonomyQueryRunner(
        team=self.team, query=EventTaxonomyQuery(event="event1", properties=["$host", "prop"])
    ).calculate()
    self.assertEqual(len(response.results), 2)

    # The property-filtered taxonomy query groups by key without an ORDER BY, so the
    # order of the returned rows is not guaranteed; look rows up by property name
    # instead of asserting on their position.
    rows_by_property = {row.property: row for row in response.results}
    self.assertEqual(set(rows_by_property), {"prop", "$host"})

    self.assertEqual(rows_by_property["prop"].sample_values, ["10"])
    self.assertEqual(rows_by_property["prop"].sample_count, 1)
    self.assertEqual(rows_by_property["$host"].sample_values, ["posthog.com", "us.posthog.com"])
    self.assertEqual(rows_by_property["$host"].sample_count, 2)

def test_property_taxonomy_includes_events_with_partial_property_matches(self):
    """
    An event that carries only some of the requested properties still contributes
    the values it does have: each requested property gets its own row.
    """
    _create_person(
        distinct_ids=["person1"],
        properties={"email": "[email protected]"},
        team=self.team,
    )
    _create_event(
        event="event1",
        distinct_id="person1",
        properties={"$host": "us.posthog.com"},
        team=self.team,
    )
    _create_event(
        event="event1",
        distinct_id="person2",
        properties={"prop": 10},
        team=self.team,
    )

    response = EventTaxonomyQueryRunner(
        team=self.team, query=EventTaxonomyQuery(event="event1", properties=["$host", "prop"])
    ).calculate()
    self.assertEqual(len(response.results), 2)

    # The property-filtered taxonomy query groups by key without an ORDER BY, so the
    # order of the returned rows is not guaranteed; look rows up by property name
    # instead of asserting on their position.
    rows_by_property = {row.property: row for row in response.results}
    self.assertEqual(set(rows_by_property), {"prop", "$host"})

    self.assertEqual(rows_by_property["prop"].sample_values, ["10"])
    self.assertEqual(rows_by_property["prop"].sample_count, 1)
    self.assertEqual(rows_by_property["$host"].sample_values, ["us.posthog.com"])
    self.assertEqual(rows_by_property["$host"].sample_count, 1)
1 change: 1 addition & 0 deletions posthog/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5582,6 +5582,7 @@ class EventTaxonomyQuery(BaseModel):
modifiers: Optional[HogQLQueryModifiers] = Field(
default=None, description="Modifiers used when performing the query"
)
properties: Optional[list[str]] = None
response: Optional[EventTaxonomyQueryResponse] = None


Expand Down

0 comments on commit 3e95fdc

Please sign in to comment.