Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(product-analytics): most used property values for the agent #26985

Merged
merged 2 commits into from
Dec 18, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -5630,6 +5630,12 @@
"$ref": "#/definitions/HogQLQueryModifiers",
"description": "Modifiers used when performing the query"
},
"properties": {
"items": {
"type": "string"
},
"type": "array"
},
"response": {
"$ref": "#/definitions/EventTaxonomyQueryResponse"
}
Expand Down
1 change: 1 addition & 0 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2420,6 +2420,7 @@ export type EventTaxonomyResponse = EventTaxonomyItem[]
// Query node for the event taxonomy runner: requests property keys and sample
// values observed on a single event.
export interface EventTaxonomyQuery extends DataNode<EventTaxonomyQueryResponse> {
kind: NodeKind.EventTaxonomyQuery
// Event name; the runner matches it against the `event` column with equality.
event: string
// Optional property names. When non-empty, the runner extracts values for these
// properties only instead of sampling every key/value pair on the event.
properties?: string[]
}

// Standard analytics response envelope carrying the list of taxonomy items.
export type EventTaxonomyQueryResponse = AnalyticsQueryResponseBase<EventTaxonomyResponse>
Expand Down
105 changes: 85 additions & 20 deletions posthog/hogql_queries/ai/event_taxonomy_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,8 +50,9 @@ def calculate(self):
)

def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery:
query = parse_select(
"""
if not self.query.properties:
return parse_select(
"""
SELECT
key,
-- Pick five latest distinct sample values.
Expand All @@ -64,10 +65,23 @@ def to_query(self) -> ast.SelectQuery | ast.SelectSetQuery:
ORDER BY total_count DESC
LIMIT 500
""",
placeholders={"from_query": self._get_subquery(), "filter": self._get_omit_filter()},
)
placeholders={"from_query": self._get_subquery(), "filter": self._get_omit_filter()},
)

return query
return parse_select(
"""
SELECT
key,
arraySlice(arrayDistinct(groupArray(value)), 1, 5) AS values,
count(DISTINCT value) AS total_count
FROM {from_query}
GROUP BY key
LIMIT 500
""",
placeholders={
"from_query": self._get_subquery(),
},
)

def _get_omit_filter(self):
"""
Expand Down Expand Up @@ -107,21 +121,72 @@ def _get_omit_filter(self):

def _get_subquery_filter(self) -> ast.Expr:
date_filter = parse_expr("timestamp >= now() - INTERVAL 30 DAY")
filter_expr = ast.And(
exprs=[
date_filter,
ast.CompareOperation(
left=ast.Field(chain=["event"]),
right=ast.Constant(value=self.query.event),
op=ast.CompareOperationOp.Eq,
),
]
)
return filter_expr
filter_expr: list[ast.Expr] = [
date_filter,
ast.CompareOperation(
left=ast.Field(chain=["event"]),
right=ast.Constant(value=self.query.event),
op=ast.CompareOperationOp.Eq,
),
]

if self.query.properties:
filter_expr.append(
ast.Or(
exprs=[
ast.CompareOperation(
left=ast.Field(chain=["properties", prop]),
op=ast.CompareOperationOp.NotEq,
right=ast.Constant(value=""),
)
for prop in self.query.properties
]
)
)

return ast.And(exprs=filter_expr)

def _get_subquery(self) -> ast.SelectQuery:
query = parse_select(
"""
if self.query.properties:
query = parse_select(
"""
SELECT
key,
value,
count() as count
FROM (
SELECT
{props} as kv
FROM
events
WHERE {filter}
)
ARRAY JOIN kv.1 AS key, kv.2 AS value
WHERE value != ''
GROUP BY key, value
ORDER BY count DESC
""",
placeholders={
"props": ast.Array(
exprs=[
ast.Tuple(
exprs=[
ast.Constant(value=prop),
ast.Call(
name="JSONExtractString",
args=[ast.Field(chain=["properties"]), ast.Constant(value=prop)],
),
]
)
for prop in self.query.properties
]
),
"filter": self._get_subquery_filter(),
},
)
else:
query = parse_select(
"""
SELECT
JSONExtractKeysAndValues(properties, 'String') as kv
FROM
Expand All @@ -130,7 +195,7 @@ def _get_subquery(self) -> ast.SelectQuery:
ORDER BY timestamp desc
LIMIT 100
""",
placeholders={"filter": self._get_subquery_filter()},
)
placeholders={"filter": self._get_subquery_filter()},
)

return cast(ast.SelectQuery, query)
160 changes: 160 additions & 0 deletions posthog/hogql_queries/ai/test/test_event_taxonomy_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -251,3 +251,163 @@ def test_limit(self):

response = EventTaxonomyQueryRunner(team=self.team, query=EventTaxonomyQuery(event="event1")).calculate()
self.assertEqual(len(response.results), 500)

def test_property_taxonomy_returns_unique_values_for_specified_property(self):
    """A single requested property yields one row listing its distinct values.

    Three different ``$host`` values are ingested with different frequencies
    (10, 3 and 1 events); the expected sample values are listed from the most
    to the least frequent one.
    """
    for person_id in ("person1", "person2"):
        _create_person(
            distinct_ids=[person_id],
            properties={"email": "[email protected]"},
            team=self.team,
        )

    # (distinct_id, $host value, number of events), ingested in this order.
    event_batches = (
        ("person1", "us.posthog.com", 1),
        ("person1", "posthog.com", 10),
        ("person2", "eu.posthog.com", 3),
    )
    for distinct_id, host, repetitions in event_batches:
        for _ in range(repetitions):
            _create_event(
                event="event1",
                distinct_id=distinct_id,
                properties={"$host": host},
                team=self.team,
            )

    taxonomy_query = EventTaxonomyQuery(event="event1", properties=["$host"])
    runner = EventTaxonomyQueryRunner(team=self.team, query=taxonomy_query)
    response = runner.calculate()

    self.assertEqual(len(response.results), 1)
    host_row = response.results[0]
    self.assertEqual(host_row.property, "$host")
    self.assertEqual(host_row.sample_values, ["posthog.com", "eu.posthog.com", "us.posthog.com"])
    self.assertEqual(host_row.sample_count, 3)

def test_property_taxonomy_filters_events_by_event_name(self):
    """Only values seen on the requested event are reported.

    ``event2`` carries a different ``$host`` value ten times; it must not show
    up when the taxonomy is requested for ``event1``.
    """
    for person_id in ("person1", "person2"):
        _create_person(
            distinct_ids=[person_id],
            properties={"email": "[email protected]"},
            team=self.team,
        )

    # The only event1 occurrence that actually has a $host value.
    _create_event(
        event="event1",
        distinct_id="person1",
        properties={"$host": "us.posthog.com", "$browser": "Chrome"},
        team=self.team,
    )

    # Noise on a different event name; should be filtered out entirely.
    other_event_properties = {"$host": "posthog.com", "prop": 10}
    for _ in range(10):
        _create_event(
            event="event2",
            distinct_id="person1",
            properties=dict(other_event_properties),
            team=self.team,
        )

    # event1 occurrences created without passing any properties.
    for _ in range(3):
        _create_event(
            event="event1",
            distinct_id="person2",
            team=self.team,
        )

    taxonomy_query = EventTaxonomyQuery(event="event1", properties=["$host"])
    runner = EventTaxonomyQueryRunner(team=self.team, query=taxonomy_query)
    response = runner.calculate()

    self.assertEqual(len(response.results), 1)
    host_row = response.results[0]
    self.assertEqual(host_row.property, "$host")
    self.assertEqual(host_row.sample_values, ["us.posthog.com"])
    self.assertEqual(host_row.sample_count, 1)

def test_property_taxonomy_handles_multiple_properties_in_query(self):
    """Requesting several properties returns one row per property.

    ``$host`` has two distinct values and ``prop`` has one (the integer ``10``,
    expected back as the string ``"10"``). Events created without properties
    must not contribute any rows.
    """
    _create_person(
        distinct_ids=["person1"],
        properties={"email": "[email protected]"},
        team=self.team,
    )
    _create_person(
        distinct_ids=["person2"],
        properties={"email": "[email protected]"},
        team=self.team,
    )

    _create_event(
        event="event1",
        distinct_id="person1",
        properties={"$host": "us.posthog.com", "$browser": "Chrome"},
        team=self.team,
    )

    for _ in range(5):
        _create_event(
            event="event1",
            distinct_id="person1",
            properties={"$host": "posthog.com", "prop": 10},
            team=self.team,
        )

    for _ in range(3):
        _create_event(
            event="event1",
            distinct_id="person2",
            team=self.team,
        )

    response = EventTaxonomyQueryRunner(
        team=self.team, query=EventTaxonomyQuery(event="event1", properties=["$host", "prop"])
    ).calculate()
    self.assertEqual(len(response.results), 2)

    # The properties query groups by key without an ORDER BY, so the order of
    # rows across properties is not guaranteed. Look rows up by property name
    # rather than by position to keep the test deterministic.
    results_by_property = {result.property: result for result in response.results}
    self.assertEqual(sorted(results_by_property), ["$host", "prop"])

    self.assertEqual(results_by_property["prop"].sample_values, ["10"])
    self.assertEqual(results_by_property["prop"].sample_count, 1)
    self.assertEqual(results_by_property["$host"].sample_values, ["posthog.com", "us.posthog.com"])
    self.assertEqual(results_by_property["$host"].sample_count, 2)

def test_property_taxonomy_includes_events_with_partial_property_matches(self):
    """Events that carry only some of the requested properties still count.

    One event has only ``$host`` and another has only ``prop``; both
    properties must be reported with their single value.
    """
    _create_person(
        distinct_ids=["person1"],
        properties={"email": "[email protected]"},
        team=self.team,
    )
    _create_event(
        event="event1",
        distinct_id="person1",
        properties={"$host": "us.posthog.com"},
        team=self.team,
    )
    _create_event(
        event="event1",
        distinct_id="person2",
        properties={"prop": 10},
        team=self.team,
    )

    response = EventTaxonomyQueryRunner(
        team=self.team, query=EventTaxonomyQuery(event="event1", properties=["$host", "prop"])
    ).calculate()
    self.assertEqual(len(response.results), 2)

    # The properties query groups by key without an ORDER BY, so the order of
    # rows across properties is not guaranteed. Look rows up by property name
    # rather than by position to keep the test deterministic.
    results_by_property = {result.property: result for result in response.results}
    self.assertEqual(sorted(results_by_property), ["$host", "prop"])

    self.assertEqual(results_by_property["prop"].sample_values, ["10"])
    self.assertEqual(results_by_property["prop"].sample_count, 1)
    self.assertEqual(results_by_property["$host"].sample_values, ["us.posthog.com"])
    self.assertEqual(results_by_property["$host"].sample_count, 1)
1 change: 1 addition & 0 deletions posthog/schema.py
Original file line number Diff line number Diff line change
Expand Up @@ -5582,6 +5582,7 @@ class EventTaxonomyQuery(BaseModel):
modifiers: Optional[HogQLQueryModifiers] = Field(
default=None, description="Modifiers used when performing the query"
)
properties: Optional[list[str]] = None
response: Optional[EventTaxonomyQueryResponse] = None


Expand Down
Loading