diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 82efdf3b31478..1f55d0478eae7 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -189,6 +189,80 @@ "required": ["id", "kind"], "type": "object" }, + "ActorsPropertyTaxonomyQuery": { + "additionalProperties": false, + "properties": { + "group_type_index": { + "type": "integer" + }, + "kind": { + "const": "ActorsPropertyTaxonomyQuery", + "type": "string" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "property": { + "type": "string" + }, + "response": { + "$ref": "#/definitions/ActorsPropertyTaxonomyQueryResponse" + } + }, + "required": ["kind", "property"], + "type": "object" + }, + "ActorsPropertyTaxonomyQueryResponse": { + "additionalProperties": false, + "description": "All analytics query responses must inherit from this.", + "properties": { + "error": { + "description": "Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + "type": "string" + }, + "hogql": { + "description": "Generated HogQL query.", + "type": "string" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "query_status": { + "$ref": "#/definitions/QueryStatus", + "description": "Query status indicates whether next to the provided data, a query is still running." + }, + "results": { + "$ref": "#/definitions/ActorsPropertyTaxonomyResponse" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": ["results"], + "type": "object" + }, + "ActorsPropertyTaxonomyResponse": { + "additionalProperties": false, + "properties": { + "sample_count": { + "type": "integer" + }, + "sample_values": { + "items": { + "type": "string" + }, + "type": "array" + } + }, + "required": ["sample_values", "sample_count"], + "type": "object" + }, "ActorsQuery": { "additionalProperties": false, "properties": { @@ -766,6 +840,71 @@ "required": ["cache_key"], "type": "object" }, + "CachedActorsPropertyTaxonomyQueryResponse": { + "additionalProperties": false, + "properties": { + "cache_key": { + "type": "string" + }, + "cache_target_age": { + "format": "date-time", + "type": "string" + }, + "calculation_trigger": { + "description": "What triggered the calculation of the query, leave empty if user/immediate", + "type": "string" + }, + "error": { + "description": "Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + "type": "string" + }, + "hogql": { + "description": "Generated HogQL query.", + "type": "string" + }, + "is_cached": { + "type": "boolean" + }, + "last_refresh": { + "format": "date-time", + "type": "string" + }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers", + "description": "Modifiers used when performing the query" + }, + "next_allowed_client_refresh": { + "format": "date-time", + "type": "string" + }, + "query_status": { + "$ref": "#/definitions/QueryStatus", + "description": "Query status indicates whether next to the provided data, a query is still running." + }, + "results": { + "$ref": "#/definitions/ActorsPropertyTaxonomyResponse" + }, + "timezone": { + "type": "string" + }, + "timings": { + "description": "Measured timings for different parts of the query generation process", + "items": { + "$ref": "#/definitions/QueryTiming" + }, + "type": "array" + } + }, + "required": [ + "cache_key", + "is_cached", + "last_refresh", + "next_allowed_client_refresh", + "results", + "timezone" + ], + "type": "object" + }, "CachedActorsQueryResponse": { "additionalProperties": false, "properties": { @@ -7267,7 +7406,8 @@ "DatabaseSchemaQuery", "SuggestedQuestionsQuery", "TeamTaxonomyQuery", - "EventTaxonomyQuery" + "EventTaxonomyQuery", + "ActorsPropertyTaxonomyQuery" ], "type": "string" }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 8762e320251c0..aa0e00c43745b 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -110,6 +110,7 @@ export enum NodeKind { SuggestedQuestionsQuery = 'SuggestedQuestionsQuery', TeamTaxonomyQuery = 'TeamTaxonomyQuery', EventTaxonomyQuery = 'EventTaxonomyQuery', + ActorsPropertyTaxonomyQuery = 'ActorsPropertyTaxonomyQuery', } export type AnyDataNode = @@ -2038,3 +2039,18 @@ export interface EventTaxonomyQuery extends DataNode export type EventTaxonomyQueryResponse = AnalyticsQueryResponseBase export type CachedEventTaxonomyQueryResponse = CachedQueryResponse + +export interface ActorsPropertyTaxonomyResponse { + sample_values: string[] + sample_count: integer +} + +export interface ActorsPropertyTaxonomyQuery extends DataNode { + kind: NodeKind.ActorsPropertyTaxonomyQuery + property: string + group_type_index?: integer +} + +export type ActorsPropertyTaxonomyQueryResponse = AnalyticsQueryResponseBase + +export type CachedActorsPropertyTaxonomyQueryResponse = CachedQueryResponse diff --git a/frontend/src/scenes/saved-insights/SavedInsights.tsx b/frontend/src/scenes/saved-insights/SavedInsights.tsx index d5772c38fc0eb..4810a04db4bdf 100644 --- a/frontend/src/scenes/saved-insights/SavedInsights.tsx +++ b/frontend/src/scenes/saved-insights/SavedInsights.tsx @@ -365,6 +365,12 @@ export const QUERY_TYPES_METADATA: Record = { icon: IconHogQL, inMenu: false, }, + [NodeKind.ActorsPropertyTaxonomyQuery]: { + name: 'Actor Property Taxonomy', + description: 'View the taxonomy of the actor’s property.', + icon: IconHogQL, + inMenu: false, + }, } export const INSIGHT_TYPE_OPTIONS: LemonSelectOptions = [ diff --git a/posthog/hogql_queries/ai/actors_property_taxonomy_query_runner.py b/posthog/hogql_queries/ai/actors_property_taxonomy_query_runner.py new file mode 100644 index 0000000000000..1fdd2be57db54 --- /dev/null +++ b/posthog/hogql_queries/ai/actors_property_taxonomy_query_runner.py @@ -0,0 +1,103 @@ +from typing import Optional + +from posthog.hogql import ast +from posthog.hogql.printer import to_printed_hogql +from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.ai.utils import TaxonomyCacheMixin +from posthog.hogql_queries.query_runner import QueryRunner +from posthog.schema import ( + ActorsPropertyTaxonomyQuery, + ActorsPropertyTaxonomyQueryResponse, + CachedActorsPropertyTaxonomyQueryResponse, +) + + +class ActorsPropertyTaxonomyQueryRunner(TaxonomyCacheMixin, QueryRunner): + query: ActorsPropertyTaxonomyQuery + response: ActorsPropertyTaxonomyQueryResponse + cached_response: CachedActorsPropertyTaxonomyQueryResponse + + def calculate(self): + query = self.to_query() + hogql = to_printed_hogql(query, self.team) + + response = execute_hogql_query( + query_type="ActorsPropertyTaxonomyQuery", + query=query, + team=self.team, + timings=self.timings, + modifiers=self.modifiers, + limit_context=self.limit_context, + ) + + results = ( + { + "sample_values": response.results[0][0], + "sample_count": response.results[0][1], + } + if response.results + else { + "sample_values": [], + "sample_count": 0, + } + ) + + return ActorsPropertyTaxonomyQueryResponse( + results=results, + timings=response.timings, + hogql=hogql, + modifiers=self.modifiers, + ) + + def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: + query = ast.SelectQuery( + select=[ + ast.Call(name="groupArray", args=[ast.Field(chain=["prop"])], params=[ast.Constant(value=5)]), + ast.Call(name="count", args=[]), + ], + select_from=ast.JoinExpr(table=self._get_subquery()), + ) + + return query + + @property + def _actor_type(self) -> str: + if self.query.group_type_index is not None: + return "group" + return "person" + + @property + def _origin(self) -> str: + if self._actor_type == "person": + return "persons" + return "groups" + + def _subquery_filter(self) -> Optional[ast.Expr]: + field_filter = ast.Call( + name="isNotNull", + args=[ast.Field(chain=["prop"])], + ) + + if self._actor_type == "group": + return ast.And( + exprs=[ + field_filter, + ast.CompareOperation( + left=ast.Field(chain=["index"]), + op=ast.CompareOperationOp.Eq, + right=ast.Constant(value=self.query.group_type_index), + ), + ] + ) + + return field_filter + + def _get_subquery(self) -> ast.SelectQuery: + query = ast.SelectQuery( + select=[ast.Alias(expr=ast.Field(chain=["properties", self.query.property]), alias="prop")], + distinct=True, + select_from=ast.JoinExpr(table=ast.Field(chain=[self._origin])), + where=self._subquery_filter(), + order_by=[ast.OrderExpr(expr=ast.Field(chain=["created_at"]), order="DESC")], + ) + return query diff --git a/posthog/hogql_queries/ai/event_taxonomy_query_runner.py b/posthog/hogql_queries/ai/event_taxonomy_query_runner.py index 23530e1faf259..7b74a5e98d173 100644 --- a/posthog/hogql_queries/ai/event_taxonomy_query_runner.py +++ b/posthog/hogql_queries/ai/event_taxonomy_query_runner.py @@ -1,11 +1,10 @@ -from datetime import datetime -from typing import Optional, cast +from typing import cast -from posthog.caching.utils import ThresholdMode, is_stale from posthog.hogql import ast from posthog.hogql.parser import parse_expr, parse_select from posthog.hogql.printer import to_printed_hogql from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.ai.utils import TaxonomyCacheMixin from posthog.hogql_queries.query_runner import QueryRunner from posthog.schema import ( CachedEventTaxonomyQueryResponse, @@ -15,7 +14,7 @@ ) -class EventTaxonomyQueryRunner(QueryRunner): +class EventTaxonomyQueryRunner(TaxonomyCacheMixin, QueryRunner): query: EventTaxonomyQuery response: EventTaxonomyQueryResponse cached_response: CachedEventTaxonomyQueryResponse @@ -69,15 +68,6 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: return query - def _is_stale(self, last_refresh: Optional[datetime], lazy: bool = False) -> bool: - """ - Despite the lazy mode, it caches for an hour by default. We don't want frequent updates here. - """ - return is_stale(self.team, date_to=None, interval=None, last_refresh=last_refresh, mode=ThresholdMode.AI) - - def cache_target_age(self, last_refresh: Optional[datetime], lazy: bool = False) -> Optional[datetime]: - return None - def _get_omit_filter(self): """ Ignore properties that are not useful for AI. diff --git a/posthog/hogql_queries/ai/team_taxonomy_query_runner.py b/posthog/hogql_queries/ai/team_taxonomy_query_runner.py index efa25ee17f68b..ff47aaefcda1f 100644 --- a/posthog/hogql_queries/ai/team_taxonomy_query_runner.py +++ b/posthog/hogql_queries/ai/team_taxonomy_query_runner.py @@ -1,11 +1,8 @@ -from datetime import datetime -from typing import Optional - -from posthog.caching.utils import ThresholdMode, is_stale from posthog.hogql import ast from posthog.hogql.parser import parse_select from posthog.hogql.printer import to_printed_hogql from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.ai.utils import TaxonomyCacheMixin from posthog.hogql_queries.query_runner import QueryRunner from posthog.schema import ( CachedTeamTaxonomyQueryResponse, @@ -15,7 +12,7 @@ ) -class TeamTaxonomyQueryRunner(QueryRunner): +class TeamTaxonomyQueryRunner(TaxonomyCacheMixin, QueryRunner): """ Calculates the top events for a team sorted by count. The EventDefinition model doesn't store the count of events, so this query mitigates that. @@ -63,12 +60,3 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery: ) return query - - def _is_stale(self, last_refresh: Optional[datetime], lazy: bool = False) -> bool: - """ - Despite the lazy mode, it caches for an hour by default. We don't want frequent updates here. - """ - return is_stale(self.team, date_to=None, interval=None, last_refresh=last_refresh, mode=ThresholdMode.AI) - - def cache_target_age(self, last_refresh: Optional[datetime], lazy: bool = False) -> Optional[datetime]: - return None diff --git a/posthog/hogql_queries/ai/test/__snapshots__/test_actors_property_taxonomy_query_runner.ambr b/posthog/hogql_queries/ai/test/__snapshots__/test_actors_property_taxonomy_query_runner.ambr new file mode 100644 index 0000000000000..a0724393e1fe3 --- /dev/null +++ b/posthog/hogql_queries/ai/test/__snapshots__/test_actors_property_taxonomy_query_runner.ambr @@ -0,0 +1,157 @@ +# serializer version: 1 +# name: TestActorsPropertyTaxonomyQueryRunner.test_group_property_taxonomy_query_runner + ''' + SELECT groupArray(5)(prop), count() + FROM + (SELECT DISTINCT groups.properties___industry AS prop + FROM + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'industry'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___industry, + argMax(toTimeZone(groups.created_at, 'UTC'), toTimeZone(groups._timestamp, 'UTC')) AS created_at, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + WHERE and(isNotNull(prop), ifNull(equals(groups.index, 0), 0)) + ORDER BY groups.created_at DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestActorsPropertyTaxonomyQueryRunner.test_group_property_taxonomy_query_runner.1 + ''' + SELECT groupArray(5)(prop), count() + FROM + (SELECT DISTINCT groups.`properties___does not exist` AS prop + FROM + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'does not exist'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS `properties___does not exist`, + argMax(toTimeZone(groups.created_at, 'UTC'), toTimeZone(groups._timestamp, 'UTC')) AS created_at, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + WHERE and(isNotNull(prop), ifNull(equals(groups.index, 0), 0)) + ORDER BY groups.created_at DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestActorsPropertyTaxonomyQueryRunner.test_group_property_taxonomy_query_runner.2 + ''' + SELECT groupArray(5)(prop), count() + FROM + (SELECT DISTINCT groups.properties___employee_count AS prop + FROM + (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(groups.group_properties, 'employee_count'), ''), 'null'), '^"|"$', ''), toTimeZone(groups._timestamp, 'UTC')) AS properties___employee_count, + argMax(toTimeZone(groups.created_at, 'UTC'), toTimeZone(groups._timestamp, 'UTC')) AS created_at, + groups.group_type_index AS index, + groups.group_key AS key + FROM groups + WHERE equals(groups.team_id, 2) + GROUP BY groups.group_type_index, + groups.group_key) AS groups + WHERE and(isNotNull(prop), ifNull(equals(groups.index, 0), 0)) + ORDER BY groups.created_at DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestActorsPropertyTaxonomyQueryRunner.test_person_property_taxonomy_query_runner + ''' + SELECT groupArray(5)(prop), count() + FROM + (SELECT DISTINCT persons.properties___email AS prop + FROM + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email, + toTimeZone(person.created_at, 'UTC') AS created_at + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons + WHERE isNotNull(prop) + ORDER BY persons.created_at DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestActorsPropertyTaxonomyQueryRunner.test_person_property_taxonomy_query_runner.1 + ''' + SELECT groupArray(5)(prop), count() + FROM + (SELECT DISTINCT persons.`properties___does not exist` AS prop + FROM + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'does not exist'), ''), 'null'), '^"|"$', '') AS `properties___does not exist`, + toTimeZone(person.created_at, 'UTC') AS created_at + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons + WHERE isNotNull(prop) + ORDER BY persons.created_at DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- +# name: TestActorsPropertyTaxonomyQueryRunner.test_person_property_taxonomy_query_runner.2 + ''' + SELECT groupArray(5)(prop), count() + FROM + (SELECT DISTINCT persons.properties___age AS prop + FROM + (SELECT person.id AS id, + replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'age'), ''), 'null'), '^"|"$', '') AS properties___age, + toTimeZone(person.created_at, 'UTC') AS created_at + FROM person + WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), + (SELECT person.id AS id, max(person.version) AS version + FROM person + WHERE equals(person.team_id, 2) + GROUP BY person.id + HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'UTC'), person.version), plus(now64(6, 'UTC'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS persons + WHERE isNotNull(prop) + ORDER BY persons.created_at DESC) + LIMIT 100 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1, + format_csv_allow_double_quotes=0, + max_ast_elements=4000000, + max_expanded_ast_elements=4000000, + max_bytes_before_external_group_by=0 + ''' +# --- diff --git a/posthog/hogql_queries/ai/test/test_actors_property_taxonomy_query_runner.py b/posthog/hogql_queries/ai/test/test_actors_property_taxonomy_query_runner.py new file mode 100644 index 0000000000000..606a716dfaf91 --- /dev/null +++ b/posthog/hogql_queries/ai/test/test_actors_property_taxonomy_query_runner.py @@ -0,0 +1,107 @@ +from django.test import override_settings + +from posthog.hogql_queries.ai.actors_property_taxonomy_query_runner import ActorsPropertyTaxonomyQueryRunner +from posthog.models.group.util import create_group +from posthog.models.group_type_mapping import GroupTypeMapping +from posthog.schema import ActorsPropertyTaxonomyQuery +from posthog.test.base import ( + APIBaseTest, + ClickhouseTestMixin, + _create_person, + snapshot_clickhouse_queries, +) + + +@override_settings(IN_UNIT_TESTING=True) +class TestActorsPropertyTaxonomyQueryRunner(ClickhouseTestMixin, APIBaseTest): + @snapshot_clickhouse_queries + def test_person_property_taxonomy_query_runner(self): + _create_person( + distinct_ids=["person1"], + properties={"email": "person1@example.com", "name": "Person One", "age": 30}, + team=self.team, + ) + _create_person( + distinct_ids=["person2"], + properties={"email": "person2@example.com", "age": 30}, + team=self.team, + ) + _create_person( + distinct_ids=["person3"], + properties={"email": "person3@example.com"}, + team=self.team, + ) + + # regular person property + results = ActorsPropertyTaxonomyQueryRunner( + team=self.team, query=ActorsPropertyTaxonomyQuery(property="email") + ).calculate() + self.assertEqual(len(results.results.sample_values), 3) + self.assertEqual( + set(results.results.sample_values), {"person1@example.com", "person2@example.com", "person3@example.com"} + ) + self.assertEqual(results.results.sample_count, 3) + + # does not exist + results = ActorsPropertyTaxonomyQueryRunner( + team=self.team, query=ActorsPropertyTaxonomyQuery(property="does not exist") + ).calculate() + self.assertEqual(len(results.results.sample_values), 0) + self.assertEqual(results.results.sample_count, 0) + + # Ensure only distinct values are returned + results = ActorsPropertyTaxonomyQueryRunner( + team=self.team, query=ActorsPropertyTaxonomyQuery(property="age") + ).calculate() + self.assertEqual(len(results.results.sample_values), 1) + self.assertEqual(results.results.sample_count, 1) + # Ensure the value is a string + self.assertEqual(results.results.sample_values[0], "30") + + @snapshot_clickhouse_queries + def test_group_property_taxonomy_query_runner(self): + GroupTypeMapping.objects.create(team=self.team, group_type="Company", group_type_index=0) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="Hooli", + properties={"industry": "tech", "employee_count": 30}, + ) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="Pied Piper", + properties={"industry": "energy", "employee_count": 30}, + ) + create_group( + team_id=self.team.pk, + group_type_index=0, + group_key="BYG", + properties={"industry": "ecommerce"}, + ) + + # regular group property + results = ActorsPropertyTaxonomyQueryRunner( + team=self.team, query=ActorsPropertyTaxonomyQuery(property="industry", group_type_index=0) + ).calculate() + self.assertEqual(len(results.results.sample_values), 3) + self.assertEqual(set(results.results.sample_values), {"tech", "energy", "ecommerce"}) + self.assertEqual(results.results.sample_count, 3) + + # does not exist + results = ActorsPropertyTaxonomyQueryRunner( + team=self.team, + query=ActorsPropertyTaxonomyQuery(property="does not exist", group_type_index=0), + ).calculate() + self.assertEqual(len(results.results.sample_values), 0) + self.assertEqual(results.results.sample_count, 0) + + # Ensure only distinct values are returned + results = ActorsPropertyTaxonomyQueryRunner( + team=self.team, + query=ActorsPropertyTaxonomyQuery(property="employee_count", group_type_index=0), + ).calculate() + self.assertEqual(len(results.results.sample_values), 1) + self.assertEqual(results.results.sample_count, 1) + # Ensure the value is a string + self.assertEqual(results.results.sample_values[0], "30") diff --git a/posthog/hogql_queries/ai/test/test_event_taxonomy_query_runner.py b/posthog/hogql_queries/ai/test/test_event_taxonomy_query_runner.py index 6807d1d8c53e8..bf66d6966e9a0 100644 --- a/posthog/hogql_queries/ai/test/test_event_taxonomy_query_runner.py +++ b/posthog/hogql_queries/ai/test/test_event_taxonomy_query_runner.py @@ -176,18 +176,6 @@ def test_event_taxonomy_includes_properties_from_multiple_persons(self): self.assertEqual(results[2].sample_values, ["1024x768"]) self.assertEqual(results[2].sample_count, 1) - def test_is_stale(self): - date = timezone.now() - runner = EventTaxonomyQueryRunner(team=self.team, query=EventTaxonomyQuery(event="event1")) - self.assertFalse(runner._is_stale(last_refresh=date, lazy=False)) - self.assertFalse(runner._is_stale(last_refresh=date, lazy=True)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=15), lazy=False)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=15), lazy=True)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=59), lazy=True)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=59), lazy=False)) - self.assertTrue(runner._is_stale(last_refresh=date - timedelta(minutes=60), lazy=True)) - self.assertTrue(runner._is_stale(last_refresh=date - timedelta(minutes=60), lazy=False)) - def test_caching(self): now = timezone.now() diff --git a/posthog/hogql_queries/ai/test/test_team_taxonomy_query_runner.py b/posthog/hogql_queries/ai/test/test_team_taxonomy_query_runner.py index 8c29090224e1e..c5a7699f99bdd 100644 --- a/posthog/hogql_queries/ai/test/test_team_taxonomy_query_runner.py +++ b/posthog/hogql_queries/ai/test/test_team_taxonomy_query_runner.py @@ -51,18 +51,6 @@ def test_taxonomy_query_runner(self): self.assertEqual(results.results[1].event, "event2") self.assertEqual(results.results[1].count, 1) - def test_is_stale(self): - date = timezone.now() - runner = TeamTaxonomyQueryRunner(team=self.team, query=TeamTaxonomyQuery()) - self.assertFalse(runner._is_stale(last_refresh=date, lazy=False)) - self.assertFalse(runner._is_stale(last_refresh=date, lazy=True)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=15), lazy=False)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=15), lazy=True)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=59), lazy=True)) - self.assertFalse(runner._is_stale(last_refresh=date - timedelta(minutes=59), lazy=False)) - self.assertTrue(runner._is_stale(last_refresh=date - timedelta(minutes=60), lazy=True)) - self.assertTrue(runner._is_stale(last_refresh=date - timedelta(minutes=60), lazy=False)) - def test_caching(self): now = timezone.now() diff --git a/posthog/hogql_queries/ai/test/test_utils.py b/posthog/hogql_queries/ai/test/test_utils.py new file mode 100644 index 0000000000000..4729a21657be1 --- /dev/null +++ b/posthog/hogql_queries/ai/test/test_utils.py @@ -0,0 +1,24 @@ +from datetime import timedelta + +from django.utils import timezone + +from posthog.hogql_queries.ai.utils import TaxonomyCacheMixin +from posthog.test.base import BaseTest + + +class TestTaxonomyUtils(BaseTest): + def test_is_stale(self): + class Mixin(TaxonomyCacheMixin): + team = self.team + + date = timezone.now() + + mixin = Mixin() + self.assertFalse(mixin._is_stale(last_refresh=date, lazy=False)) + self.assertFalse(mixin._is_stale(last_refresh=date, lazy=True)) + self.assertFalse(mixin._is_stale(last_refresh=date - timedelta(minutes=15), lazy=False)) + self.assertFalse(mixin._is_stale(last_refresh=date - timedelta(minutes=15), lazy=True)) + self.assertFalse(mixin._is_stale(last_refresh=date - timedelta(minutes=59), lazy=True)) + self.assertFalse(mixin._is_stale(last_refresh=date - timedelta(minutes=59), lazy=False)) + self.assertTrue(mixin._is_stale(last_refresh=date - timedelta(minutes=60), lazy=True)) + self.assertTrue(mixin._is_stale(last_refresh=date - timedelta(minutes=60), lazy=False)) diff --git a/posthog/hogql_queries/ai/utils.py b/posthog/hogql_queries/ai/utils.py new file mode 100644 index 0000000000000..2402e1ec2cc17 --- /dev/null +++ b/posthog/hogql_queries/ai/utils.py @@ -0,0 +1,19 @@ +from abc import ABC +from datetime import datetime +from typing import Optional + +from posthog.caching.utils import ThresholdMode, is_stale +from posthog.models.team.team import Team + + +class TaxonomyCacheMixin(ABC): + team: Team + + def _is_stale(self, last_refresh: Optional[datetime], lazy: bool = False) -> bool: + """ + Despite the lazy mode, it caches for an hour by default. We don't want frequent updates here. + """ + return is_stale(self.team, date_to=None, interval=None, last_refresh=last_refresh, mode=ThresholdMode.AI) + + def cache_target_age(self, last_refresh: Optional[datetime], lazy: bool = False) -> Optional[datetime]: + return None diff --git a/posthog/schema.py b/posthog/schema.py index d9b13b129c4c3..f588282aa5554 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -27,6 +27,14 @@ class ActionConversionGoal(BaseModel): actionId: int +class ActorsPropertyTaxonomyResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + sample_count: int + sample_values: list[str] + + class AggregationAxisFormat(StrEnum): NUMERIC = "numeric" DURATION = "duration" @@ -860,6 +868,7 @@ class NodeKind(StrEnum): SUGGESTED_QUESTIONS_QUERY = "SuggestedQuestionsQuery" TEAM_TAXONOMY_QUERY = "TeamTaxonomyQuery" EVENT_TAXONOMY_QUERY = "EventTaxonomyQuery" + ACTORS_PROPERTY_TAXONOMY_QUERY = "ActorsPropertyTaxonomyQuery" class PathCleaningFilter(BaseModel): @@ -1663,6 +1672,27 @@ class YAxisSettings(BaseModel): startAtZero: Optional[bool] = Field(default=None, description="Whether the Y axis should start at zero") +class ActorsPropertyTaxonomyQueryResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + error: Optional[str] = Field( + default=None, + description="Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + ) + hogql: Optional[str] = Field(default=None, description="Generated HogQL query.") + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + query_status: Optional[QueryStatus] = Field( + default=None, description="Query status indicates whether next to the provided data, a query is still running." + ) + results: ActorsPropertyTaxonomyResponse + timings: Optional[list[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + + class ActorsQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", @@ -1731,6 +1761,36 @@ class CacheMissResponse(BaseModel): query_status: Optional[QueryStatus] = None +class CachedActorsPropertyTaxonomyQueryResponse(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + cache_key: str + cache_target_age: Optional[AwareDatetime] = None + calculation_trigger: Optional[str] = Field( + default=None, description="What triggered the calculation of the query, leave empty if user/immediate" + ) + error: Optional[str] = Field( + default=None, + description="Query error. Returned only if 'explain' or `modifiers.debug` is true. Throws an error otherwise.", + ) + hogql: Optional[str] = Field(default=None, description="Generated HogQL query.") + is_cached: bool + last_refresh: AwareDatetime + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + next_allowed_client_refresh: AwareDatetime + query_status: Optional[QueryStatus] = Field( + default=None, description="Query status indicates whether next to the provided data, a query is still running." + ) + results: ActorsPropertyTaxonomyResponse + timezone: str + timings: Optional[list[QueryTiming]] = Field( + default=None, description="Measured timings for different parts of the query generation process" + ) + + class CachedActorsQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", @@ -4036,6 +4096,19 @@ class WebTopClicksQuery(BaseModel): useSessionsTable: Optional[bool] = None +class ActorsPropertyTaxonomyQuery(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + group_type_index: Optional[int] = None + kind: Literal["ActorsPropertyTaxonomyQuery"] = "ActorsPropertyTaxonomyQuery" + modifiers: Optional[HogQLQueryModifiers] = Field( + default=None, description="Modifiers used when performing the query" + ) + property: str + response: Optional[ActorsPropertyTaxonomyQueryResponse] = None + + class AnyResponseType( RootModel[ Union[