diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index ad5628d48295b..65ee5bdf4702b 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1674,16 +1674,29 @@ "PersonsQuery": { "additionalProperties": false, "properties": { - "day": { - "type": "string" - }, - "group": { - "type": "string" + "fixedProperties": { + "items": { + "anyOf": [ + { + "$ref": "#/definitions/PersonPropertyFilter" + }, + { + "$ref": "#/definitions/HogQLPropertyFilter" + } + ] + }, + "type": "array" }, "kind": { "const": "PersonsQuery", "type": "string" }, + "limit": { + "type": "number" + }, + "offset": { + "type": "number" + }, "properties": { "items": { "anyOf": [ @@ -1706,14 +1719,27 @@ }, "source": { "$ref": "#/definitions/InsightQueryNode" + }, + "sourceDay": { + "type": "string" + }, + "sourceGroup": { + "type": "string" } }, - "required": ["kind", "source"], + "required": ["kind"], "type": "object" }, "PersonsQueryResponse": { "additionalProperties": false, "properties": { + "columns": { + "items": {}, + "type": "array" + }, + "hasMore": { + "type": "boolean" + }, "hogql": { "type": "string" }, @@ -1730,9 +1756,15 @@ "$ref": "#/definitions/QueryTiming" }, "type": "array" + }, + "types": { + "items": { + "type": "string" + }, + "type": "array" } }, - "required": ["results", "hogql"], + "required": ["results", "columns", "types", "hogql"], "type": "object" }, "PropertyFilterValue": { diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index efcb15e220edb..a4585e5f5da85 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -466,17 +466,23 @@ export interface LifecycleQuery extends InsightsQueryBase { export interface PersonsQueryResponse { /** Results in the format: [ ['uuid', breakdown1, breakdown2, ...], ... ] */ results: any[][] // typed as any[], not [str, ...any] because python + columns: any[] + types: string[] hogql: string timings?: QueryTiming[] + hasMore?: boolean } export interface PersonsQuery extends DataNode { kind: NodeKind.PersonsQuery - source: InsightQueryNode - day?: string - group?: string + source?: InsightQueryNode + sourceDay?: string + sourceGroup?: string search?: string properties?: (PersonPropertyFilter | HogQLPropertyFilter)[] + fixedProperties?: (PersonPropertyFilter | HogQLPropertyFilter)[] + limit?: number + offset?: number response?: PersonsQueryResponse } diff --git a/posthog/hogql_queries/lifecycle_query_runner.py b/posthog/hogql_queries/lifecycle_query_runner.py index b55a866b95f3f..1c81bcaea1d46 100644 --- a/posthog/hogql_queries/lifecycle_query_runner.py +++ b/posthog/hogql_queries/lifecycle_query_runner.py @@ -72,7 +72,7 @@ def to_query(self) -> ast.SelectQuery: ) return lifecycle_query - def to_persons_query(self) -> str: + def to_persons_query(self) -> ast.SelectQuery: # TODO: add support for selecting and filtering by breakdowns with self.timings.measure("persons_query"): return parse_select( diff --git a/posthog/hogql_queries/persons_query_runner.py b/posthog/hogql_queries/persons_query_runner.py index 8de0a27bb4e2e..c15ada5ec0571 100644 --- a/posthog/hogql_queries/persons_query_runner.py +++ b/posthog/hogql_queries/persons_query_runner.py @@ -1,7 +1,7 @@ -from typing import Optional, Any, Dict +from typing import Optional, Any, Dict, List from posthog.hogql import ast -from posthog.hogql.parser import parse_select +from posthog.hogql.property import property_to_expr from posthog.hogql.query import execute_hogql_query from posthog.hogql.timings import HogQLTimings from posthog.hogql_queries.query_runner import QueryRunner, get_query_runner @@ -17,7 +17,7 @@ def __init__(self, query: PersonsQuery | Dict[str, Any], team: Team, timings: Op if isinstance(query, PersonsQuery): self.query = query else: - self.query = PersonsQuery.parse_obj(query) + self.query = PersonsQuery.model_validate(query) def run(self) -> PersonsQueryResponse: response = execute_hogql_query( @@ -26,17 +26,54 @@ def run(self) -> PersonsQueryResponse: team=self.team, timings=self.timings, ) - return PersonsQueryResponse(results=response.results, timings=response.timings, hogql=response.hogql) + return PersonsQueryResponse( + results=response.results, + timings=response.timings, + hogql=response.hogql, + columns=[], + types=[], + ) def to_query(self) -> ast.SelectQuery: - source = self.query.source - if isinstance(source, LifecycleQuery): - query = get_query_runner(source, self.team, self.timings).to_persons_query() - return parse_select( - "select * from persons where id in {query}", placeholders={"query": query}, timings=self.timings + where: List[ast.Expr] = [] + + if self.query.properties: + where.append(property_to_expr(self.query.properties, self.team)) + + if self.query.fixedProperties: + where.append(property_to_expr(self.query.fixedProperties, self.team)) + + if self.query.source: + source = self.query.source + if isinstance(source, LifecycleQuery): + source_query = get_query_runner(source, self.team, self.timings).to_persons_query() + where.append( + ast.CompareOperation(op=ast.CompareOperationOp.In, left=ast.Field(chain=["id"]), right=source_query) + ) + else: + raise ValueError(f"Queries of type '{source.kind}' are not supported as a PersonsQuery sources.") + + # adding +1 to the limit to check if there's a "next page" after the requested results + from posthog.hogql.constants import DEFAULT_RETURNED_ROWS, MAX_SELECT_RETURNED_ROWS + + limit = ( + min(MAX_SELECT_RETURNED_ROWS, DEFAULT_RETURNED_ROWS if self.query.limit is None else self.query.limit) + 1 + ) + offset = 0 if self.query.offset is None else self.query.offset + + with self.timings.measure("select"): + stmt = ast.SelectQuery( + select=[], + select_from=ast.JoinExpr(table=ast.Field(chain=["persons"])), + where=ast.And(exprs=where) if len(where) > 0 else None, + # having=having, + # group_by=group_by if has_any_aggregation else None, + # order_by=order_by, + limit=ast.Constant(value=limit), + offset=ast.Constant(value=offset), ) - raise ValueError(f"Can't get a runner for an unknown query kind: {source.kind}") + return stmt def to_persons_query(self) -> ast.SelectQuery: return self.to_query() diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index 9cfeeb95d962a..a3c32708dcd17 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -44,7 +44,7 @@ def run(self) -> BaseModel: def to_query(self) -> ast.SelectQuery: raise NotImplementedError() - def to_persons_query(self) -> str: + def to_persons_query(self) -> ast.SelectQuery: # TODO: add support for selecting and filtering by breakdowns raise NotImplementedError() diff --git a/posthog/hogql_queries/test/test_lifecycle_hogql_query.py b/posthog/hogql_queries/test/test_lifecycle_query_runner.py similarity index 99% rename from posthog/hogql_queries/test/test_lifecycle_hogql_query.py rename to posthog/hogql_queries/test/test_lifecycle_query_runner.py index d9996640f64c3..e912522ad1d5b 100644 --- a/posthog/hogql_queries/test/test_lifecycle_hogql_query.py +++ b/posthog/hogql_queries/test/test_lifecycle_query_runner.py @@ -9,7 +9,7 @@ from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events -class TestQuery(ClickhouseTestMixin, APIBaseTest): +class TestLifecycleQueryRunner(ClickhouseTestMixin, APIBaseTest): maxDiff = None def _create_random_events(self) -> str: diff --git a/posthog/hogql_queries/test/test_persons_query_runner.py b/posthog/hogql_queries/test/test_persons_query_runner.py new file mode 100644 index 0000000000000..da9da6dbf9a01 --- /dev/null +++ b/posthog/hogql_queries/test/test_persons_query_runner.py @@ -0,0 +1,43 @@ +from posthog.hogql import ast +from posthog.hogql_queries.persons_query_runner import PersonsQueryRunner +from posthog.models.utils import UUIDT +from posthog.schema import PersonsQuery +from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_person, flush_persons_and_events + + +class TestPersonsQueryRunner(ClickhouseTestMixin, APIBaseTest): + maxDiff = None + + def _create_random_persons(self) -> str: + random_uuid = str(UUIDT()) + for index in range(10): + _create_person( + properties={"sneaky_mail": f"tim{index}@posthog.com", "random_uuid": random_uuid, "index": index}, + team=self.team, + distinct_ids=[f"id-{index}"], + is_identified=True, + ) + flush_persons_and_events() + return random_uuid + + def _create_runner(self, query: PersonsQuery) -> PersonsQueryRunner: + return PersonsQueryRunner(team=self.team, query=query) + + def test_basic_persons_query(self): + self._create_random_persons() + runner = self._create_runner(PersonsQuery()) + + query = runner.to_query() + self.assertEqual( + query, + ast.SelectQuery( + select=[], + select_from=ast.JoinExpr(table=ast.Field(chain=["persons"])), + where=None, + limit=ast.Constant(value=101), + offset=ast.Constant(value=0), + ), + ) + + response = runner.run() + self.assertEqual(len(response.results), 10) diff --git a/posthog/schema.py b/posthog/schema.py index a09e33564c209..8a58edcedad1f 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -643,11 +643,14 @@ class PersonsQueryResponse(BaseModel): model_config = ConfigDict( extra="forbid", ) + columns: List + hasMore: Optional[bool] = None hogql: str results: List[List] = Field( ..., description="Results in the format: [ ['uuid', breakdown1, breakdown2, ...], ... ]" ) timings: Optional[List[QueryTiming]] = None + types: List[str] class RetentionFilter(BaseModel): @@ -1213,13 +1216,18 @@ class PersonsQuery(BaseModel): model_config = ConfigDict( extra="forbid", ) - day: Optional[str] = None - group: Optional[str] = None + fixedProperties: Optional[List[Union[PersonPropertyFilter, HogQLPropertyFilter]]] = None kind: Literal["PersonsQuery"] = "PersonsQuery" + limit: Optional[float] = None + offset: Optional[float] = None properties: Optional[List[Union[PersonPropertyFilter, HogQLPropertyFilter]]] = None response: Optional[PersonsQueryResponse] = Field(default=None, description="Cached query response") search: Optional[str] = None - source: Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] + source: Optional[ + Union[TrendsQuery, FunnelsQuery, RetentionQuery, PathsQuery, StickinessQuery, LifecycleQuery] + ] = None + sourceDay: Optional[str] = None + sourceGroup: Optional[str] = None class DataTableNode(BaseModel):