Skip to content

Commit

Permalink
feat(database): faster queries from the persons table (#17811)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra authored and daibhin committed Oct 23, 2023
1 parent a138a8c commit c6173d4
Show file tree
Hide file tree
Showing 25 changed files with 1,001 additions and 413 deletions.
5 changes: 5 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1235,7 +1235,12 @@
"additionalProperties": false,
"description": "HogQL Query Options are automatically set per team. However, they can be overriden in the query.",
"properties": {
"personsArgMaxVersion": {
"enum": ["v1", "v2"],
"type": "string"
},
"personsOnEventsMode": {
"enum": ["disabled", "v1_enabled", "v2_enabled"],
"type": "string"
}
},
Expand Down
3 changes: 2 additions & 1 deletion frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -132,7 +132,8 @@ export interface DataNode extends Node {

/** HogQL Query Options are automatically set per team. However, they can be overridden in the query. */
export interface HogQLQueryModifiers {
personsOnEventsMode?: string
personsOnEventsMode?: 'disabled' | 'v1_enabled' | 'v2_enabled'
personsArgMaxVersion?: 'v1' | 'v2'
}

export interface HogQLQueryResponse {
Expand Down
22 changes: 19 additions & 3 deletions frontend/src/scenes/debug/HogQLDebug.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,9 @@ export function HogQLDebug({ query, setQuery }: HogQLDebugProps): JSX.Element {
<DateRange key="date-range" query={query} setQuery={setQuery} />
<EventPropertyFilters key="event-property" query={query} setQuery={setQuery} />
</div>
<div className="flex">
<div className="flex gap-2">
<LemonLabel>
POE:
POE Version:
<LemonSelect
options={[
{ value: 'disabled', label: 'Disabled' },
Expand All @@ -40,7 +40,23 @@ export function HogQLDebug({ query, setQuery }: HogQLDebugProps): JSX.Element {
modifiers: { ...query.modifiers, personsOnEventsMode: value },
} as HogQLQuery)
}
value={(query.modifiers ?? response?.modifiers)?.personsOnEventsMode}
value={query.modifiers?.personsOnEventsMode ?? response?.modifiers?.personsOnEventsMode}
/>
</LemonLabel>
<LemonLabel>
Persons ArgMax Version
<LemonSelect
options={[
{ value: 'v1', label: 'V1' },
{ value: 'v2', label: 'V2' },
]}
onChange={(value) =>
setQuery({
...query,
modifiers: { ...query.modifiers, personsArgMaxVersion: value },
} as HogQLQuery)
}
value={query.modifiers?.personsArgMaxVersion ?? response?.modifiers?.personsArgMaxVersion}
/>
</LemonLabel>
</div>
Expand Down
7 changes: 4 additions & 3 deletions posthog/hogql/database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@
from pydantic import ConfigDict, BaseModel

from posthog.hogql.errors import HogQLException, NotImplementedException
from posthog.schema import HogQLQueryModifiers

if TYPE_CHECKING:
from posthog.hogql.context import HogQLContext
Expand Down Expand Up @@ -100,19 +101,19 @@ def get_asterisk(self):
class LazyJoin(FieldOrTable):
model_config = ConfigDict(extra="forbid")

join_function: Callable[[str, str, Dict[str, Any]], Any]
join_function: Callable[[str, str, Dict[str, Any], HogQLQueryModifiers], Any]
join_table: Table
from_field: str


class LazyTable(Table):
"""
A table that is replaced with a subquery returned from `lazy_select(requested_fields: Dict[name, chain])`
A table that is replaced with a subquery returned from `lazy_select(requested_fields: Dict[name, chain], modifiers: HogQLQueryModifiers)`
"""

model_config = ConfigDict(extra="forbid")

def lazy_select(self, requested_fields: Dict[str, List[str]]) -> Any:
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers) -> Any:
raise NotImplementedException("LazyTable.lazy_select not overridden")


Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/cohort_people.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
FieldOrTable,
)
from posthog.hogql.database.schema.persons import PersonsTable, join_with_persons_table
from posthog.schema import HogQLQueryModifiers

COHORT_PEOPLE_FIELDS = {
"person_id": StringDatabaseField(name="person_id"),
Expand Down Expand Up @@ -56,7 +57,7 @@ def to_printed_hogql(self):
class CohortPeople(LazyTable):
fields: Dict[str, FieldOrTable] = COHORT_PEOPLE_FIELDS

def lazy_select(self, requested_fields: Dict[str, Any]):
def lazy_select(self, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers):
return select_from_cohort_people_table(requested_fields)

def to_printed_clickhouse(self, context):
Expand Down
7 changes: 5 additions & 2 deletions posthog/hogql/database/schema/groups.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
FieldOrTable,
)
from posthog.hogql.errors import HogQLException
from posthog.schema import HogQLQueryModifiers

GROUPS_TABLE_FIELDS = {
"index": IntegerDatabaseField(name="group_type_index"),
Expand All @@ -32,7 +33,9 @@ def select_from_groups_table(requested_fields: Dict[str, List[str]]):


def join_with_group_n_table(group_index: int):
def join_with_group_table(from_table: str, to_table: str, requested_fields: Dict[str, Any]):
def join_with_group_table(
from_table: str, to_table: str, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers
):
from posthog.hogql import ast

if not requested_fields:
Expand Down Expand Up @@ -72,7 +75,7 @@ def to_printed_hogql(self):
class GroupsTable(LazyTable):
fields: Dict[str, FieldOrTable] = GROUPS_TABLE_FIELDS

def lazy_select(self, requested_fields: Dict[str, List[str]]):
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
return select_from_groups_table(requested_fields)

def to_printed_clickhouse(self, context):
Expand Down
5 changes: 3 additions & 2 deletions posthog/hogql/database/schema/log_entries.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
LazyTable,
FieldOrTable,
)
from posthog.schema import HogQLQueryModifiers

LOG_ENTRIES_FIELDS: Dict[str, FieldOrTable] = {
"team_id": IntegerDatabaseField(name="team_id"),
Expand All @@ -34,7 +35,7 @@ def to_printed_hogql(self):
class ReplayConsoleLogsLogEntriesTable(LazyTable):
fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS

def lazy_select(self, requested_fields: Dict[str, List[str]]):
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()]

return ast.SelectQuery(
Expand All @@ -57,7 +58,7 @@ def to_printed_hogql(self):
class BatchExportLogEntriesTable(LazyTable):
fields: Dict[str, FieldOrTable] = LOG_ENTRIES_FIELDS

def lazy_select(self, requested_fields: Dict[str, List[str]]):
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
fields: List[ast.Expr] = [ast.Field(chain=["log_entries"] + chain) for name, chain in requested_fields.items()]

return ast.SelectQuery(
Expand Down
7 changes: 5 additions & 2 deletions posthog/hogql/database/schema/person_distinct_ids.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@
)
from posthog.hogql.database.schema.persons import PersonsTable, join_with_persons_table
from posthog.hogql.errors import HogQLException
from posthog.schema import HogQLQueryModifiers

PERSON_DISTINCT_IDS_FIELDS = {
"team_id": IntegerDatabaseField(name="team_id"),
Expand All @@ -34,7 +35,9 @@ def select_from_person_distinct_ids_table(requested_fields: Dict[str, List[str]]
)


def join_with_person_distinct_ids_table(from_table: str, to_table: str, requested_fields: Dict[str, List[str]]):
def join_with_person_distinct_ids_table(
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers
):
from posthog.hogql import ast

if not requested_fields:
Expand Down Expand Up @@ -69,7 +72,7 @@ def to_printed_hogql(self):
class PersonDistinctIdsTable(LazyTable):
fields: Dict[str, FieldOrTable] = PERSON_DISTINCT_IDS_FIELDS

def lazy_select(self, requested_fields: Dict[str, List[str]]):
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
return select_from_person_distinct_ids_table(requested_fields)

def to_printed_clickhouse(self, context):
Expand Down
7 changes: 5 additions & 2 deletions posthog/hogql/database/schema/person_overrides.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
)

from posthog.hogql.errors import HogQLException
from posthog.schema import HogQLQueryModifiers

PERSON_OVERRIDES_FIELDS: Dict[str, FieldOrTable] = {
"team_id": IntegerDatabaseField(name="team_id"),
Expand All @@ -30,7 +31,9 @@ def select_from_person_overrides_table(requested_fields: Dict[str, List[str]]):
)


def join_with_person_overrides_table(from_table: str, to_table: str, requested_fields: Dict[str, Any]):
def join_with_person_overrides_table(
from_table: str, to_table: str, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers
):
from posthog.hogql import ast

if not requested_fields:
Expand Down Expand Up @@ -65,7 +68,7 @@ def to_printed_hogql(self):
class PersonOverridesTable(Table):
fields: Dict[str, FieldOrTable] = PERSON_OVERRIDES_FIELDS

def lazy_select(self, requested_fields: Dict[str, Any]):
def lazy_select(self, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers):
return select_from_person_overrides_table(requested_fields)

def to_printed_clickhouse(self, context):
Expand Down
59 changes: 45 additions & 14 deletions posthog/hogql/database/schema/persons.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
)
from posthog.hogql.errors import HogQLException
from posthog.hogql.database.schema.persons_pdi import PersonsPDITable, persons_pdi_join
from posthog.schema import HogQLQueryModifiers, PersonsArgMaxVersion

PERSONS_FIELDS: Dict[str, FieldOrTable] = {
"id": StringDatabaseField(name="id"),
Expand All @@ -30,24 +31,54 @@
}


def select_from_persons_table(requested_fields: Dict[str, List[str]]):
select = argmax_select(
table_name="raw_persons",
select_fields=requested_fields,
group_fields=["id"],
argmax_field="version",
deleted_field="is_deleted",
)
select.settings = HogQLQuerySettings(optimize_aggregation_in_order=True)
return select
def select_from_persons_table(requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
if modifiers.personsArgMaxVersion == PersonsArgMaxVersion.v2:
from posthog.hogql.parser import parse_select
from posthog.hogql import ast

query = parse_select(
"""
SELECT id FROM raw_persons WHERE (id, version) IN (
SELECT id, max(version) as version
FROM raw_persons
GROUP BY id
HAVING ifNull(equals(argMax(raw_persons.is_deleted, raw_persons.version), 0), 0)
)
"""
)
query.settings = HogQLQuerySettings(optimize_aggregation_in_order=True)

for field_name, field_chain in requested_fields.items():
# We need to always select the 'id' field for the join constraint. The field name here is likely to
# be "persons__id" if anything, but just in case, let's avoid duplicates.
if field_name != "id":
query.select.append(
ast.Alias(
alias=field_name,
expr=ast.Field(chain=field_chain),
)
)
return query
else:
select = argmax_select(
table_name="raw_persons",
select_fields=requested_fields,
group_fields=["id"],
argmax_field="version",
deleted_field="is_deleted",
)
select.settings = HogQLQuerySettings(optimize_aggregation_in_order=True)
return select


def join_with_persons_table(from_table: str, to_table: str, requested_fields: Dict[str, List[str]]):
def join_with_persons_table(
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers
):
from posthog.hogql import ast

if not requested_fields:
raise HogQLException("No fields requested from persons table")
join_expr = ast.JoinExpr(table=select_from_persons_table(requested_fields))
join_expr = ast.JoinExpr(table=select_from_persons_table(requested_fields, modifiers))
join_expr.join_type = "INNER JOIN"
join_expr.alias = to_table
join_expr.constraint = ast.JoinConstraint(
Expand Down Expand Up @@ -77,8 +108,8 @@ def to_printed_hogql(self):
class PersonsTable(LazyTable):
fields: Dict[str, FieldOrTable] = PERSONS_FIELDS

def lazy_select(self, requested_fields: Dict[str, List[str]]):
return select_from_persons_table(requested_fields)
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
return select_from_persons_table(requested_fields, modifiers)

def to_printed_clickhouse(self, context):
return "person"
Expand Down
8 changes: 6 additions & 2 deletions posthog/hogql/database/schema/persons_pdi.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,8 @@
FieldOrTable,
)
from posthog.hogql.errors import HogQLException
from posthog.schema import HogQLQueryModifiers


# :NOTE: We already have person_distinct_ids.py, which most tables link to. This persons_pdi.py is a hack to
# make "select persons.pdi.distinct_id from persons" work while avoiding circular imports. Don't use directly.
Expand All @@ -26,7 +28,9 @@ def persons_pdi_select(requested_fields: Dict[str, List[str]]):

# :NOTE: We already have person_distinct_ids.py, which most tables link to. This persons_pdi.py is a hack to
# make "select persons.pdi.distinct_id from persons" work while avoiding circular imports. Don't use directly.
def persons_pdi_join(from_table: str, to_table: str, requested_fields: Dict[str, List[str]]):
def persons_pdi_join(
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers
):
from posthog.hogql import ast

if not requested_fields:
Expand All @@ -53,7 +57,7 @@ class PersonsPDITable(LazyTable):
"person_id": StringDatabaseField(name="person_id"),
}

def lazy_select(self, requested_fields: Dict[str, List[str]]):
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
return persons_pdi_select(requested_fields)

def to_printed_clickhouse(self, context):
Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/schema/session_replay_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
PersonDistinctIdsTable,
join_with_person_distinct_ids_table,
)
from posthog.schema import HogQLQueryModifiers

SESSION_REPLAY_EVENTS_COMMON_FIELDS: Dict[str, FieldOrTable] = {
"session_id": StringDatabaseField(name="session_id"),
Expand Down Expand Up @@ -108,7 +109,7 @@ class SessionReplayEventsTable(LazyTable):
"first_url": StringDatabaseField(name="first_url"),
}

def lazy_select(self, requested_fields: Dict[str, List[str]]):
def lazy_select(self, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers):
return select_from_session_replay_events_table(requested_fields)

def to_printed_clickhouse(self, context):
Expand Down
3 changes: 3 additions & 0 deletions posthog/hogql/modifiers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,4 +16,7 @@ def create_default_modifiers_for_team(
if modifiers.personsOnEventsMode is None:
modifiers.personsOnEventsMode = team.person_on_events_mode or PersonOnEventsMode.DISABLED

if modifiers.personsArgMaxVersion is None:
modifiers.personsArgMaxVersion = "v1"

return modifiers
Loading

0 comments on commit c6173d4

Please sign in to comment.