Skip to content

Commit

Permalink
feat: Adding session duration lazy join (#18132)
Browse files Browse the repository at this point in the history
* Added session duration as a lazy join

* Moved the session field within events

* Moved session duration logic to the join itself

* More improvements

* Updated the extractor to not use a traverser

* Fixed unused var

* Fixed broken join func and removed unused join func

* Update query snapshots

* Resolve aliased table field properly and clean the field chain

* Update query snapshots

* Update query snapshots

* Update query snapshots

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Gilbert09 and github-actions[bot] authored Oct 25, 2023
1 parent 29382cb commit ccdc2ae
Show file tree
Hide file tree
Showing 17 changed files with 501 additions and 82 deletions.
18 changes: 18 additions & 0 deletions posthog/api/test/dashboards/__snapshots__/test_dashboard.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -11232,6 +11232,24 @@
5 /* ... */) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/
'
---
# name: TestDashboard.test_retrieve_dashboard_list.33
'
SELECT "posthog_sharingconfiguration"."id",
"posthog_sharingconfiguration"."team_id",
"posthog_sharingconfiguration"."dashboard_id",
"posthog_sharingconfiguration"."insight_id",
"posthog_sharingconfiguration"."recording_id",
"posthog_sharingconfiguration"."created_at",
"posthog_sharingconfiguration"."enabled",
"posthog_sharingconfiguration"."access_token"
FROM "posthog_sharingconfiguration"
WHERE "posthog_sharingconfiguration"."dashboard_id" IN (1,
2,
3,
4,
5 /* ... */) /*controller='project_dashboards-list',route='api/projects/%28%3FP%3Cparent_lookup_team_id%3E%5B%5E/.%5D%2B%29/dashboards/%3F%24'*/
'
---
# name: TestDashboard.test_retrieve_dashboard_list.4
'
SELECT "posthog_dashboardtile"."id"
Expand Down
23 changes: 0 additions & 23 deletions posthog/hogql/database/database.py
Original file line number Diff line number Diff line change
Expand Up @@ -164,29 +164,6 @@ def create_hogql_database(team_id: int, modifiers: Optional[HogQLQueryModifiers]
return database


def determine_join_function(view):
    """Build a lazy-join callback that INNER JOINs a saved-query view.

    The returned callable parses the query stored at
    ``view.saved_query.query["query"]`` and joins it, aliased as the
    target table, on ``view.from_join_key`` = ``view.to_join_key``.
    """

    def join_function(from_table: str, to_table: str, requested_fields: Dict[str, Any]):
        """Return the join expression from ``from_table`` to the view.

        Raises:
            HogQLException: if ``requested_fields`` is empty.
        """
        # Imported at call time, as in the rest of this module's join helpers.
        from posthog.hogql import ast
        from posthog.hogql.parser import parse_select

        if not requested_fields:
            raise HogQLException(f"No fields requested from {to_table}")

        # The saved query becomes the joined sub-select.
        view_join = ast.JoinExpr(table=parse_select(view.saved_query.query["query"]))
        view_join.join_type = "INNER JOIN"
        view_join.alias = to_table

        # ON <from_table>.<from_join_key> = <to_table>.<to_join_key>
        keys_match = ast.CompareOperation(
            op=ast.CompareOperationOp.Eq,
            left=ast.Field(chain=[from_table, view.from_join_key]),
            right=ast.Field(chain=[to_table, view.to_join_key]),
        )
        view_join.constraint = ast.JoinConstraint(expr=keys_match)
        return view_join

    return join_function


class _SerializedFieldBase(TypedDict):
key: str
type: Literal[
Expand Down
3 changes: 2 additions & 1 deletion posthog/hogql/database/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@

if TYPE_CHECKING:
from posthog.hogql.context import HogQLContext
from posthog.hogql.ast import SelectQuery


class FieldOrTable(BaseModel):
Expand Down Expand Up @@ -101,7 +102,7 @@ def get_asterisk(self):
class LazyJoin(FieldOrTable):
model_config = ConfigDict(extra="forbid")

join_function: Callable[[str, str, Dict[str, Any], HogQLQueryModifiers], Any]
join_function: Callable[[str, str, Dict[str, Any], "HogQLContext", "SelectQuery"], Any]
join_table: Table
from_field: str

Expand Down
166 changes: 166 additions & 0 deletions posthog/hogql/database/schema/event_sessions.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,166 @@
from copy import deepcopy
from typing import Any, Dict, List, Optional
from posthog.hogql import ast
from posthog.hogql.context import HogQLContext
from posthog.hogql.database.models import FieldOrTable, IntegerDatabaseField, StringDatabaseField, VirtualTable
from posthog.hogql.parser import parse_select
from posthog.hogql.resolver_utils import get_long_table_name, lookup_field_by_name
from posthog.hogql.visitor import CloningVisitor, TraversingVisitor


class EventsSessionSubTable(VirtualTable):
    """Virtual table describing the session columns reachable from an event.

    It declares two columns, ``$session_id`` and ``session_duration``, and
    prints as ``events`` in both HogQL and ClickHouse output.
    """

    fields: Dict[str, FieldOrTable] = {
        "$session_id": StringDatabaseField(name="$session_id"),
        "session_duration": IntegerDatabaseField(name="session_duration"),
    }

    def to_printed_hogql(self):
        """Return the table name used when printing HogQL."""
        return "events"

    def to_printed_clickhouse(self, context):
        """Return the table name used when printing ClickHouse SQL.

        ``context`` is accepted but not read.
        """
        return "events"


class GetFieldsTraverser(TraversingVisitor):
    """Collect the ``ast.Field`` nodes found while walking an expression.

    The walk is triggered from the constructor, so after
    ``GetFieldsTraverser(expr)`` returns, ``fields`` holds each field node
    that was visited, in the order it was visited.
    """

    # Field nodes gathered during the walk.
    fields: List[ast.Field]

    def __init__(self, expr: ast.Expr):
        """Start with an empty result list and immediately walk ``expr``."""
        super().__init__()
        self.fields = []
        super().visit(expr)

    def visit_field(self, node: ast.Field):
        """Record ``node``; nothing is returned."""
        self.fields.append(node)


class CleanTableNameFromChain(CloningVisitor):
    """Clone an expression, dropping a leading table reference from field chains.

    A field such as ``<table>.<column>`` is rewritten to ``<column>`` when
    ``<table>`` is a key of ``select_query_type.tables`` whose long table name
    (per ``get_long_table_name``) equals ``table_name``. All other fields are
    cloned unchanged.
    """

    def __init__(self, table_name: str, select_query_type: ast.SelectQueryType):
        """
        Args:
            table_name: long table name whose prefix should be stripped.
            select_query_type: type of the select query the fields belong to;
                its ``tables`` mapping is used to resolve chain prefixes.
        """
        super().__init__()
        self.table_name = table_name
        self.select_query_type = select_query_type

    def _starts_with_target_table(self, node: ast.Field) -> bool:
        """Return True when the first chain element names the target table."""
        # A single-element chain cannot carry a table prefix.
        if len(node.chain) <= 1:
            return False

        prefix = str(node.chain[0])
        if prefix not in self.select_query_type.tables:
            return False

        table_type = self.select_query_type.tables[prefix]
        return get_long_table_name(self.select_query_type, table_type) == self.table_name

    def visit_field(self, node: ast.Field):
        """Return a clone of ``node`` with the target table prefix removed.

        This method does not pop from ``node.chain``: the trimmed chain is a
        new list assigned to the clone only, so the visited node keeps its
        original chain.
        """
        cloned = super().visit_field(node)
        if self._starts_with_target_table(node):
            cloned.chain = list(node.chain[1:])
        return cloned


class WhereClauseExtractor:
    """Extract the comparisons of a WHERE clause that only reference one table.

    After construction, ``compare_operators`` holds cloned comparison
    expressions whose fields all resolve on ``from_table_name``, with the
    table prefix removed from their field chains.
    """

    # Cloned, cleaned comparisons selected from the WHERE clause.
    compare_operators: List[ast.Expr]

    def __init__(self, where_expression: ast.Expr, from_table_name: str, select_query_type: ast.SelectQueryType):
        """
        Args:
            where_expression: the WHERE expression to inspect.
            from_table_name: table name the extracted comparisons must belong to.
            select_query_type: type of the query that owns ``where_expression``.
        """
        self.table_name = from_table_name
        self.select_query_type = select_query_type
        # The extraction runs on a deep copy of the caller's expression.
        self.compare_operators = self.run(deepcopy(where_expression))

    def _is_field_on_table(self, field: ast.Field) -> bool:
        """Return True when ``field`` can be resolved on the target table."""
        if not field.chain:
            return False

        head = str(field.chain[0])
        query_type = self.select_query_type
        resolved: Optional[ast.Type] = None

        # With two or more parts, the first part may name a table in scope.
        if len(field.chain) > 1 and head in query_type.tables:
            resolved = query_type.tables[head]
            if get_long_table_name(query_type, resolved) != self.table_name:
                return False

        # Otherwise treat the first part as a field that is in scope.
        if not resolved:
            resolved = lookup_field_by_name(query_type, head)
        if not resolved:
            return False

        # Walk the rest of the chain down to the deepest node.
        current = resolved
        remaining = field.chain[1:]
        while True:
            if isinstance(current, ast.FieldTraverserType):
                # Splice the traverser's own chain in front and restart from its table.
                remaining = current.chain + remaining
                current = current.table_type
            elif not remaining:
                return True
            else:
                current = current.get_child(str(remaining.pop(0)))
                if current is None:
                    return False

    def run(self, expr: ast.Expr) -> List[ast.Expr]:
        """Return cloned comparisons from ``expr`` that belong to the table.

        Only a top-level ``And`` (its direct ``CompareOperation`` members) or
        a single ``CompareOperation`` yields results; ``Or`` and every other
        expression kind yield an empty list.
        """
        matched: List[ast.Expr] = []

        if isinstance(expr, ast.And):
            for candidate in expr.exprs:
                if isinstance(candidate, ast.CompareOperation):
                    on_table = [self._is_field_on_table(f) for f in GetFieldsTraverser(candidate).fields]
                    if all(on_table):
                        matched.append(candidate)
        elif isinstance(expr, ast.CompareOperation):
            # Treat a lone comparison as a one-element AND.
            matched.extend(self.run(ast.And(exprs=[expr])))
        elif isinstance(expr, ast.Or):
            pass  # Ignore for now

        # Clone each match and strip the table name from its field chains.
        cleaned: List[ast.Expr] = []
        for match in matched:
            clone = CloningVisitor(clear_types=True, clear_locations=True).visit(match)
            cleaned.append(CleanTableNameFromChain(self.table_name, self.select_query_type).visit(clone))
        return cleaned


def join_with_events_table_session_duration(
    from_table: str,
    to_table: str,
    requested_fields: Dict[str, Any],
    context: HogQLContext,
    node: ast.SelectQuery,
):
    """Build the lazy join that exposes ``session_duration`` per ``$session_id``.

    The joined sub-select groups ``events`` by ``$session_id`` and computes the
    seconds between the earliest and latest timestamp. Comparisons from the
    outer query's WHERE clause that belong to ``from_table`` are copied into
    the sub-select, alongside a ``$session_id != ''`` filter.

    Args:
        from_table: name of the table the join starts from.
        to_table: alias given to the joined sub-select.
        requested_fields: accepted but not read here.
        context: accepted but not read here.
        node: the outer select query; its ``where`` and ``type`` are read.

    Returns:
        An ``ast.JoinExpr`` INNER JOINing the sub-select on ``$session_id``.
    """
    session_query = parse_select(
        """
select "$session_id", dateDiff('second', min(timestamp), max(timestamp)) as session_duration
from events
group by "$session_id"
"""
    )

    if isinstance(session_query, ast.SelectQuery):
        # Outer filters are only extracted when both the WHERE clause and the resolved type exist.
        if node.where and node.type:
            pushed_down = WhereClauseExtractor(node.where, from_table, node.type).compare_operators
        else:
            pushed_down = []

        has_session_id = ast.CompareOperation(
            left=ast.Field(chain=["$session_id"]), op=ast.CompareOperationOp.NotEq, right=ast.Constant(value="")
        )
        session_query.where = ast.And(exprs=[*pushed_down, has_session_id])

    session_join = ast.JoinExpr(table=session_query)
    session_join.join_type = "INNER JOIN"
    session_join.alias = to_table

    # ON <from_table>.$session_id = <to_table>.$session_id
    same_session = ast.CompareOperation(
        op=ast.CompareOperationOp.Eq,
        left=ast.Field(chain=[from_table, "$session_id"]),
        right=ast.Field(chain=[to_table, "$session_id"]),
    )
    session_join.constraint = ast.JoinConstraint(expr=same_session)

    return session_join
7 changes: 7 additions & 0 deletions posthog/hogql/database/schema/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
FieldTraverser,
FieldOrTable,
)
from posthog.hogql.database.schema.event_sessions import EventsSessionSubTable, join_with_events_table_session_duration
from posthog.hogql.database.schema.groups import GroupsTable, join_with_group_n_table
from posthog.hogql.database.schema.person_distinct_ids import (
PersonDistinctIdsTable,
Expand Down Expand Up @@ -54,6 +55,7 @@ def to_printed_hogql(self):


class EventsTable(Table):

fields: Dict[str, FieldOrTable] = {
"uuid": StringDatabaseField(name="uuid"),
"event": StringDatabaseField(name="event"),
Expand Down Expand Up @@ -97,6 +99,11 @@ class EventsTable(Table):
"group_3": LazyJoin(from_field="$group_3", join_table=GroupsTable(), join_function=join_with_group_n_table(3)),
"$group_4": StringDatabaseField(name="$group_4"),
"group_4": LazyJoin(from_field="$group_4", join_table=GroupsTable(), join_function=join_with_group_n_table(4)),
"session": LazyJoin(
from_field="$session_id",
join_table=EventsSessionSubTable(),
join_function=join_with_events_table_session_duration,
),
}

def to_printed_clickhouse(self, context):
Expand Down
8 changes: 7 additions & 1 deletion posthog/hogql/database/schema/groups.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Any, Dict, List
from posthog.hogql.ast import SelectQuery
from posthog.hogql.context import HogQLContext

from posthog.hogql.database.argmax import argmax_select
from posthog.hogql.database.models import (
Expand Down Expand Up @@ -34,7 +36,11 @@ def select_from_groups_table(requested_fields: Dict[str, List[str]]):

def join_with_group_n_table(group_index: int):
def join_with_group_table(
from_table: str, to_table: str, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers
from_table: str,
to_table: str,
requested_fields: Dict[str, Any],
context: HogQLContext,
node: SelectQuery,
):
from posthog.hogql import ast

Expand Down
8 changes: 7 additions & 1 deletion posthog/hogql/database/schema/person_distinct_ids.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Dict, List
from posthog.hogql.ast import SelectQuery
from posthog.hogql.context import HogQLContext

from posthog.hogql.database.argmax import argmax_select
from posthog.hogql.database.models import (
Expand Down Expand Up @@ -36,7 +38,11 @@ def select_from_person_distinct_ids_table(requested_fields: Dict[str, List[str]]


def join_with_person_distinct_ids_table(
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers
from_table: str,
to_table: str,
requested_fields: Dict[str, List[str]],
context: HogQLContext,
node: SelectQuery,
):
from posthog.hogql import ast

Expand Down
4 changes: 3 additions & 1 deletion posthog/hogql/database/schema/person_overrides.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Any, Dict, List
from posthog.hogql.ast import SelectQuery
from posthog.hogql.context import HogQLContext

from posthog.hogql.database.argmax import argmax_select
from posthog.hogql.database.models import (
Expand Down Expand Up @@ -32,7 +34,7 @@ def select_from_person_overrides_table(requested_fields: Dict[str, List[str]]):


def join_with_person_overrides_table(
from_table: str, to_table: str, requested_fields: Dict[str, Any], modifiers: HogQLQueryModifiers
from_table: str, to_table: str, requested_fields: Dict[str, Any], context: HogQLContext, node: SelectQuery
):
from posthog.hogql import ast

Expand Down
6 changes: 4 additions & 2 deletions posthog/hogql/database/schema/persons.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,8 @@
from typing import Dict, List
from posthog.hogql.ast import SelectQuery

from posthog.hogql.constants import HogQLQuerySettings
from posthog.hogql.context import HogQLContext
from posthog.hogql.database.argmax import argmax_select
from posthog.hogql.database.models import (
Table,
Expand Down Expand Up @@ -81,13 +83,13 @@ def select_from_persons_table(requested_fields: Dict[str, List[str]], modifiers:


def join_with_persons_table(
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], context: HogQLContext, node: SelectQuery
):
from posthog.hogql import ast

if not requested_fields:
raise HogQLException("No fields requested from persons table")
join_expr = ast.JoinExpr(table=select_from_persons_table(requested_fields, modifiers))
join_expr = ast.JoinExpr(table=select_from_persons_table(requested_fields, context.modifiers))
join_expr.join_type = "INNER JOIN"
join_expr.alias = to_table
join_expr.constraint = ast.JoinConstraint(
Expand Down
4 changes: 3 additions & 1 deletion posthog/hogql/database/schema/persons_pdi.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,6 @@
from typing import Dict, List
from posthog.hogql.ast import SelectQuery
from posthog.hogql.context import HogQLContext

from posthog.hogql.database.argmax import argmax_select
from posthog.hogql.database.models import (
Expand Down Expand Up @@ -29,7 +31,7 @@ def persons_pdi_select(requested_fields: Dict[str, List[str]]):
# :NOTE: We already have person_distinct_ids.py, which most tables link to. This persons_pdi.py is a hack to
# make "select persons.pdi.distinct_id from persons" work while avoiding circular imports. Don't use directly.
def persons_pdi_join(
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], modifiers: HogQLQueryModifiers
from_table: str, to_table: str, requested_fields: Dict[str, List[str]], context: HogQLContext, node: SelectQuery
):
from posthog.hogql import ast

Expand Down
Loading

0 comments on commit ccdc2ae

Please sign in to comment.