Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(product-analytics): Use sessions table for session duration #21208

Merged
merged 9 commits into from
Mar 28, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
9 changes: 3 additions & 6 deletions posthog/hogql/database/schema/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,15 +11,12 @@
FieldTraverser,
FieldOrTable,
)
from posthog.hogql.database.schema.event_sessions import (
EventsSessionSubTable,
join_with_events_table_session_duration,
)
from posthog.hogql.database.schema.groups import GroupsTable, join_with_group_n_table
from posthog.hogql.database.schema.person_distinct_ids import (
PersonDistinctIdsTable,
join_with_person_distinct_ids_table,
)
from posthog.hogql.database.schema.sessions import join_events_table_to_sessions_table, SessionsTable


class EventsPersonSubTable(VirtualTable):
Expand Down Expand Up @@ -116,8 +113,8 @@ class EventsTable(Table):
),
"session": LazyJoin(
from_field=["$session_id"],
join_table=EventsSessionSubTable(),
join_function=join_with_events_table_session_duration,
join_table=SessionsTable(),
join_function=join_events_table_to_sessions_table,
),
}

Expand Down
32 changes: 30 additions & 2 deletions posthog/hogql/database/schema/sessions.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import Dict, List, cast
from typing import Dict, List, cast, Any

from posthog.hogql import ast
from posthog.hogql.context import HogQLContext
Expand All @@ -14,9 +14,12 @@
)
from posthog.hogql.database.schema.channel_type import create_channel_type_expr
from posthog.hogql.database.schema.util.session_where_clause_extractor import SessionMinTimestampWhereClauseExtractor

from posthog.hogql.errors import HogQLException

SESSIONS_COMMON_FIELDS: Dict[str, FieldOrTable] = {
"id": StringDatabaseField(
name="session_id"
), # TODO remove this, it's a duplicate of the correct session_id field below to get some trends working on a deadline
"session_id": StringDatabaseField(name="session_id"),
"team_id": IntegerDatabaseField(name="team_id"),
"distinct_id": StringDatabaseField(name="distinct_id"),
Expand Down Expand Up @@ -71,6 +74,10 @@ def select_from_sessions_table(

table_name = "raw_sessions"

# Always include "session_id", as it's the key we use to make further joins, and it'd be great if it's available
if "session_id" not in requested_fields:
requested_fields = {**requested_fields, "session_id": ["session_id"]}

aggregate_fields = {
"distinct_id": ast.Call(name="any", args=[ast.Field(chain=[table_name, "distinct_id"])]),
"min_timestamp": ast.Call(name="min", args=[ast.Field(chain=[table_name, "min_timestamp"])]),
Expand Down Expand Up @@ -163,3 +170,24 @@ def to_printed_clickhouse(self, context):

def to_printed_hogql(self) -> str:
    """Return the constant string ``"sessions"`` as the printed HogQL form."""
    return "sessions"


def join_events_table_to_sessions_table(
    from_table: str, to_table: str, requested_fields: Dict[str, Any], context: HogQLContext, node: ast.SelectQuery
) -> ast.JoinExpr:
    """Build the LEFT JOIN that attaches the sessions subquery to another table.

    The join condition is ``<from_table>.$session_id = <to_table>.session_id``.

    Args:
        from_table: Name of the table whose ``$session_id`` field is the left
            side of the join condition.
        to_table: Alias given to the joined sessions subquery; also used as the
            right side of the join condition.
        requested_fields: Fields requested from the sessions table, forwarded
            unchanged to ``select_from_sessions_table``.
        context: HogQL context, forwarded to ``select_from_sessions_table``.
        node: Select query, forwarded to ``select_from_sessions_table``.

    Returns:
        An ``ast.JoinExpr`` of type ``LEFT JOIN`` aliased as ``to_table``.

    Raises:
        HogQLException: If ``requested_fields`` is empty.
    """
    if not requested_fields:
        # The fields are requested from the sessions table, not from events.
        raise HogQLException("No fields requested from sessions")

    # Equality between the source table's $session_id and the subquery's session_id.
    session_id_match = ast.CompareOperation(
        op=ast.CompareOperationOp.Eq,
        left=ast.Field(chain=[from_table, "$session_id"]),
        right=ast.Field(chain=[to_table, "session_id"]),
    )

    join_expr = ast.JoinExpr(table=select_from_sessions_table(requested_fields, node, context))
    join_expr.join_type = "LEFT JOIN"
    join_expr.alias = to_table
    join_expr.constraint = ast.JoinConstraint(expr=session_id_match)
    return join_expr
48 changes: 48 additions & 0 deletions posthog/hogql/database/schema/test/test_sessions.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,3 +55,51 @@ def test_channel_type(self):
result[0],
"Paid Search",
)

def test_event_dot_session_dot_channel_type(self):
    """Selecting ``events.session.channel_type`` yields "Paid Search" for a pageview with ``gad_source`` set."""
    sid = "event_dot_session_dot_channel_type"
    event_properties = {"gad_source": "1", "$session_id": sid}

    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="d1",
        properties=event_properties,
    )

    select = parse_select(
        "select events.session.channel_type from events where $session_id = {session_id}",
        placeholders={"session_id": ast.Constant(value=sid)},
    )
    response = execute_hogql_query(select, self.team)

    # Indexing an empty result list raises, which fails the test as before.
    rows = response.results or []
    first_row = rows[0]
    self.assertEqual(first_row[0], "Paid Search")

def test_events_session_dot_channel_type(self):
    """Selecting ``session.channel_type`` (without the ``events.`` prefix) yields "Paid Search"."""
    # Use an id distinct from the sibling test's so the two tests never
    # contribute events to the same session.
    session_id = "events_session_dot_channel_type"

    _create_event(
        event="$pageview",
        team=self.team,
        distinct_id="d1",
        properties={"gad_source": "1", "$session_id": session_id},
    )

    response = execute_hogql_query(
        parse_select(
            "select session.channel_type from events where $session_id = {session_id}",
            placeholders={"session_id": ast.Constant(value=session_id)},
        ),
        self.team,
    )

    result = (response.results or [])[0]
    self.assertEqual(
        result[0],
        "Paid Search",
    )
64 changes: 60 additions & 4 deletions posthog/hogql/database/test/__snapshots__/test_database.ambr
Original file line number Diff line number Diff line change
Expand Up @@ -214,10 +214,30 @@
{
"key": "session",
"type": "lazy_table",
"table": "events",
"table": "sessions",
"fields": [
"id",
"duration"
"session_id",
"team_id",
"distinct_id",
"min_timestamp",
"max_timestamp",
"urls",
"entry_url",
"exit_url",
"initial_utm_source",
"initial_utm_campaign",
"initial_utm_medium",
"initial_utm_term",
"initial_utm_content",
"initial_referring_domain",
"initial_gclid",
"initial_gad_source",
"event_count_map",
"pageview_count",
"autocapture_count",
"duration",
"channel_type"
]
}
],
Expand Down Expand Up @@ -569,6 +589,10 @@
}
],
"sessions": [
{
"key": "id",
"type": "string"
},
{
"key": "session_id",
"type": "string"
Expand Down Expand Up @@ -875,6 +899,10 @@
}
],
"raw_sessions": [
{
"key": "id",
"type": "string"
},
{
"key": "session_id",
"type": "string"
Expand Down Expand Up @@ -1123,10 +1151,30 @@
{
"key": "session",
"type": "lazy_table",
"table": "events",
"table": "sessions",
"fields": [
"id",
"duration"
"session_id",
"team_id",
"distinct_id",
"min_timestamp",
"max_timestamp",
"urls",
"entry_url",
"exit_url",
"initial_utm_source",
"initial_utm_campaign",
"initial_utm_medium",
"initial_utm_term",
"initial_utm_content",
"initial_referring_domain",
"initial_gclid",
"initial_gad_source",
"event_count_map",
"pageview_count",
"autocapture_count",
"duration",
"channel_type"
]
}
],
Expand Down Expand Up @@ -1478,6 +1526,10 @@
}
],
"sessions": [
{
"key": "id",
"type": "string"
},
{
"key": "session_id",
"type": "string"
Expand Down Expand Up @@ -1784,6 +1836,10 @@
}
],
"raw_sessions": [
{
"key": "id",
"type": "string"
},
{
"key": "session_id",
"type": "string"
Expand Down
4 changes: 3 additions & 1 deletion posthog/hogql/test/test_query.py
Original file line number Diff line number Diff line change
Expand Up @@ -1452,7 +1452,9 @@ def test_events_sessions_table(self):
properties={"$session_id": random_uuid},
)

query = "SELECT session.id, session.duration from events WHERE distinct_id={distinct_id} order by timestamp"
query = (
"SELECT session.session_id, session.duration from events WHERE distinct_id={distinct_id} order by timestamp"
)
response = execute_hogql_query(
query, team=self.team, placeholders={"distinct_id": ast.Constant(value=random_uuid)}
)
Expand Down
Loading
Loading