From 61948ce4cb8d125be04b06ad6ca5321580ad738e Mon Sep 17 00:00:00 2001 From: Robbie Date: Tue, 16 Apr 2024 11:15:12 +0100 Subject: [PATCH] Add session values --- posthog/api/session.py | 5 +- posthog/hogql/database/schema/sessions.py | 70 ++++++++++++++++++++++- posthog/models/sessions/sql.py | 37 +++--------- posthog/queries/property_values.py | 32 ----------- 4 files changed, 77 insertions(+), 67 deletions(-) diff --git a/posthog/api/session.py b/posthog/api/session.py index aaf34399d1e63..7683187f74d2a 100644 --- a/posthog/api/session.py +++ b/posthog/api/session.py @@ -7,8 +7,7 @@ from rest_framework_csv import renderers as csvrenderers from posthog.api.routing import TeamAndOrgViewSetMixin -from posthog.hogql.database.schema.sessions import get_lazy_session_table_properties -from posthog.queries.property_values import get_session_column_values_for_key +from posthog.hogql.database.schema.sessions import get_lazy_session_table_properties, get_lazy_session_table_values from posthog.rate_limit import ( ClickHouseBurstRateThrottle, ClickHouseSustainedRateThrottle, @@ -34,7 +33,7 @@ def values(self, request: request.Request, **kwargs) -> response.Response: if not key: raise ValidationError(detail=f"Key not provided") - result = get_session_column_values_for_key(key, team, search_term=search_term) + result = get_lazy_session_table_values(key, search_term=search_term, team=team) flattened = [] for value in result: diff --git a/posthog/hogql/database/schema/sessions.py b/posthog/hogql/database/schema/sessions.py index 115562c7a7a58..f7ecc102cb96c 100644 --- a/posthog/hogql/database/schema/sessions.py +++ b/posthog/hogql/database/schema/sessions.py @@ -1,4 +1,4 @@ -from typing import Dict, List, cast, Any, Optional +from typing import Dict, List, cast, Any, Optional, TYPE_CHECKING from posthog.hogql import ast from posthog.hogql.context import HogQLContext @@ -13,10 +13,18 @@ LazyTable, FloatDatabaseField, ) -from posthog.hogql.database.schema.channel_type import create_channel_type_expr +from posthog.hogql.database.schema.channel_type import create_channel_type_expr, POSSIBLE_CHANNEL_TYPES from posthog.hogql.database.schema.util.session_where_clause_extractor import SessionMinTimestampWhereClauseExtractor from posthog.hogql.errors import ResolutionError from posthog.models.property_definition import PropertyType +from posthog.models.sessions.sql import ( + SELECT_SESSION_PROP_STRING_VALUES_SQL_WITH_FILTER, + SELECT_SESSION_PROP_STRING_VALUES_SQL, +) +from posthog.queries.insight import insight_sync_execute + +if TYPE_CHECKING: + from posthog.models.team import Team RAW_SESSIONS_FIELDS: Dict[str, FieldOrTable] = { "id": StringDatabaseField(name="session_id"), @@ -242,3 +250,61 @@ def get_lazy_session_table_properties(search: Optional[str]): if (not search or search.lower() in field_name.lower()) and field_name not in hidden_fields ] return results + + +SESSION_PROPERTY_TO_RAW_SESSIONS_EXPR_MAP = { + "$initial_referring_domain": "finalizeAggregation(initial_referring_domain)", + "$initial_utm_source": "finalizeAggregation(initial_utm_source)", + "$initial_utm_campaign": "finalizeAggregation(initial_utm_campaign)", + "$initial_utm_medium": "finalizeAggregation(initial_utm_medium)", + "$initial_utm_term": "finalizeAggregation(initial_utm_term)", + "$initial_utm_content": "finalizeAggregation(initial_utm_content)", + "$initial_gclid": "finalizeAggregation(initial_gclid)", + "$initial_gad_source": "finalizeAggregation(initial_gad_source)", + "$initial_gclsrc": "finalizeAggregation(initial_gclsrc)", + "$initial_dclid": "finalizeAggregation(initial_dclid)", + "$initial_gbraid": "finalizeAggregation(initial_gbraid)", + "$initial_wbraid": "finalizeAggregation(initial_wbraid)", + "$initial_fbclid": "finalizeAggregation(initial_fbclid)", + "$initial_msclkid": "finalizeAggregation(initial_msclkid)", + "$initial_twclid": "finalizeAggregation(initial_twclid)", + "$initial_li_fat_id": "finalizeAggregation(initial_li_fat_id)", + "$initial_mc_cid": "finalizeAggregation(initial_mc_cid)", + "$initial_igshid": "finalizeAggregation(initial_igshid)", + "$initial_ttclid": "finalizeAggregation(initial_ttclid)", + "$entry_url": "finalizeAggregation(entry_url)", + "$exit_url": "finalizeAggregation(exit_url)", +} + + +def get_lazy_session_table_values(key: str, search_term: Optional[str], team: "Team"): + # the sessions table does not have a properties json object like the events and person tables + + if key == "$channel_type": + return [[name] for name in POSSIBLE_CHANNEL_TYPES if not search_term or search_term.lower() in name.lower()] + + expr = SESSION_PROPERTY_TO_RAW_SESSIONS_EXPR_MAP.get(key) + + if not expr: + return [] + + field_definition = LAZY_SESSIONS_FIELDS.get(key) + if not field_definition: + return [] + + if isinstance(field_definition, StringDatabaseField): + if search_term: + return insight_sync_execute( + SELECT_SESSION_PROP_STRING_VALUES_SQL_WITH_FILTER.format(property_expr=expr), + {"team_id": team.pk, "key": key, "value": "%{}%".format(search_term)}, + query_type="get_session_property_values_with_value", + team_id=team.pk, + ) + return insight_sync_execute( + SELECT_SESSION_PROP_STRING_VALUES_SQL.format(property_expr=expr), + {"team_id": team.pk, "key": key}, + query_type="get_session_property_values", + team_id=team.pk, + ) + + return [] diff --git a/posthog/models/sessions/sql.py b/posthog/models/sessions/sql.py index a68ec2ff97e1e..22d3431099f94 100644 --- a/posthog/models/sessions/sql.py +++ b/posthog/models/sessions/sql.py @@ -261,19 +261,19 @@ def source_column(column_name: str) -> str: """ ) -SELECT_SESSION_PROP_VALUES_SQL = """ +SELECT_SESSION_PROP_STRING_VALUES_SQL = """ SELECT value, count(value) FROM ( SELECT - {property_field} as value + {property_expr} as value FROM sessions WHERE team_id = %(team_id)s AND - {property_field} IS NOT NULL AND - {property_field} != '' + {property_expr} IS NOT NULL AND + {property_expr} != '' ORDER BY session_id DESC LIMIT 100000 ) @@ -282,18 +282,18 @@ def source_column(column_name: str) -> str: LIMIT 20 """ -SELECT_SESSION_PROP_VALUES_SQL_WITH_FILTER = """ +SELECT_SESSION_PROP_STRING_VALUES_SQL_WITH_FILTER = """ SELECT value, count(value) FROM ( SELECT - {property_field} as value + {property_expr} as value FROM sessions WHERE team_id = %(team_id)s AND - {property_field} ILIKE %(value)s + {property_expr} ILIKE %(value)s ORDER BY session_id DESC LIMIT 100000 ) @@ -301,26 +301,3 @@ def source_column(column_name: str) -> str: ORDER BY count(value) DESC LIMIT 20 """ - - -SESSION_PROPERTY_TO_COLUMN_MAP = { - "$initial_referring_domain": "initial_referring_domain", - "$initial_utm_source": "initial_utm_source", - "$initial_utm_campaign": "initial_utm_campaign", - "$initial_utm_medium": "initial_utm_medium", - "$initial_utm_term": "initial_utm_term", - "$initial_utm_content": "initial_utm_content", - "$initial_gclid": "initial_gclid", - "$initial_gad_source": "initial_gad_source", - "$initial_gclsrc": "initial_gclsrc", - "$initial_dclid": "initial_dclid", - "$initial_gbraid": "initial_gbraid", - "$initial_wbraid": "initial_wbraid", - "$initial_fbclid": "initial_fbclid", - "$initial_msclkid": "initial_msclkid", - "$initial_twclid": "initial_twclid", - "$initial_li_fat_id": "initial_li_fat_id", - "$initial_mc_cid": "initial_mc_cid", - "$initial_igshid": "initial_igshid", - "$initial_ttclid": "initial_ttclid", -} diff --git a/posthog/queries/property_values.py b/posthog/queries/property_values.py index 7f7655bbd379c..a8b943f25d1d2 100644 --- a/posthog/queries/property_values.py +++ b/posthog/queries/property_values.py @@ -2,18 +2,12 @@ from django.utils import timezone -from posthog.hogql.database.schema.channel_type import POSSIBLE_CHANNEL_TYPES from posthog.models.event.sql import SELECT_PROP_VALUES_SQL_WITH_FILTER from posthog.models.person.sql import ( SELECT_PERSON_PROP_VALUES_SQL, SELECT_PERSON_PROP_VALUES_SQL_WITH_FILTER, ) from posthog.models.property.util import get_property_string_expr -from posthog.models.sessions.sql import ( - SESSION_PROPERTY_TO_COLUMN_MAP, - SELECT_SESSION_PROP_VALUES_SQL, - SELECT_SESSION_PROP_VALUES_SQL_WITH_FILTER, -) from posthog.models.team import Team from posthog.queries.insight import insight_sync_execute from posthog.utils import relative_date_parse @@ -85,29 +79,3 @@ def get_person_property_values_for_key(key: str, team: Team, value: Optional[str query_type="get_person_property_values", team_id=team.pk, ) - - -def get_session_column_values_for_key(key: str, team: Team, search_term: Optional[str] = None): - # the sessions table does not have a properties json object like the events and person tables - - if key == "$channel_type": - return [[name] for name in POSSIBLE_CHANNEL_TYPES if not search_term or search_term.lower() in name.lower()] - - column = SESSION_PROPERTY_TO_COLUMN_MAP.get(key) - - if not column: - return [] - - if search_term: - return insight_sync_execute( - SELECT_SESSION_PROP_VALUES_SQL_WITH_FILTER.format(property_field=column), - {"team_id": team.pk, "key": key, "value": "%{}%".format(search_term)}, - query_type="get_session_property_values_with_value", - team_id=team.pk, - ) - return insight_sync_execute( - SELECT_SESSION_PROP_VALUES_SQL.format(property_field=column), - {"team_id": team.pk, "key": key}, - query_type="get_session_property_values", - team_id=team.pk, - )