diff --git a/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr b/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr index 249b940160578..29eb93b4ae929 100644 --- a/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr +++ b/ee/clickhouse/queries/test/__snapshots__/test_lifecycle.ambr @@ -501,7 +501,7 @@ AND event = '$pageview' AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day - AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'email'), ''), 'null'), '^"|"$', ''), '%test.com')) + AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'email'), ''), 'null'), '^"|"$', ''), '%test.com')) GROUP BY pdi.person_id) GROUP BY start_of_period, status) @@ -576,7 +576,7 @@ AND event = '$pageview' AND timestamp >= toDateTime(dateTrunc('day', toDateTime('2021-04-28 00:00:00', 'UTC'))) - INTERVAL 1 day AND timestamp < toDateTime(dateTrunc('day', toDateTime('2021-05-05 23:59:59', 'UTC'))) + INTERVAL 1 day - AND (like(nullIf(nullIf(pmat_email, ''), 'null'), '%test.com')) + AND (like(nullIf(nullIf(mat_pp_email, ''), 'null'), '%test.com')) GROUP BY pdi.person_id) GROUP BY start_of_period, status) diff --git a/frontend/src/queries/schema.json b/frontend/src/queries/schema.json index 4fdcee6671535..2bad6b4aecc86 100644 --- a/frontend/src/queries/schema.json +++ b/frontend/src/queries/schema.json @@ -1213,6 +1213,9 @@ "const": "HogQLQuery", "type": "string" }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers" + }, "query": { "type": "string" }, @@ -1228,6 +1231,16 @@ "required": ["kind", "query"], "type": "object" }, + "HogQLQueryModifiers": { + "additionalProperties": false, + "description": "HogQL Query Options are automatically set per team. 
However, they can be overridden in the query.", + "properties": { + "personsOnEventsMode": { + "type": "string" + } + }, + "type": "object" + }, "HogQLQueryResponse": { "additionalProperties": false, "properties": { @@ -1241,6 +1254,9 @@ "hogql": { "type": "string" }, + "modifiers": { + "$ref": "#/definitions/HogQLQueryModifiers" + }, "query": { "type": "string" }, diff --git a/frontend/src/queries/schema.ts b/frontend/src/queries/schema.ts index 87d5ca1075c6c..c312e54709501 100644 --- a/frontend/src/queries/schema.ts +++ b/frontend/src/queries/schema.ts @@ -130,6 +130,11 @@ export interface DataNode extends Node { response?: Record } +/** HogQL Query Options are automatically set per team. However, they can be overridden in the query. */ +export interface HogQLQueryModifiers { + personsOnEventsMode?: string +} + export interface HogQLQueryResponse { query?: string hogql?: string @@ -138,6 +143,7 @@ export interface HogQLQueryResponse { types?: any[] columns?: any[] timings?: QueryTiming[] + modifiers?: HogQLQueryModifiers } /** Filters object that will be converted to a HogQL {filters} placeholder */ @@ -152,6 +158,7 @@ export interface HogQLQuery extends DataNode { filters?: HogQLFilters /** Constant values that can be referenced with the {placeholder} syntax in the query */ values?: Record + modifiers?: HogQLQueryModifiers response?: HogQLQueryResponse } diff --git a/frontend/src/scenes/debug/HogQLDebug.tsx b/frontend/src/scenes/debug/HogQLDebug.tsx index 62b495947c04f..24861251e371a 100644 --- a/frontend/src/scenes/debug/HogQLDebug.tsx +++ b/frontend/src/scenes/debug/HogQLDebug.tsx @@ -7,6 +7,8 @@ import { dataNodeLogic, DataNodeLogicProps } from '~/queries/nodes/DataNode/data import { ElapsedTime, Timings } from '~/queries/nodes/DataNode/ElapsedTime' import { CodeSnippet, Language } from 'lib/components/CodeSnippet' import { CodeEditor } from 'lib/components/CodeEditors' +import { LemonSelect } from 'lib/lemon-ui/LemonSelect' +import { LemonLabel } from 
'lib/lemon-ui/LemonLabel' interface HogQLDebugProps { query: HogQLQuery @@ -23,6 +25,25 @@ export function HogQLDebug({ query, setQuery }: HogQLDebugProps): JSX.Element { +
+ + POE: + + setQuery({ + ...query, + modifiers: { ...query.modifiers, personsOnEventsMode: value }, + } as HogQLQuery) + } + value={(query.modifiers ?? response?.modifiers)?.personsOnEventsMode} + /> + +
{dataLoading ? ( <>

Running query...

diff --git a/posthog/api/query.py b/posthog/api/query.py index c7fc71ffee7ec..c93594dbb463b 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -25,6 +25,7 @@ from posthog.hogql.database.database import create_hogql_database, serialize_database from posthog.hogql.errors import HogQLException from posthog.hogql.metadata import get_hogql_metadata +from posthog.hogql.modifiers import create_default_modifiers_for_team from posthog.hogql.query import execute_hogql_query from posthog.hogql_queries.query_runner import get_query_runner @@ -236,6 +237,7 @@ def process_query( query=hogql_query.query, team=team, filters=hogql_query.filters, + modifiers=hogql_query.modifiers, placeholders=values, default_limit=default_limit, ) @@ -245,7 +247,7 @@ def process_query( metadata_response = get_hogql_metadata(query=metadata_query, team=team) return _unwrap_pydantic_dict(metadata_response) elif query_kind == "DatabaseSchemaQuery": - database = create_hogql_database(team.pk) + database = create_hogql_database(team.pk, modifiers=create_default_modifiers_for_team(team)) return serialize_database(database) elif query_kind == "TimeToSeeDataSessionsQuery": sessions_query_serializer = SessionsQuerySerializer(data=query_json) diff --git a/posthog/api/test/__snapshots__/test_insight.ambr b/posthog/api/test/__snapshots__/test_insight.ambr index dd66fdd0adce2..c34e895c79c91 100644 --- a/posthog/api/test/__snapshots__/test_insight.ambr +++ b/posthog/api/test/__snapshots__/test_insight.ambr @@ -3,7 +3,7 @@ /* user_id:0 request:_snapshot_ */ SELECT groupArray(value) FROM - (SELECT array(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'fish'), ''), 'null'), '^"|"$', '')) AS value, + (SELECT array(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', '')) AS value, count(*) as count FROM events e INNER JOIN @@ -79,7 +79,7 @@ if(step_0 = 1, timestamp, null) as latest_0, if(event = 'user did things', 1, 0) as step_1, if(step_1 = 1, 
timestamp, null) as latest_1, - array(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'fish'), ''), 'null'), '^"|"$', '')) AS prop_basic, + array(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', '')) AS prop_basic, prop_basic as prop, argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals FROM events e @@ -170,7 +170,7 @@ AND toTimeZone(timestamp, 'UTC') >= toDateTime('2012-01-08 00:00:00', 'UTC') AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') AND ((and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1)) - AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'))) + AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'))) AND (step_0 = 1 OR step_1 = 1) )) WHERE step_0 = 1 )) @@ -215,11 +215,11 @@ person.person_props as person_props , if(event = 'user signed up' AND (and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1) - AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')), 1, 0) as step_0, + AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')), 1, 0) as step_0, if(step_0 = 1, timestamp, null) as latest_0, if(event = 'user did things' AND (and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1) - AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')), 1, 0) as step_1, + AND 
like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')), 1, 0) as step_1, if(step_1 = 1, timestamp, null) as latest_1 FROM events e INNER JOIN @@ -438,7 +438,7 @@ AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC') AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') AND ((and(ifNull(greater(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1)) - AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'))) + AND (like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%'))) GROUP BY date) GROUP BY day_start ORDER BY day_start) @@ -506,7 +506,7 @@ AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC') AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') AND ((and(ifNull(greater(toInt64OrNull(nullIf(nullIf(events.mat_int_value, ''), 'null')), 10), 0), 1)) - AND (like(nullIf(nullIf(pmat_fish, ''), 'null'), '%fish%'))) + AND (like(nullIf(nullIf(mat_pp_fish, ''), 'null'), '%fish%'))) GROUP BY date) GROUP BY day_start ORDER BY day_start) @@ -548,7 +548,7 @@ AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC') AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') AND (and(ifNull(less(toInt64OrNull(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, 'int_value'), ''), 'null'), '^"|"$', '')), 10), 0), 1) - AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')) + AND like(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, 'fish'), ''), 'null'), '^"|"$', ''), '%fish%')) GROUP BY date) GROUP BY day_start ORDER BY 
day_start) @@ -590,7 +590,7 @@ AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2012-01-08 00:00:00', 'UTC')), 'UTC') AND toTimeZone(timestamp, 'UTC') <= toDateTime('2012-01-15 23:59:59', 'UTC') AND (and(ifNull(less(toInt64OrNull(nullIf(nullIf(events.mat_int_value, ''), 'null')), 10), 0), 1) - AND like(nullIf(nullIf(pmat_fish, ''), 'null'), '%fish%')) + AND like(nullIf(nullIf(mat_pp_fish, ''), 'null'), '%fish%')) GROUP BY date) GROUP BY day_start ORDER BY day_start) diff --git a/posthog/hogql/ai.py b/posthog/hogql/ai.py index 8e6de21736e09..915d03b77e49c 100644 --- a/posthog/hogql/ai.py +++ b/posthog/hogql/ai.py @@ -7,6 +7,7 @@ from posthog.hogql.printer import print_ast from .database.database import create_hogql_database, serialize_database from posthog.utils import get_instance_region +from .query import create_default_modifiers_for_team if TYPE_CHECKING: from posthog.models import User, Team @@ -52,7 +53,12 @@ class PromptUnclear(Exception): def write_sql_from_prompt(prompt: str, *, current_query: Optional[str] = None, team: "Team", user: "User") -> str: database = create_hogql_database(team.pk) - context = HogQLContext(team_id=team.pk, enable_select_queries=True, database=database) + context = HogQLContext( + team_id=team.pk, + enable_select_queries=True, + database=database, + modifiers=create_default_modifiers_for_team(team), + ) serialized_database = serialize_database(database) schema_description = "\n\n".join( ( diff --git a/posthog/hogql/context.py b/posthog/hogql/context.py index 6d1e1e9a25e58..65c17ba7006be 100644 --- a/posthog/hogql/context.py +++ b/posthog/hogql/context.py @@ -2,8 +2,7 @@ from typing import TYPE_CHECKING, Dict, List, Literal, Optional, Any from posthog.hogql.timings import HogQLTimings -from posthog.utils import PersonOnEventsMode -from posthog.schema import HogQLNotice +from posthog.schema import HogQLNotice, HogQLQueryModifiers if TYPE_CHECKING: from posthog.hogql.database.database import Database @@ -29,8 
+28,6 @@ class HogQLContext: values: Dict = field(default_factory=dict) # Are we small part of a non-HogQL query? If so, use custom syntax for accessed person properties. within_non_hogql_query: bool = False - # Do we need to join the persons table or not. Has effect if within_non_hogql_query = True - person_on_events_mode: PersonOnEventsMode = PersonOnEventsMode.V1_ENABLED # Enable full SELECT queries and subqueries in ClickHouse enable_select_queries: bool = False # Do we apply a limit of MAX_SELECT_RETURNED_ROWS=10000 to the topmost select query? @@ -44,6 +41,8 @@ class HogQLContext: notices: List["HogQLNotice"] = field(default_factory=list) # Timings in seconds for different parts of the HogQL query timings: HogQLTimings = field(default_factory=HogQLTimings) + # Modifications requested by the HogQL client + modifiers: HogQLQueryModifiers = field(default_factory=HogQLQueryModifiers) def add_value(self, value: Any) -> str: key = f"hogql_val_{len(self.values)}" diff --git a/posthog/hogql/database/database.py b/posthog/hogql/database/database.py index 6c03b05a14418..d5ebcabddeb37 100644 --- a/posthog/hogql/database/database.py +++ b/posthog/hogql/database/database.py @@ -35,6 +35,7 @@ from posthog.hogql.errors import HogQLException from posthog.models.group_type_mapping import GroupTypeMapping from posthog.models.team.team import WeekStartDay +from posthog.schema import HogQLQueryModifiers from posthog.utils import PersonOnEventsMode @@ -108,13 +109,15 @@ def add_warehouse_tables(self, **field_definitions: Any): setattr(self, f_name, f_def) -def create_hogql_database(team_id: int) -> Database: +def create_hogql_database(team_id: int, modifiers: Optional[HogQLQueryModifiers] = None) -> Database: from posthog.models import Team + from posthog.hogql.query import create_default_modifiers_for_team from posthog.warehouse.models import DataWarehouseTable, DataWarehouseSavedQuery, DataWarehouseViewLink team = Team.objects.get(pk=team_id) + modifiers = 
create_default_modifiers_for_team(team, modifiers) database = Database(timezone=team.timezone, week_start_day=team.week_start_day) - if team.person_on_events_mode != PersonOnEventsMode.DISABLED: + if modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: # TODO: split PoE v1 and v2 once SQL Expression fields are supported #15180 database.events.fields["person"] = FieldTraverser(chain=["poe"]) database.events.fields["person_id"] = StringDatabaseField(name="person_id") diff --git a/posthog/hogql/database/test/test_s3_table.py b/posthog/hogql/database/test/test_s3_table.py index 90453a492175f..1711aebb688a6 100644 --- a/posthog/hogql/database/test/test_s3_table.py +++ b/posthog/hogql/database/test/test_s3_table.py @@ -2,6 +2,7 @@ from posthog.hogql.database.database import create_hogql_database from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast +from posthog.hogql.query import create_default_modifiers_for_team from posthog.test.base import BaseTest from posthog.hogql.database.test.tables import create_aapl_stock_s3_table from posthog.hogql.errors import HogQLException @@ -12,7 +13,12 @@ def _init_database(self): self.database = create_hogql_database(self.team.pk) self.database.aapl_stock = create_aapl_stock_s3_table() self.database.aapl_stock_2 = create_aapl_stock_s3_table(name="aapl_stock_2") - self.context = HogQLContext(team_id=self.team.pk, enable_select_queries=True, database=self.database) + self.context = HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + database=self.database, + modifiers=create_default_modifiers_for_team(self.team), + ) def _select(self, query: str, dialect: str = "clickhouse") -> str: return print_ast(parse_select(query), self.context, dialect=dialect) diff --git a/posthog/hogql/database/test/test_saved_query.py b/posthog/hogql/database/test/test_saved_query.py index d2f1a5edffb88..5e64f9760fcbf 100644 --- a/posthog/hogql/database/test/test_saved_query.py +++ 
b/posthog/hogql/database/test/test_saved_query.py @@ -2,6 +2,7 @@ from posthog.hogql.database.database import create_hogql_database from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast +from posthog.hogql.query import create_default_modifiers_for_team from posthog.test.base import BaseTest from posthog.hogql.database.test.tables import ( create_aapl_stock_table_view, @@ -20,7 +21,12 @@ def _init_database(self): self.database.aapl_stock = create_aapl_stock_s3_table() self.database.aapl_stock_nested_view = create_nested_aapl_stock_view() self.database.aapl_stock_self = create_aapl_stock_table_self_referencing() - self.context = HogQLContext(team_id=self.team.pk, enable_select_queries=True, database=self.database) + self.context = HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + database=self.database, + modifiers=create_default_modifiers_for_team(self.team), + ) def _select(self, query: str, dialect: str = "clickhouse") -> str: return print_ast(parse_select(query), self.context, dialect=dialect) diff --git a/posthog/hogql/database/test/test_view.py b/posthog/hogql/database/test/test_view.py index 51c69ba17f02a..3d773314e1f8f 100644 --- a/posthog/hogql/database/test/test_view.py +++ b/posthog/hogql/database/test/test_view.py @@ -2,6 +2,7 @@ from posthog.hogql.database.database import create_hogql_database from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast +from posthog.hogql.query import create_default_modifiers_for_team from posthog.test.base import BaseTest from posthog.hogql.database.test.tables import ( create_aapl_stock_table_view, @@ -20,7 +21,12 @@ def _init_database(self): self.database.aapl_stock = create_aapl_stock_s3_table() self.database.aapl_stock_nested_view = create_nested_aapl_stock_view() self.database.aapl_stock_self = create_aapl_stock_table_self_referencing() - self.context = HogQLContext(team_id=self.team.pk, enable_select_queries=True, 
database=self.database) + self.context = HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + database=self.database, + modifiers=create_default_modifiers_for_team(self.team), + ) def _select(self, query: str, dialect: str = "clickhouse") -> str: return print_ast(parse_select(query), self.context, dialect=dialect) diff --git a/posthog/hogql/metadata.py b/posthog/hogql/metadata.py index 745b4f41cb71a..de044ed2c4743 100644 --- a/posthog/hogql/metadata.py +++ b/posthog/hogql/metadata.py @@ -5,6 +5,7 @@ from posthog.hogql.hogql import translate_hogql from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast +from posthog.hogql.query import create_default_modifiers_for_team from posthog.models import Team from posthog.schema import HogQLMetadataResponse, HogQLMetadata, HogQLNotice from posthog.hogql import ast @@ -26,10 +27,12 @@ def get_hogql_metadata( try: if isinstance(query.expr, str): - context = HogQLContext(team_id=team.pk) + context = HogQLContext(team_id=team.pk, modifiers=create_default_modifiers_for_team(team)) translate_hogql(query.expr, context=context, table=query.table or "events") elif isinstance(query.select, str): - context = HogQLContext(team_id=team.pk, enable_select_queries=True) + context = HogQLContext( + team_id=team.pk, modifiers=create_default_modifiers_for_team(team), enable_select_queries=True + ) select_ast = parse_select(query.select) if query.filters: diff --git a/posthog/hogql/modifiers.py b/posthog/hogql/modifiers.py new file mode 100644 index 0000000000000..2811c60501719 --- /dev/null +++ b/posthog/hogql/modifiers.py @@ -0,0 +1,19 @@ +from typing import Optional + +from posthog.models import Team +from posthog.schema import HogQLQueryModifiers +from posthog.utils import PersonOnEventsMode + + +def create_default_modifiers_for_team( + team: Team, modifiers: Optional[HogQLQueryModifiers] = None +) -> HogQLQueryModifiers: + if modifiers is None: + modifiers = HogQLQueryModifiers() + else: + 
modifiers = modifiers.model_copy() + + if modifiers.personsOnEventsMode is None: + modifiers.personsOnEventsMode = team.person_on_events_mode or PersonOnEventsMode.DISABLED + + return modifiers diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index eb355e02fe421..135e13e6f7346 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -72,7 +72,7 @@ def prepare_ast_for_printing( settings: Optional[HogQLGlobalSettings] = None, ) -> ast.Expr: with context.timings.measure("create_hogql_database"): - context.database = context.database or create_hogql_database(context.team_id) + context.database = context.database or create_hogql_database(context.team_id, context.modifiers) with context.timings.measure("resolve_types"): node = resolve_types(node, context, scopes=[node.type for node in stack] if stack else None) @@ -770,7 +770,7 @@ def visit_field_type(self, type: ast.FieldType): and type.name == "properties" and type.table_type.field == "poe" ): - if self.context.person_on_events_mode != PersonOnEventsMode.DISABLED: + if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: field_sql = "person_properties" else: field_sql = "person_props" @@ -789,7 +789,7 @@ def visit_field_type(self, type: ast.FieldType): # :KLUDGE: Legacy person properties handling. Only used within non-HogQL queries, such as insights. if self.context.within_non_hogql_query and field_sql == "events__pdi__person.properties": - if self.context.person_on_events_mode != PersonOnEventsMode.DISABLED: + if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: field_sql = "person_properties" else: field_sql = "person_props" @@ -833,7 +833,7 @@ def visit_property_type(self, type: ast.PropertyType): or (isinstance(table, ast.VirtualTableType) and table.field == "poe") ): # :KLUDGE: Legacy person properties handling. Only used within non-HogQL queries, such as insights. 
- if self.context.person_on_events_mode != PersonOnEventsMode.DISABLED: + if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: materialized_column = self._get_materialized_column("events", type.chain[0], "person_properties") else: materialized_column = self._get_materialized_column("person", type.chain[0], "properties") diff --git a/posthog/hogql/query.py b/posthog/hogql/query.py index 24be00dc35852..5f0fa0368a893 100644 --- a/posthog/hogql/query.py +++ b/posthog/hogql/query.py @@ -5,6 +5,7 @@ from posthog.hogql.constants import HogQLGlobalSettings from posthog.hogql.errors import HogQLException from posthog.hogql.hogql import HogQLContext +from posthog.hogql.modifiers import create_default_modifiers_for_team from posthog.hogql.parser import parse_select from posthog.hogql.placeholders import replace_placeholders, find_placeholders from posthog.hogql.printer import prepare_ast_for_printing, print_ast, print_prepared_ast @@ -14,7 +15,7 @@ from posthog.models.team import Team from posthog.clickhouse.query_tagging import tag_queries from posthog.client import sync_execute -from posthog.schema import HogQLQueryResponse, HogQLFilters +from posthog.schema import HogQLQueryResponse, HogQLFilters, HogQLQueryModifiers def execute_hogql_query( @@ -25,6 +26,7 @@ def execute_hogql_query( placeholders: Optional[Dict[str, ast.Expr]] = None, workload: Workload = Workload.ONLINE, settings: Optional[HogQLGlobalSettings] = None, + modifiers: Optional[HogQLQueryModifiers] = None, default_limit: Optional[int] = None, timings: Optional[HogQLTimings] = None, ) -> HogQLQueryResponse: @@ -70,12 +72,13 @@ def execute_hogql_query( # Get printed HogQL query, and returned columns. Using a cloned query. 
with timings.measure("hogql"): + query_modifiers = create_default_modifiers_for_team(team, modifiers) with timings.measure("prepare_ast"): hogql_query_context = HogQLContext( team_id=team.pk, enable_select_queries=True, - person_on_events_mode=team.person_on_events_mode, timings=timings, + modifiers=query_modifiers, ) with timings.measure("clone"): cloned_query = clone_expr(select_query, True) @@ -107,8 +110,8 @@ def execute_hogql_query( clickhouse_context = HogQLContext( team_id=team.pk, enable_select_queries=True, - person_on_events_mode=team.person_on_events_mode, timings=timings, + modifiers=query_modifiers, ) clickhouse_sql = print_ast( select_query, context=clickhouse_context, dialect="clickhouse", settings=settings or HogQLGlobalSettings() @@ -141,4 +144,5 @@ def execute_hogql_query( results=results, columns=print_columns, types=types, + modifiers=query_modifiers, ) diff --git a/posthog/hogql/test/test_modifiers.py b/posthog/hogql/test/test_modifiers.py new file mode 100644 index 0000000000000..e519bdf3e984a --- /dev/null +++ b/posthog/hogql/test/test_modifiers.py @@ -0,0 +1,37 @@ +from posthog.hogql.modifiers import create_default_modifiers_for_team +from posthog.hogql.query import execute_hogql_query +from posthog.schema import HogQLQueryModifiers +from posthog.test.base import BaseTest +from django.test import override_settings +from posthog.utils import PersonOnEventsMode + + +class TestModifiers(BaseTest): + @override_settings(PERSON_ON_EVENTS_OVERRIDE=False, PERSON_ON_EVENTS_V2_OVERRIDE=False) + def test_create_default_modifiers_for_team_init(self): + assert self.team.person_on_events_mode == "disabled" + modifiers = create_default_modifiers_for_team(self.team) + assert modifiers.personsOnEventsMode == PersonOnEventsMode.DISABLED # NB! 
not a None + modifiers = create_default_modifiers_for_team( + self.team, HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V1_ENABLED) + ) + assert modifiers.personsOnEventsMode == PersonOnEventsMode.V1_ENABLED + modifiers = create_default_modifiers_for_team( + self.team, HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V2_ENABLED) + ) + assert modifiers.personsOnEventsMode == PersonOnEventsMode.V2_ENABLED + + def test_modifiers_person_on_events_mode_v1_enabled(self): + query = "SELECT event, person_id FROM events" + + # Control + response = execute_hogql_query( + query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.DISABLED) + ) + assert " JOIN " in response.clickhouse + + # Test + response = execute_hogql_query( + query, team=self.team, modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V1_ENABLED) + ) + assert " JOIN " not in response.clickhouse diff --git a/posthog/hogql/test/test_printer.py b/posthog/hogql/test/test_printer.py index 4b523860aab15..1c92b32daa805 100644 --- a/posthog/hogql/test/test_printer.py +++ b/posthog/hogql/test/test_printer.py @@ -12,6 +12,7 @@ from posthog.hogql.parser import parse_select from posthog.hogql.printer import print_ast from posthog.models.team.team import WeekStartDay +from posthog.schema import HogQLQueryModifiers from posthog.test.base import BaseTest from posthog.utils import PersonOnEventsMode @@ -106,7 +107,9 @@ def test_fields_and_properties(self): with override_settings(PERSON_ON_EVENTS_V2_OVERRIDE=False): context = HogQLContext( - team_id=self.team.pk, within_non_hogql_query=True, person_on_events_mode=PersonOnEventsMode.DISABLED + team_id=self.team.pk, + within_non_hogql_query=True, + modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.DISABLED), ) self.assertEqual( self._expr("person.properties.bla", context), @@ -120,7 +123,9 @@ def test_fields_and_properties(self): with override_settings(PERSON_ON_EVENTS_OVERRIDE=True): 
context = HogQLContext( - team_id=self.team.pk, within_non_hogql_query=True, person_on_events_mode=PersonOnEventsMode.V1_ENABLED + team_id=self.team.pk, + within_non_hogql_query=True, + modifiers=HogQLQueryModifiers(personsOnEventsMode=PersonOnEventsMode.V1_ENABLED), ) self.assertEqual( self._expr("person.properties.bla", context), diff --git a/posthog/hogql/transforms/property_types.py b/posthog/hogql/transforms/property_types.py index 8a920d220b71b..be46d24873a91 100644 --- a/posthog/hogql/transforms/property_types.py +++ b/posthog/hogql/transforms/property_types.py @@ -149,7 +149,7 @@ def _convert_string_property_to_type( def _add_property_notice(self, node: ast.Field, property_type: Literal["event", "person"], field_type: str) -> str: property_name = node.chain[-1] if property_type == "person": - if self.context.person_on_events_mode != PersonOnEventsMode.DISABLED: + if self.context.modifiers.personsOnEventsMode != PersonOnEventsMode.DISABLED: materialized_column = self._get_materialized_column("events", property_name, "person_properties") else: materialized_column = self._get_materialized_column("person", property_name, "properties") diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py index eed9595dbbfd1..91b9c711c1e27 100644 --- a/posthog/hogql_queries/query_runner.py +++ b/posthog/hogql_queries/query_runner.py @@ -11,6 +11,7 @@ from posthog.hogql import ast from posthog.hogql.context import HogQLContext from posthog.hogql.printer import print_ast +from posthog.hogql.query import create_default_modifiers_for_team from posthog.hogql.timings import HogQLTimings from posthog.metrics import LABEL_TEAM_ID from posthog.models import Team @@ -184,7 +185,12 @@ def to_hogql(self) -> str: with self.timings.measure("to_hogql"): return print_ast( self.to_query(), - HogQLContext(team_id=self.team.pk, enable_select_queries=True, timings=self.timings), + HogQLContext( + team_id=self.team.pk, + enable_select_queries=True, + 
timings=self.timings, + modifiers=create_default_modifiers_for_team(self.team), + ), "hogql", ) diff --git a/posthog/queries/trends/test/__snapshots__/test_formula.ambr b/posthog/queries/trends/test/__snapshots__/test_formula.ambr index 9ca8bfc30e1ee..d24e8b3d2872f 100644 --- a/posthog/queries/trends/test/__snapshots__/test_formula.ambr +++ b/posthog/queries/trends/test/__snapshots__/test_formula.ambr @@ -362,7 +362,7 @@ SELECT groupArray(value) FROM - (SELECT concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) AS value, + (SELECT concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) AS value, sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count FROM events e INNER JOIN @@ -394,7 +394,7 @@ SELECT groupArray(value) FROM - (SELECT concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) AS value, + (SELECT concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) AS value, avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as count FROM events e INNER JOIN @@ -454,7 +454,7 @@ day_start UNION ALL SELECT 
sum(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) as breakdown_value + concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) as breakdown_value FROM events e INNER JOIN (SELECT distinct_id, @@ -474,7 +474,7 @@ AND event = 'session start' AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - AND concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) in (['some_val : London', 'some_val : Paris']) + AND concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) in (['some_val : London', 'some_val : Paris']) GROUP BY day_start, breakdown_value)) GROUP BY day_start, @@ -510,7 +510,7 @@ day_start UNION ALL SELECT avg(toFloat64OrNull(replaceRegexpAll(JSONExtractRaw(properties, 'session duration'), '^"|"$', ''))) as total, toStartOfDay(toTimeZone(toDateTime(timestamp, 'UTC'), 'UTC')) as day_start, - 
concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) as breakdown_value + concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) as breakdown_value FROM events e INNER JOIN (SELECT distinct_id, @@ -530,7 +530,7 @@ AND event = 'session start' AND toTimeZone(timestamp, 'UTC') >= toDateTime(toStartOfDay(toDateTime('2019-12-28 00:00:00', 'UTC')), 'UTC') AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-04 23:59:59', 'UTC') - AND concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_props, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) in (['some_val : London', 'some_val : Paris']) + AND concat(ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person_properties, '$some_prop'), ''), 'null'), '^"|"$', '')), ''), ' : ', ifNull(toString(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'location'), ''), 'null'), '^"|"$', '')), '')) in (['some_val : London', 'some_val : Paris']) GROUP BY day_start, breakdown_value)) GROUP BY day_start, diff --git a/posthog/schema.py b/posthog/schema.py index a87907ba43167..29efb896c634a 100644 --- a/posthog/schema.py +++ b/posthog/schema.py @@ -228,6 +228,13 @@ class HogQLNotice(BaseModel): start: Optional[float] = None +class HogQLQueryModifiers(BaseModel): + model_config = ConfigDict( + extra="forbid", + ) + personsOnEventsMode: Optional[str] = None + + class IntervalType(str, Enum): hour = "hour" day = "day" @@ -644,6 +651,7 @@ 
class HogQLQueryResponse(BaseModel): clickhouse: Optional[str] = None columns: Optional[List] = None hogql: Optional[str] = None + modifiers: Optional[HogQLQueryModifiers] = None query: Optional[str] = None results: Optional[List] = None timings: Optional[List[QueryTiming]] = None @@ -917,6 +925,7 @@ class HogQLQuery(BaseModel): ) filters: Optional[HogQLFilters] = None kind: Literal["HogQLQuery"] = "HogQLQuery" + modifiers: Optional[HogQLQueryModifiers] = None query: str response: Optional[HogQLQueryResponse] = Field(default=None, description="Cached query response") values: Optional[Dict[str, Any]] = Field( diff --git a/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py b/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py index 924b3481a5b74..fc20ebcc3866c 100644 --- a/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py +++ b/posthog/session_recordings/queries/session_recording_list_from_replay_summary.py @@ -463,7 +463,7 @@ def _get_person_id_clause(self) -> Tuple[str, Dict[str, Any]]: return person_id_clause, person_id_params def matching_events(self) -> List[str]: - self._filter.hogql_context.person_on_events_mode = PersonOnEventsMode.DISABLED + self._filter.hogql_context.modifiers.personsOnEventsMode = PersonOnEventsMode.DISABLED query, query_params = self.get_query(select_event_ids=True) query_results = sync_execute(query, {**query_params, **self._filter.hogql_context.values}) results = [row[0] for row in query_results] @@ -563,7 +563,7 @@ def _paginate_results(self, session_recordings) -> SessionRecordingQueryResult: return SessionRecordingQueryResult(session_recordings, more_recordings_available) def run(self) -> SessionRecordingQueryResult: - self._filter.hogql_context.person_on_events_mode = PersonOnEventsMode.DISABLED + self._filter.hogql_context.modifiers.personsOnEventsMode = PersonOnEventsMode.DISABLED query, query_params = self.get_query() query_results = 
sync_execute(query, {**query_params, **self._filter.hogql_context.values}) session_recordings = self._data_to_return(query_results) diff --git a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr index 89ccc9654e91d..ff2b7aba4834d 100644 --- a/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr +++ b/posthog/session_recordings/queries/test/__snapshots__/test_session_recording_list_from_session_replay.ambr @@ -1394,7 +1394,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 @@ -1424,7 +1424,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 @@ -1475,7 +1475,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 @@ -1504,7 +1504,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 @@ -1598,7 +1598,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person 
ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 @@ -1627,7 +1627,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 @@ -1677,7 +1677,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 @@ -1705,7 +1705,7 @@ FROM person WHERE team_id = 2 GROUP BY id - HAVING max(is_deleted) = 0) person ON person.id = pdi.person_id + HAVING max(is_deleted) = 0 SETTINGS optimize_aggregation_in_order = 1) person ON person.id = pdi.person_id WHERE team_id = 2 GROUP BY distinct_id HAVING argMax(is_deleted, version) = 0 diff --git a/posthog/warehouse/models/datawarehouse_saved_query.py b/posthog/warehouse/models/datawarehouse_saved_query.py index 6ce428cf2b494..64617a744c421 100644 --- a/posthog/warehouse/models/datawarehouse_saved_query.py +++ b/posthog/warehouse/models/datawarehouse_saved_query.py @@ -53,10 +53,13 @@ def s3_tables(self): from posthog.hogql.parser import parse_select from posthog.hogql.context import HogQLContext from posthog.hogql.database.database import create_hogql_database + from posthog.hogql.query import create_default_modifiers_for_team from posthog.hogql.resolver import resolve_types from posthog.models.property.util import S3TableVisitor - context = HogQLContext(team_id=self.team.pk, enable_select_queries=True) + context = HogQLContext( + team_id=self.team.pk, enable_select_queries=True, 
modifiers=create_default_modifiers_for_team(self.team) + ) node = parse_select(self.query["query"]) context.database = create_hogql_database(context.team_id)