From 35b0adfb0502351c732324c49636df4cb20cc5e1 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Fri, 15 Sep 2023 11:50:13 +0200 Subject: [PATCH] feat: lifecycle query runner class (#17440) --- posthog/api/query.py | 10 +- .../hogql_queries/lifecycle_hogql_query.py | 237 ---------------- .../hogql_queries/lifecycle_query_runner.py | 252 ++++++++++++++++++ posthog/hogql_queries/query_runner.py | 37 +++ .../test/test_lifecycle_hogql_query.py | 75 +++++- .../{ => utils}/query_date_range.py | 11 +- .../{ => utils}/test/test_query_date_range.py | 2 +- 7 files changed, 377 insertions(+), 247 deletions(-) delete mode 100644 posthog/hogql_queries/lifecycle_hogql_query.py create mode 100644 posthog/hogql_queries/lifecycle_query_runner.py create mode 100644 posthog/hogql_queries/query_runner.py rename posthog/hogql_queries/{ => utils}/query_date_range.py (91%) rename posthog/hogql_queries/{ => utils}/test/test_query_date_range.py (97%) diff --git a/posthog/api/query.py b/posthog/api/query.py index 5e4e14c34f999..385f14d2f7905 100644 --- a/posthog/api/query.py +++ b/posthog/api/query.py @@ -25,7 +25,8 @@ from posthog.hogql.errors import HogQLException from posthog.hogql.metadata import get_hogql_metadata from posthog.hogql.query import execute_hogql_query -from posthog.hogql_queries.lifecycle_hogql_query import run_lifecycle_query + +from posthog.hogql_queries.lifecycle_query_runner import LifecycleQueryRunner from posthog.models import Team from posthog.models.event.events_query import run_events_query from posthog.models.user import User @@ -33,7 +34,7 @@ from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions from posthog.rate_limit import AIBurstRateThrottle, AISustainedRateThrottle, TeamRateThrottle -from posthog.schema import EventsQuery, HogQLQuery, HogQLMetadata, LifecycleQuery +from posthog.schema import EventsQuery, HogQLQuery, HogQLMetadata class QueryThrottle(TeamRateThrottle): @@ -221,9 +222,8 @@ def process_query(team: Team, query_json: Dict, default_limit: Optional[int] = N metadata_response = get_hogql_metadata(query=metadata_query, team=team) return _unwrap_pydantic_dict(metadata_response) elif query_kind == "LifecycleQuery": - lifecycle_query = LifecycleQuery.parse_obj(query_json) - lifecycle_response = run_lifecycle_query(query=lifecycle_query, team=team) - return _unwrap_pydantic_dict(lifecycle_response) + lifecycle_query_runner = LifecycleQueryRunner(query_json, team) + return _unwrap_pydantic_dict(lifecycle_query_runner.run()) elif query_kind == "DatabaseSchemaQuery": database = create_hogql_database(team.pk) return serialize_database(database) diff --git a/posthog/hogql_queries/lifecycle_hogql_query.py b/posthog/hogql_queries/lifecycle_hogql_query.py deleted file mode 100644 index 6b73034fdfcf3..0000000000000 --- a/posthog/hogql_queries/lifecycle_hogql_query.py +++ /dev/null @@ -1,237 +0,0 @@ -from typing import Optional - -from django.utils.timezone import datetime - -from posthog.hogql import ast -from posthog.hogql.parser import parse_expr, parse_select -from posthog.hogql.property import property_to_expr, action_to_expr -from posthog.hogql.query import execute_hogql_query -from posthog.hogql.timings import HogQLTimings -from posthog.models import Team, Action -from posthog.hogql_queries.query_date_range import QueryDateRange -from posthog.schema import LifecycleQuery, ActionsNode, EventsNode, LifecycleQueryResponse - - -def create_events_query( - query_date_range: QueryDateRange, - event_filter: Optional[ast.Expr], - timings: HogQLTimings, - sampling_factor: Optional[float] = None, -): - placeholders = { - "event_filter": event_filter or ast.Constant(value=True), - "interval": query_date_range.interval_period_string_as_hogql_constant(), - "one_interval_period": query_date_range.one_interval_period(), - } - - events_query = parse_select( - """ - SELECT - events.person.id as person_id, - min(events.person.created_at) AS created_at, - arraySort(groupUniqArray(dateTrunc({interval}, events.timestamp))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, dateTrunc({interval}, created_at))) as previous_activity, - arrayPopFront(arrayPushBack(all_activity, dateTrunc({interval}, toDateTime('1970-01-01 00:00:00')))) as following_activity, - arrayMap((previous, current, index) -> (previous = current ? 'new' : ((current - {one_interval_period}) = previous AND index != 1) ? 'returning' : 'resurrecting'), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, - arrayMap((current, next) -> (current + {one_interval_period} = next ? '' : 'dormant'), all_activity, following_activity) as dormant_status, - arrayMap(x -> x + {one_interval_period}, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) as dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, - arrayJoin(temp_concat) as period_status_pairs, - period_status_pairs.1 as start_of_period, - period_status_pairs.2 as status - FROM events - WHERE {event_filter} - GROUP BY person_id - """, - placeholders=placeholders, - timings=timings, - ) - - if sampling_factor is not None and isinstance(sampling_factor, float): - sample_expr = ast.SampleExpr(sample_value=ast.RatioExpr(left=ast.Constant(value=sampling_factor))) - events_query.select_from.sample = sample_expr - - return events_query - - -def run_lifecycle_query(team: Team, query: LifecycleQuery) -> LifecycleQueryResponse: - now_dt = datetime.now() - timings = HogQLTimings() - - event_filter = [] - with timings.measure("date_range"): - query_date_range = QueryDateRange(date_range=query.dateRange, team=team, interval=query.interval, now=now_dt) - event_filter.append( - parse_expr( - "timestamp >= dateTrunc({interval}, {date_from}) - {one_interval}", - { - "interval": query_date_range.interval_period_string_as_hogql_constant(), - "one_interval": query_date_range.one_interval_period(), - "date_from": query_date_range.date_from_as_hogql(), - }, - timings=timings, - ) - ) - event_filter.append( - parse_expr( - "timestamp < dateTrunc({interval}, {date_to}) + {one_interval}", - { - "interval": query_date_range.interval_period_string_as_hogql_constant(), - "one_interval": query_date_range.one_interval_period(), - "date_to": query_date_range.date_to_as_hogql(), - }, - timings=timings, - ) - ) - - with timings.measure("properties"): - if query.properties is not None and query.properties != []: - event_filter.append(property_to_expr(query.properties, team)) - - with timings.measure("series_filters"): - for serie in query.series or []: - if isinstance(serie, ActionsNode): - action = Action.objects.get(pk=int(serie.id), team=team) - event_filter.append(action_to_expr(action)) - elif isinstance(serie, EventsNode): - if serie.event is not None: - event_filter.append( - ast.CompareOperation( - op=ast.CompareOperationOp.Eq, - left=ast.Field(chain=["event"]), - right=ast.Constant(value=str(serie.event)), - ) - ) - else: - raise ValueError(f"Invalid serie kind: {serie.kind}") - if serie.properties is not None and serie.properties != []: - event_filter.append(property_to_expr(serie.properties, team)) - - with timings.measure("test_account_filters"): - if ( - query.filterTestAccounts - and isinstance(team.test_account_filters, list) - and len(team.test_account_filters) > 0 - ): - for property in team.test_account_filters: - event_filter.append(property_to_expr(property, team)) - - if len(event_filter) == 0: - event_filter = ast.Constant(value=True) - elif len(event_filter) == 1: - event_filter = event_filter[0] - else: - event_filter = ast.And(exprs=event_filter) - - placeholders = { - "interval": query_date_range.interval_period_string_as_hogql_constant(), - "one_interval_period": query_date_range.one_interval_period(), - "number_interval_period": query_date_range.number_interval_periods(), - "event_filter": event_filter, - "date_from": query_date_range.date_from_as_hogql(), - "date_to": query_date_range.date_to_as_hogql(), - } - - with timings.measure("events_query"): - events_query = create_events_query( - query_date_range=query_date_range, - event_filter=event_filter, - sampling_factor=query.samplingFactor, - timings=timings, - ) - - with timings.measure("periods_query"): - periods = parse_select( - """ - SELECT ( - dateTrunc({interval}, {date_to}) - {number_interval_period} - ) AS start_of_period - FROM numbers( - dateDiff( - {interval}, - dateTrunc({interval}, {date_from}), - dateTrunc({interval}, {date_to} + {one_interval_period}) - ) - ) - """, - placeholders=placeholders, - timings=timings, - ) - - with timings.measure("lifecycle_query"): - lifecycle_sql = parse_select( - """ - SELECT groupArray(start_of_period) AS date, - groupArray(counts) AS total, - status - FROM ( - SELECT - status = 'dormant' ? negate(sum(counts)) : negate(negate(sum(counts))) as counts, - start_of_period, - status - FROM ( - SELECT - periods.start_of_period as start_of_period, - 0 AS counts, - status - FROM {periods} as periods - CROSS JOIN ( - SELECT status - FROM (SELECT 1) - ARRAY JOIN ['new', 'returning', 'resurrecting', 'dormant'] as status - ) as sec - ORDER BY status, start_of_period - UNION ALL - SELECT - start_of_period, count(DISTINCT person_id) AS counts, status - FROM {events_query} - GROUP BY start_of_period, status - ) - WHERE start_of_period <= dateTrunc({interval}, {date_to}) - AND start_of_period >= dateTrunc({interval}, {date_from}) - GROUP BY start_of_period, status - ORDER BY start_of_period ASC - ) - GROUP BY status - """, - {**placeholders, "periods": periods, "events_query": events_query}, - timings=timings, - ) - - response = execute_hogql_query( - team=team, - query=lifecycle_sql, - query_type="LifecycleQuery", - timings=timings, - ) - - # ensure that the items are in a deterministic order - order = {"new": 1, "returning": 2, "resurrecting": 3, "dormant": 4} - results = sorted(response.results, key=lambda result: order.get(result[2], 5)) - - res = [] - for val in results: - counts = val[1] - labels = [ - item.strftime("%-d-%b-%Y{}".format(" %H:%M" if query_date_range.interval_name == "hour" else "")) - for item in val[0] - ] - days = [ - item.strftime("%Y-%m-%d{}".format(" %H:%M:%S" if query_date_range.interval_name == "hour" else "")) - for item in val[0] - ] - - label = "{} - {}".format("", val[2]) # entity.name - additional_values = {"label": label, "status": val[2]} - res.append( - { - "data": [float(c) for c in counts], - "count": float(sum(counts)), - "labels": labels, - "days": days, - **additional_values, - } - ) - - return LifecycleQueryResponse(result=res, timings=response.timings) diff --git a/posthog/hogql_queries/lifecycle_query_runner.py b/posthog/hogql_queries/lifecycle_query_runner.py new file mode 100644 index 0000000000000..2b970bb95156c --- /dev/null +++ b/posthog/hogql_queries/lifecycle_query_runner.py @@ -0,0 +1,252 @@ +from typing import Optional, Any, Dict, List + +from django.utils.timezone import datetime + +from posthog.hogql import ast +from posthog.hogql.parser import parse_expr, parse_select +from posthog.hogql.property import property_to_expr, action_to_expr +from posthog.hogql.query import execute_hogql_query +from posthog.hogql.timings import HogQLTimings +from posthog.hogql_queries.query_runner import QueryRunner +from posthog.models import Team, Action +from posthog.hogql_queries.utils.query_date_range import QueryDateRange +from posthog.models.filters.mixins.utils import cached_property +from posthog.schema import LifecycleQuery, ActionsNode, EventsNode, LifecycleQueryResponse + + +class LifecycleQueryRunner(QueryRunner): + query: LifecycleQuery + + def __init__(self, query: LifecycleQuery | Dict[str, Any], team: Team, timings: Optional[HogQLTimings] = None): + super().__init__(team, timings) + if isinstance(query, LifecycleQuery): + self.query = query + else: + self.query = LifecycleQuery.parse_obj(query) + + def to_query(self) -> ast.SelectQuery: + placeholders = { + **self.query_date_range.to_placeholders(), + "events_query": self.events_query, + "periods_query": self.periods_query, + } + with self.timings.measure("lifecycle_query"): + lifecycle_query = parse_select( + """ + SELECT groupArray(start_of_period) AS date, + groupArray(counts) AS total, + status + FROM ( + SELECT + status = 'dormant' ? negate(sum(counts)) : negate(negate(sum(counts))) as counts, + start_of_period, + status + FROM ( + SELECT + periods.start_of_period as start_of_period, + 0 AS counts, + status + FROM {periods_query} as periods + CROSS JOIN ( + SELECT status + FROM (SELECT 1) + ARRAY JOIN ['new', 'returning', 'resurrecting', 'dormant'] as status + ) as sec + ORDER BY status, start_of_period + UNION ALL + SELECT + start_of_period, count(DISTINCT person_id) AS counts, status + FROM {events_query} + GROUP BY start_of_period, status + ) + WHERE start_of_period <= dateTrunc({interval}, {date_to}) + AND start_of_period >= dateTrunc({interval}, {date_from}) + GROUP BY start_of_period, status + ORDER BY start_of_period ASC + ) + GROUP BY status + """, + placeholders, + timings=self.timings, + ) + return lifecycle_query + + def to_persons_query(self) -> str: + # TODO: add support for selecting and filtering by breakdowns + with self.timings.measure("persons_query"): + return parse_select( + """ + SELECT + person_id, start_of_period as breakdown_1, status as breakdown_2 + FROM + {events_query} + """, + placeholders={"events_query": self.events_query}, + ) + + def run(self) -> LifecycleQueryResponse: + response = execute_hogql_query( + query_type="LifecycleQuery", + query=self.to_query(), + team=self.team, + timings=self.timings, + ) + + # TODO: can we move the data conversion part into the query as well? It would make it easier to swap + # e.g. the LifecycleQuery with HogQLQuery, while keeping the chart logic the same. + + # ensure that the items are in a deterministic order + order = {"new": 1, "returning": 2, "resurrecting": 3, "dormant": 4} + results = sorted(response.results, key=lambda result: order.get(result[2], 5)) + + res = [] + for val in results: + counts = val[1] + labels = [ + item.strftime("%-d-%b-%Y{}".format(" %H:%M" if self.query_date_range.interval_name == "hour" else "")) + for item in val[0] + ] + days = [ + item.strftime("%Y-%m-%d{}".format(" %H:%M:%S" if self.query_date_range.interval_name == "hour" else "")) + for item in val[0] + ] + + label = "{} - {}".format("", val[2]) # entity.name + additional_values = {"label": label, "status": val[2]} + res.append( + { + "data": [float(c) for c in counts], + "count": float(sum(counts)), + "labels": labels, + "days": days, + **additional_values, + } + ) + + return LifecycleQueryResponse(result=res, timings=response.timings) + + @cached_property + def query_date_range(self): + return QueryDateRange( + date_range=self.query.dateRange, team=self.team, interval=self.query.interval, now=datetime.now() + ) + + @cached_property + def event_filter(self) -> ast.Expr: + event_filters: List[ast.Expr] = [] + with self.timings.measure("date_range"): + event_filters.append( + parse_expr( + "timestamp >= dateTrunc({interval}, {date_from}) - {one_interval}", + { + "interval": self.query_date_range.interval_period_string_as_hogql_constant(), + "one_interval": self.query_date_range.one_interval_period(), + "date_from": self.query_date_range.date_from_as_hogql(), + }, + timings=self.timings, + ) + ) + event_filters.append( + parse_expr( + "timestamp < dateTrunc({interval}, {date_to}) + {one_interval}", + { + "interval": self.query_date_range.interval_period_string_as_hogql_constant(), + "one_interval": self.query_date_range.one_interval_period(), + "date_to": self.query_date_range.date_to_as_hogql(), + }, + timings=self.timings, + ) + ) + with self.timings.measure("properties"): + if self.query.properties is not None and self.query.properties != []: + event_filters.append(property_to_expr(self.query.properties, self.team)) + with self.timings.measure("series_filters"): + for serie in self.query.series or []: + if isinstance(serie, ActionsNode): + action = Action.objects.get(pk=int(serie.id), team=self.team) + event_filters.append(action_to_expr(action)) + elif isinstance(serie, EventsNode): + if serie.event is not None: + event_filters.append( + ast.CompareOperation( + op=ast.CompareOperationOp.Eq, + left=ast.Field(chain=["event"]), + right=ast.Constant(value=str(serie.event)), + ) + ) + else: + raise ValueError(f"Invalid serie kind: {serie.kind}") + if serie.properties is not None and serie.properties != []: + event_filters.append(property_to_expr(serie.properties, self.team)) + with self.timings.measure("test_account_filters"): + if ( + self.query.filterTestAccounts + and isinstance(self.team.test_account_filters, list) + and len(self.team.test_account_filters) > 0 + ): + for property in self.team.test_account_filters: + event_filters.append(property_to_expr(property, self.team)) + + if len(event_filters) == 0: + return ast.Constant(value=True) + elif len(event_filters) == 1: + return event_filters[0] + else: + return ast.And(exprs=event_filters) + + @cached_property + def events_query(self): + with self.timings.measure("events_query"): + events_query = parse_select( + """ + SELECT + events.person.id as person_id, + min(events.person.created_at) AS created_at, + arraySort(groupUniqArray(dateTrunc({interval}, events.timestamp))) AS all_activity, + arrayPopBack(arrayPushFront(all_activity, dateTrunc({interval}, created_at))) as previous_activity, + arrayPopFront(arrayPushBack(all_activity, dateTrunc({interval}, toDateTime('1970-01-01 00:00:00')))) as following_activity, + arrayMap((previous, current, index) -> (previous = current ? 'new' : ((current - {one_interval_period}) = previous AND index != 1) ? 'returning' : 'resurrecting'), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status, + arrayMap((current, next) -> (current + {one_interval_period} = next ? '' : 'dormant'), all_activity, following_activity) as dormant_status, + arrayMap(x -> x + {one_interval_period}, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods, + arrayMap(x -> 'dormant', dormant_periods) as dormant_label, + arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat, + arrayJoin(temp_concat) as period_status_pairs, + period_status_pairs.1 as start_of_period, + period_status_pairs.2 as status + FROM events + WHERE {event_filter} + GROUP BY person_id + """, + placeholders={ + **self.query_date_range.to_placeholders(), + "event_filter": self.event_filter, + }, + timings=self.timings, + ) + sampling_factor = self.query.samplingFactor + if sampling_factor is not None and isinstance(sampling_factor, float): + sample_expr = ast.SampleExpr(sample_value=ast.RatioExpr(left=ast.Constant(value=sampling_factor))) + events_query.select_from.sample = sample_expr + + return events_query + + @cached_property + def periods_query(self): + with self.timings.measure("periods_query"): + periods_query = parse_select( + """ + SELECT ( + dateTrunc({interval}, {date_to}) - {number_interval_period} + ) AS start_of_period + FROM numbers( + dateDiff( + {interval}, + dateTrunc({interval}, {date_from}), + dateTrunc({interval}, {date_to} + {one_interval_period}) + ) + ) + """, + placeholders=self.query_date_range.to_placeholders(), + timings=self.timings, + ) + return periods_query diff --git a/posthog/hogql_queries/query_runner.py b/posthog/hogql_queries/query_runner.py new file mode 100644 index 0000000000000..b8a3a10a4aa7b --- /dev/null +++ b/posthog/hogql_queries/query_runner.py @@ -0,0 +1,37 @@ +from typing import Optional + +from pydantic import BaseModel + +from posthog.hogql import ast +from posthog.hogql.context import HogQLContext +from posthog.hogql.printer import print_ast +from posthog.hogql.timings import HogQLTimings +from posthog.models import Team + + +class QueryRunner: + query: BaseModel + team: Team + timings: HogQLTimings + + def __init__(self, team: Team, timings: Optional[HogQLTimings] = None): + self.team = team + self.timings = timings or HogQLTimings() + + def run(self) -> BaseModel: + raise NotImplementedError() + + def to_query(self) -> ast.SelectQuery: + raise NotImplementedError() + + def to_persons_query(self) -> str: + # TODO: add support for selecting and filtering by breakdowns + raise NotImplementedError() + + def to_hogql(self) -> str: + with self.timings.measure("to_hogql"): + return print_ast( + self.to_query(), + HogQLContext(team_id=self.team.pk, enable_select_queries=True, timings=self.timings), + "hogql", + ) diff --git a/posthog/hogql_queries/test/test_lifecycle_hogql_query.py b/posthog/hogql_queries/test/test_lifecycle_hogql_query.py index fb35ace5f5baa..d9996640f64c3 100644 --- a/posthog/hogql_queries/test/test_lifecycle_hogql_query.py +++ b/posthog/hogql_queries/test/test_lifecycle_hogql_query.py @@ -1,7 +1,10 @@ +from datetime import datetime + from freezegun import freeze_time +from posthog.hogql.query import execute_hogql_query +from posthog.hogql_queries.lifecycle_query_runner import LifecycleQueryRunner from posthog.models.utils import UUIDT -from posthog.hogql_queries.lifecycle_hogql_query import run_lifecycle_query from posthog.schema import DateRange, IntervalType, LifecycleQuery, EventsNode from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events @@ -63,12 +66,29 @@ def _create_test_events(self): ] ) - def _run_lifecycle_query(self, date_from, date_to, interval): + def _create_query_runner(self, date_from, date_to, interval) -> LifecycleQueryRunner: series = [EventsNode(event="$pageview")] query = LifecycleQuery( dateRange=DateRange(date_from=date_from, date_to=date_to), interval=interval, series=series ) - return run_lifecycle_query(team=self.team, query=query) + return LifecycleQueryRunner(team=self.team, query=query) + + def _run_events_query(self, date_from, date_to, interval): + events_query = self._create_query_runner(date_from, date_to, interval).events_query + return execute_hogql_query( + team=self.team, + query=""" + SELECT + start_of_period, count(DISTINCT person_id) AS counts, status + FROM {events_query} + GROUP BY start_of_period, status + """, + placeholders={"events_query": events_query}, + query_type="LifecycleEventsQuery", + ) + + def _run_lifecycle_query(self, date_from, date_to, interval): + return self._create_query_runner(date_from, date_to, interval).run() def test_lifecycle_query_whole_range(self): self._create_test_events() @@ -262,3 +282,52 @@ def test_lifecycle_query_whole_range(self): ], response.result, ) + + def test_events_query_whole_range(self): + self._create_test_events() + + date_from = "2020-01-09" + date_to = "2020-01-19" + + response = self._run_events_query(date_from, date_to, IntervalType.day) + + self.assertEqual( + { + (datetime(2020, 1, 9, 0, 0), 1, "new"), # p2 + (datetime(2020, 1, 10, 0, 0), 1, "dormant"), # p2 + (datetime(2020, 1, 11, 0, 0), 1, "new"), # p1 + (datetime(2020, 1, 12, 0, 0), 1, "new"), # p3 + (datetime(2020, 1, 12, 0, 0), 1, "resurrecting"), # p2 + (datetime(2020, 1, 12, 0, 0), 1, "returning"), # p1 + (datetime(2020, 1, 13, 0, 0), 1, "returning"), # p1 + (datetime(2020, 1, 13, 0, 0), 2, "dormant"), # p2, p3 + (datetime(2020, 1, 14, 0, 0), 1, "dormant"), # p1 + (datetime(2020, 1, 15, 0, 0), 1, "resurrecting"), # p1 + (datetime(2020, 1, 15, 0, 0), 1, "new"), # p4 + (datetime(2020, 1, 16, 0, 0), 2, "dormant"), # p1, p4 + (datetime(2020, 1, 17, 0, 0), 1, "resurrecting"), # p1 + (datetime(2020, 1, 18, 0, 0), 1, "dormant"), # p1 + (datetime(2020, 1, 19, 0, 0), 1, "resurrecting"), # p1 + (datetime(2020, 1, 20, 0, 0), 1, "dormant"), # p1 + }, + set(response.results), + ) + + def test_events_query_partial_range(self): + self._create_test_events() + date_from = "2020-01-12" + date_to = "2020-01-14" + response = self._run_events_query(date_from, date_to, IntervalType.day) + + self.assertEqual( + { + (datetime(2020, 1, 11, 0, 0), 1, "new"), # p1 + (datetime(2020, 1, 12, 0, 0), 1, "new"), # p3 + (datetime(2020, 1, 12, 0, 0), 1, "resurrecting"), # p2 + (datetime(2020, 1, 12, 0, 0), 1, "returning"), # p1 + (datetime(2020, 1, 13, 0, 0), 1, "returning"), # p1 + (datetime(2020, 1, 13, 0, 0), 2, "dormant"), # p2, p3 + (datetime(2020, 1, 14, 0, 0), 1, "dormant"), # p1 + }, + set(response.results), + ) diff --git a/posthog/hogql_queries/query_date_range.py b/posthog/hogql_queries/utils/query_date_range.py similarity index 91% rename from posthog/hogql_queries/query_date_range.py rename to posthog/hogql_queries/utils/query_date_range.py index 35695b37181c5..a9c86614cac5f 100644 --- a/posthog/hogql_queries/query_date_range.py +++ b/posthog/hogql_queries/utils/query_date_range.py @@ -1,7 +1,7 @@ import re from functools import cached_property from datetime import datetime -from typing import Optional +from typing import Optional, Dict from zoneinfo import ZoneInfo from dateutil.relativedelta import relativedelta @@ -113,3 +113,12 @@ def number_interval_periods(self) -> ast.Expr: def interval_period_string_as_hogql_constant(self) -> ast.Expr: return ast.Constant(value=self.interval_name) + + def to_placeholders(self) -> Dict[str, ast.Expr]: + return { + "interval": self.interval_period_string_as_hogql_constant(), + "one_interval_period": self.one_interval_period(), + "number_interval_period": self.number_interval_periods(), + "date_from": self.date_from_as_hogql(), + "date_to": self.date_to_as_hogql(), + } diff --git a/posthog/hogql_queries/test/test_query_date_range.py b/posthog/hogql_queries/utils/test/test_query_date_range.py similarity index 97% rename from posthog/hogql_queries/test/test_query_date_range.py rename to posthog/hogql_queries/utils/test/test_query_date_range.py index 42787912887b2..0ab8467567a50 100644 --- a/posthog/hogql_queries/test/test_query_date_range.py +++ b/posthog/hogql_queries/utils/test/test_query_date_range.py @@ -1,6 +1,6 @@ from dateutil import parser -from posthog.hogql_queries.query_date_range import QueryDateRange +from posthog.hogql_queries.utils.query_date_range import QueryDateRange from posthog.schema import DateRange, IntervalType from posthog.test.base import APIBaseTest