Skip to content

Commit

Permalink
fix(hogql): Optimize querying for date frame in trends actors query (#…
Browse files Browse the repository at this point in the history
  • Loading branch information
webjunkie authored Mar 21, 2024
1 parent 3f93d5f commit c491bf2
Show file tree
Hide file tree
Showing 5 changed files with 59 additions and 58 deletions.
4 changes: 2 additions & 2 deletions mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -377,11 +377,11 @@ posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Signature
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: Superclass:
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: Subclass:
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self, time_frame: str | int | None, series_index: int, breakdown_value: str | int | None = ..., compare: Compare | None = ...) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self, time_frame: str | None, series_index: int, breakdown_value: str | int | None = ..., compare: Compare | None = ...) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: Superclass:
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: Subclass:
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self, time_frame: str | int | None, series_index: int, breakdown_value: str | int | None = ..., compare: Compare | None = ...) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: note: def to_actors_query(self, time_frame: str | None, series_index: int, breakdown_value: str | int | None = ..., compare: Compare | None = ...) -> SelectQuery | SelectUnionQuery
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Statement is unreachable [unreachable]
posthog/hogql_queries/insights/trends/trends_query_runner.py:0: error: Argument 1 to "_event_property" of "TrendsQueryRunner" has incompatible type "str | float | list[str | float] | None"; expected "str" [arg-type]
posthog/hogql_queries/insights/retention_query_runner.py:0: error: Incompatible types in assignment (expression has type "Expr", variable has type "Call") [assignment]
Expand Down
4 changes: 2 additions & 2 deletions posthog/hogql_queries/insights/insight_actors_query_runner.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
from datetime import timedelta
from typing import cast
from typing import cast, Optional

from posthog.hogql import ast
from posthog.hogql.query import execute_hogql_query
Expand Down Expand Up @@ -37,7 +37,7 @@ def to_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
trends_runner = cast(TrendsQueryRunner, self.source_runner)
query = cast(InsightActorsQuery, self.query)
return trends_runner.to_actors_query(
time_frame=query.day,
time_frame=cast(Optional[str], query.day), # Other runner accept day as int, but not this one
series_index=query.series or 0,
breakdown_value=query.breakdown,
compare=query.compare,
Expand Down
60 changes: 26 additions & 34 deletions posthog/hogql_queries/insights/trends/trends_query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,7 +68,7 @@ def build_query(self) -> ast.SelectQuery | ast.SelectUnionQuery:
return full_query

def build_actors_query(
self, time_frame: Optional[str | int] = None, breakdown_filter: Optional[str | int] = None
self, time_frame: Optional[str] = None, breakdown_filter: Optional[str | int] = None
) -> ast.SelectQuery | ast.SelectUnionQuery:
breakdown = self._breakdown(is_actors_query=True, breakdown_values_override=breakdown_filter)

Expand Down Expand Up @@ -169,7 +169,7 @@ def _get_events_subquery(
is_actors_query: bool,
breakdown: Breakdown,
breakdown_values_override: Optional[str | int] = None,
actors_query_time_frame: Optional[str | int] = None,
actors_query_time_frame: Optional[str] = None,
) -> ast.SelectQuery:
day_start = ast.Alias(
alias="day_start",
Expand All @@ -186,31 +186,16 @@ def _get_events_subquery(
actors_query_time_frame=actors_query_time_frame,
)

default_query = cast(
ast.SelectQuery,
parse_select(
"""
SELECT
{aggregation_operation} AS total
FROM {table} AS e
WHERE {events_filter}
"""
if isinstance(self.series, DataWarehouseNode)
else """
SELECT
{aggregation_operation} AS total
FROM {table} AS e
SAMPLE {sample}
WHERE {events_filter}
""",
placeholders={
"table": self._table_expr,
"events_filter": events_filter,
"aggregation_operation": self._aggregation_operation.select_aggregation(),
"sample": self._sample_value(),
},
),
default_query = ast.SelectQuery(
select=[ast.Alias(alias="total", expr=self._aggregation_operation.select_aggregation())],
select_from=ast.JoinExpr(table=self._table_expr, alias="e"),
where=events_filter,
)
if not isinstance(self.series, DataWarehouseNode):
assert default_query.select_from is not None
default_query.select_from.sample = ast.SampleExpr(
sample_value=self._sample_value(),
)

default_query.group_by = []

Expand Down Expand Up @@ -463,20 +448,27 @@ def _events_filter(
breakdown: Breakdown | None,
ignore_breakdowns: bool = False,
breakdown_values_override: Optional[str | int] = None,
actors_query_time_frame: Optional[str | int] = None,
actors_query_time_frame: Optional[str] = None,
) -> ast.Expr:
series = self.series
filters: List[ast.Expr] = []

# Dates
if is_actors_query and actors_query_time_frame is not None:
to_start_of_time_frame = f"toStartOf{self.query_date_range.interval_name.capitalize()}"
filters.append(
ast.CompareOperation(
left=ast.Call(name=to_start_of_time_frame, args=[ast.Field(chain=["timestamp"])]),
op=ast.CompareOperationOp.Eq,
right=ast.Call(name="toDateTime", args=[ast.Constant(value=actors_query_time_frame)]),
)
actors_from, actors_to = self.query_date_range.interval_bounds_from_str(actors_query_time_frame)
filters.extend(
[
ast.CompareOperation(
left=ast.Field(chain=["timestamp"]),
op=ast.CompareOperationOp.GtEq,
right=ast.Constant(value=actors_from),
),
ast.CompareOperation(
left=ast.Field(chain=["timestamp"]),
op=ast.CompareOperationOp.Lt,
right=ast.Constant(value=actors_to),
),
]
)
elif not self._aggregation_operation.requires_query_orchestration():
filters.extend(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -142,7 +142,7 @@ def to_queries(self) -> List[ast.SelectQuery | ast.SelectUnionQuery]:

def to_actors_query(
self,
time_frame: Optional[str | int],
time_frame: Optional[str],
series_index: int,
breakdown_value: Optional[str | int] = None,
compare: Optional[Compare] = None,
Expand Down
47 changes: 28 additions & 19 deletions posthog/hogql_queries/utils/query_date_range.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
from typing import Literal, Optional, Dict, List
from zoneinfo import ZoneInfo

from dateutil.parser import parse
from dateutil.relativedelta import relativedelta

from posthog.hogql.errors import HogQLException
Expand Down Expand Up @@ -116,36 +117,39 @@ def interval_type(self) -> IntervalType:
def interval_name(self) -> Literal["hour", "day", "week", "month"]:
return self.interval_type.name

def all_values(self) -> List[str]:
start: datetime = self.date_from()
end: datetime = self.date_to()
interval = self.interval_name

if interval == "hour":
start = start.replace(minute=0, second=0, microsecond=0)
elif interval == "day":
start = start.replace(hour=0, minute=0, second=0, microsecond=0)
elif interval == "week":
def align_with_interval(self, start: datetime) -> datetime:
if self.interval_name == "hour":
return start.replace(minute=0, second=0, microsecond=0)
elif self.interval_name == "day":
return start.replace(hour=0, minute=0, second=0, microsecond=0)
elif self.interval_name == "week":
start = start.replace(hour=0, minute=0, second=0, microsecond=0)
week_start_alignment_days = start.isoweekday() % 7
if self._team.week_start_day == WeekStartDay.MONDAY:
week_start_alignment_days = start.weekday()
start -= timedelta(days=week_start_alignment_days)
elif interval == "month":
start = start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)
return start
elif self.interval_name == "month":
return start.replace(day=1, hour=0, minute=0, second=0, microsecond=0)

def interval_relativedelta(self) -> relativedelta:
return relativedelta(
days=1 if self.interval_name == "day" else 0,
weeks=1 if self.interval_name == "week" else 0,
months=1 if self.interval_name == "month" else 0,
hours=1 if self.interval_name == "hour" else 0,
)

def all_values(self) -> List[str]:
start = self.align_with_interval(self.date_from())
end: datetime = self.date_to()
values: List[str] = []
while start <= end:
if interval == "hour":
if self.interval_name == "hour":
values.append(start.strftime("%Y-%m-%d %H:%M:%S"))
else:
values.append(start.strftime("%Y-%m-%d"))
start += relativedelta(
days=1 if interval == "day" else 0,
weeks=1 if interval == "week" else 0,
months=1 if interval == "month" else 0,
hours=1 if interval == "hour" else 0,
)
start += self.interval_relativedelta()
return values

def date_to_as_hogql(self) -> ast.Expr:
Expand Down Expand Up @@ -257,6 +261,11 @@ def to_placeholders(self) -> Dict[str, ast.Expr]:
else self.date_from_as_hogql(),
}

def interval_bounds_from_str(self, time_frame: str) -> tuple[datetime, datetime]:
date_from = parse(time_frame, tzinfos={None: self._team.timezone_info})
date_to = date_from + self.interval_relativedelta()
return date_from, date_to


class QueryDateRangeWithIntervals(QueryDateRange):
def __init__(
Expand Down

0 comments on commit c491bf2

Please sign in to comment.