Skip to content

Commit

Permalink
Added support for histogram breakdowns
Browse files Browse the repository at this point in the history
  • Loading branch information
Gilbert09 committed Oct 12, 2023
1 parent cb77cdf commit 4b57daf
Show file tree
Hide file tree
Showing 6 changed files with 200 additions and 43 deletions.
2 changes: 1 addition & 1 deletion posthog/hogql/functions/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -674,7 +674,7 @@ class HogQLFunctionMeta:
"medianBFloat16If": HogQLFunctionMeta("medianBFloat16If", 2, 2, aggregate=True),
"quantile": HogQLFunctionMeta("quantile", 1, 1, min_params=1, max_params=1, aggregate=True),
"quantileIf": HogQLFunctionMeta("quantileIf", 2, 2, min_params=1, max_params=1, aggregate=True),
"quantiles": HogQLFunctionMeta("quantiles", 1, 1, min_params=1, max_params=1, aggregate=True),
"quantiles": HogQLFunctionMeta("quantiles", 1, None, aggregate=True),
"quantilesIf": HogQLFunctionMeta("quantilesIf", 2, 2, min_params=1, max_params=1, aggregate=True),
# "quantileExact": HogQLFunctionMeta("quantileExact", 1, 1, aggregate=True),
# "quantileExactIf": HogQLFunctionMeta("quantileExactIf", 2, 2, aggregate=True),
Expand Down
118 changes: 118 additions & 0 deletions posthog/hogql_queries/insights/trends/breakdown.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
from typing import List, Tuple
from posthog.hogql import ast
from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues
from posthog.hogql_queries.insights.trends.utils import series_event_name
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.team.team import Team
from posthog.schema import ActionsNode, EventsNode, TrendsQuery


class Breakdown:
    """Builds the HogQL AST fragments a trends query needs for a property breakdown.

    Two modes are supported:

    * plain breakdowns, where events are grouped by the raw property value, and
    * histogram breakdowns (``breakdown_histogram_bin_count`` is set), where
      events are grouped into numeric ``[lower,upper]`` buckets.
    """

    # Label used for events that do not fall into any histogram bucket.
    _OTHER_BUCKET_LABEL = '["",""]'

    query: TrendsQuery
    team: Team
    series: EventsNode | ActionsNode
    query_date_range: QueryDateRange

    def __init__(
        self, team: Team, query: TrendsQuery, series: EventsNode | ActionsNode, query_date_range: QueryDateRange
    ):
        self.team = team
        self.query = query
        self.series = series
        self.query_date_range = query_date_range

    @cached_property
    def enabled(self):
        """Whether the query specifies a breakdown property at all."""
        return self.query.breakdown is not None and self.query.breakdown.breakdown is not None

    @cached_property
    def is_histogram_breakdown(self):
        """Whether the breakdown is enabled and a histogram bin count was provided."""
        return self.enabled and self.query.breakdown.breakdown_histogram_bin_count is not None

    def placeholders(self):
        """Return the placeholder mapping holding the array of all breakdown values.

        For histogram breakdowns the array contains the bucket labels, otherwise
        it contains the raw breakdown values.
        """
        values = self._get_breakdown_buckets_ast() if self.is_histogram_breakdown else self._get_breakdown_values_ast

        return {"cross_join_breakdown_values": ast.Alias(alias="breakdown_value", expr=values)}

    def events_select(self):
        """Return the ``breakdown_value`` select expression for the events query."""
        if self.is_histogram_breakdown:
            return ast.Alias(alias="breakdown_value", expr=self._get_breakdown_histogram_multi_if())

        return ast.Alias(alias="breakdown_value", expr=self._breakdown_field())

    def events_where_filter(self):
        """Return a filter restricting events to the known breakdown values."""
        return ast.CompareOperation(
            left=self._breakdown_field(),
            op=ast.CompareOperationOp.In,
            right=self._get_breakdown_values_ast,
        )

    def _breakdown_field(self) -> ast.Field:
        """Return a fresh field node pointing at the breakdown property."""
        return ast.Field(chain=["properties", self.query.breakdown.breakdown])

    @staticmethod
    def _bucket_label(lower_bound, upper_bound) -> str:
        """Format a bucket as the ``[lower,upper]`` label shared by the select and the cross join."""
        return f"[{lower_bound},{upper_bound}]"

    def _get_breakdown_buckets_ast(self) -> ast.Array:
        """Return an array of every bucket label, followed by the catch-all label."""
        labels = [self._bucket_label(lower, upper) for lower, upper in self._get_breakdown_histogram_buckets()]
        labels.append(self._OTHER_BUCKET_LABEL)

        return ast.Array(exprs=[ast.Constant(value=label) for label in labels])

    @cached_property
    def _get_breakdown_values_ast(self) -> ast.Array:
        """The breakdown values wrapped as an array of constants."""
        return ast.Array(exprs=[ast.Constant(value=value) for value in self._get_breakdown_values])

    @cached_property
    def _get_breakdown_values(self) -> List:
        """The raw breakdown values returned by ``BreakdownValues.get_breakdown_values()``.

        This is a plain list, not an AST node. For histogram breakdowns the
        entries are used as numeric bucket boundaries.
        """
        breakdown = BreakdownValues(
            team=self.team,
            event_name=series_event_name(self.series),
            breakdown_field=self.query.breakdown.breakdown,
            query_date_range=self.query_date_range,
            histogram_bin_count=self.query.breakdown.breakdown_histogram_bin_count,
        )
        return breakdown.get_breakdown_values()

    def _get_breakdown_histogram_buckets(self) -> List[Tuple[float, float]]:
        """Turn consecutive boundary values into ``(lower, upper)`` bucket pairs.

        A single boundary yields one bucket starting and ending on that value.
        No boundaries yield no buckets.
        """
        values = self._get_breakdown_values

        if len(values) == 1:
            values = [values[0], values[0]]

        last_index = len(values) - 2
        buckets = []
        for index, (lower_bound, upper_bound) in enumerate(zip(values, values[1:])):
            if index == last_index:
                # Buckets are matched with `>= lower AND < upper`, so widen the
                # final bucket slightly to include the largest boundary value.
                upper_bound = upper_bound + 0.01
            buckets.append((lower_bound, upper_bound))

        return buckets

    def _get_breakdown_histogram_multi_if(self) -> ast.Expr:
        """Return an expression mapping each event's property value to its bucket label."""
        fallback = ast.Constant(value=self._OTHER_BUCKET_LABEL)
        buckets = self._get_breakdown_histogram_buckets()

        if not buckets:
            # `multiIf` needs at least one condition/result pair; with no
            # buckets every event belongs to the catch-all bucket.
            return fallback

        multi_if_exprs: List[ast.Expr] = []
        for lower_bound, upper_bound in buckets:
            in_bucket = ast.And(
                exprs=[
                    ast.CompareOperation(
                        left=self._breakdown_field(),
                        op=ast.CompareOperationOp.GtEq,
                        right=ast.Constant(value=lower_bound),
                    ),
                    ast.CompareOperation(
                        left=self._breakdown_field(),
                        op=ast.CompareOperationOp.Lt,
                        right=ast.Constant(value=upper_bound),
                    ),
                ]
            )
            multi_if_exprs.append(in_bucket)
            multi_if_exprs.append(ast.Constant(value=self._bucket_label(lower_bound, upper_bound)))

        # `else` block of the multi-if
        multi_if_exprs.append(fallback)

        return ast.Call(name="multiIf", args=multi_if_exprs)
35 changes: 30 additions & 5 deletions posthog/hogql_queries/insights/trends/breakdown_values.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
from typing import List
from typing import List, Optional
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.query import execute_hogql_query
Expand All @@ -11,12 +11,21 @@ class BreakdownValues:
event_name: str
breakdown_field: str
query_date_range: QueryDateRange
histogram_bin_count: Optional[int]

def __init__(self, team: Team, event_name: str, breakdown_field: str, query_date_range: QueryDateRange):
def __init__(
    self,
    team: Team,
    event_name: str,
    breakdown_field: str,
    query_date_range: QueryDateRange,
    histogram_bin_count: Optional[float] = None,
):
    """Store the query inputs; a provided bin count is coerced to ``int``."""
    self.team = team
    self.event_name = event_name
    self.breakdown_field = breakdown_field
    self.query_date_range = query_date_range

    if histogram_bin_count is None:
        self.histogram_bin_count = None
    else:
        self.histogram_bin_count = int(histogram_bin_count)

def get_breakdown_values(self) -> List[str]:
select_field = ast.Alias(alias="value", expr=ast.Field(chain=["properties", self.breakdown_field]))
Expand All @@ -36,8 +45,6 @@ def get_breakdown_values(self) -> List[str]:
ORDER BY
count DESC,
value DESC
LIMIT 25
OFFSET 0
)
""",
placeholders={
Expand All @@ -46,14 +53,17 @@ def get_breakdown_values(self) -> List[str]:
},
)

if self.histogram_bin_count is not None:
expr = self._to_bucketing_expression()
query.select = [expr]

response = execute_hogql_query(
query_type="TrendsQueryBreakdownValues",
query=query,
team=self.team,
)

values = response.results[0][0]

return values

def _where_filter(self) -> ast.Expr:
Expand All @@ -77,3 +87,18 @@ def _where_filter(self) -> ast.Expr:
filters.append(parse_expr("event = {event}", placeholders={"event": ast.Constant(value=self.event_name)}))

return ast.And(exprs=filters)

def _to_bucketing_expression(self) -> ast.Expr:
    """Build the select expression that computes the histogram bucket boundaries.

    The boundaries are evenly spaced quantiles of ``value`` (one more level
    than there are bins), floored to two decimals with consecutive duplicates
    collapsed. A bin count of one or less uses only the 0 and 1 quantiles.
    """
    assert isinstance(self.histogram_bin_count, int)

    bin_count = self.histogram_bin_count
    if bin_count > 1:
        step = 1.0 / bin_count
        levels = ",".join(f"{index * step:.2f}" for index in range(bin_count + 1))
        quantile_expression = f"quantiles({levels})(value)"
    else:
        quantile_expression = "quantiles(0,1)(value)"

    return parse_expr(f"arrayCompact(arrayMap(x -> floor(x, 2), {quantile_expression}))")
52 changes: 17 additions & 35 deletions posthog/hogql_queries/insights/trends/query_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,8 @@
from posthog.hogql import ast
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.property import property_to_expr
from posthog.hogql_queries.insights.trends.breakdown_values import BreakdownValues
from posthog.hogql_queries.insights.trends.breakdown import Breakdown
from posthog.hogql_queries.insights.trends.utils import series_event_name
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.team.team import Team
Expand Down Expand Up @@ -35,7 +36,7 @@ def build_query(self) -> ast.SelectUnionQuery:
return full_query

def _get_date_subqueries(self) -> List[ast.SelectQuery]:
if not self._breakdown_enabled():
if not self._breakdown.enabled:
return [
parse_select(
"""
Expand Down Expand Up @@ -83,15 +84,15 @@ def _get_date_subqueries(self) -> List[ast.SelectQuery]:
CROSS JOIN (
SELECT breakdown_value
FROM (
SELECT {breakdown_values}
SELECT {cross_join_breakdown_values}
)
ARRAY JOIN breakdown_value as breakdown_value
) as sec
ORDER BY breakdown_value, day_start
""",
placeholders={
**self.query_date_range.to_placeholders(),
"breakdown_values": ast.Alias(alias="breakdown_value", expr=self._get_breakdown_values_ast),
**self._breakdown.placeholders(),
},
)
]
Expand All @@ -115,10 +116,8 @@ def _get_events_subquery(self) -> ast.SelectQuery:
},
)

if self._breakdown_enabled():
query.select.append(
ast.Alias(alias="breakdown_value", expr=ast.Field(chain=["properties", self.query.breakdown.breakdown]))
)
if self._breakdown.enabled:
query.select.append(self._breakdown.events_select())
query.group_by.append(ast.Field(chain=["breakdown_value"]))

return query
Expand All @@ -134,7 +133,7 @@ def _outer_select_query(self, inner_query: ast.SelectQuery) -> ast.SelectQuery:
placeholders={"inner_query": inner_query},
)

if self._breakdown_enabled():
if self._breakdown.enabled:
query.select.append(ast.Field(chain=["breakdown_value"]))
query.group_by = [ast.Field(chain=["breakdown_value"])]
query.order_by = [ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC")]
Expand All @@ -154,7 +153,7 @@ def _inner_select_query(self, inner_query: ast.SelectUnionQuery) -> ast.SelectQu
placeholders={"inner_query": inner_query},
)

if self._breakdown_enabled():
if self._breakdown.enabled:
query.select.append(ast.Field(chain=["breakdown_value"]))
query.group_by.append(ast.Field(chain=["breakdown_value"]))
query.order_by.append(ast.OrderExpr(expr=ast.Field(chain=["breakdown_value"]), order="ASC"))
Expand All @@ -180,9 +179,11 @@ def _events_filter(self) -> ast.Expr:
)

# Series
if self._series_event_name() is not None:
if series_event_name(self.series) is not None:
filters.append(
parse_expr("event = {event}", placeholders={"event": ast.Constant(value=self._series_event_name())})
parse_expr(
"event = {event}", placeholders={"event": ast.Constant(value=series_event_name(self.series))}
)
)

# Filter Test Accounts
Expand All @@ -203,14 +204,8 @@ def _events_filter(self) -> ast.Expr:
filters.append(property_to_expr(series.properties, self.team))

# Breakdown
if self._breakdown_enabled():
filters.append(
ast.CompareOperation(
left=ast.Field(chain=["properties", self.query.breakdown.breakdown]),
op=ast.CompareOperationOp.In,
right=self._get_breakdown_values_ast,
)
)
if self._breakdown.enabled and not self._breakdown.is_histogram_breakdown:
filters.append(self._breakdown.events_where_filter())

if len(filters) == 0:
return ast.Constant(value=True)
Expand All @@ -232,19 +227,6 @@ def _sample_value(self) -> str:

return f"SAMPLE {self.query.samplingFactor}"

def _series_event_name(self) -> str | None:
if isinstance(self.series, EventsNode):
return self.series.event
return None

def _breakdown_enabled(self):
return self.query.breakdown is not None and self.query.breakdown.breakdown is not None

@cached_property
def _get_breakdown_values_ast(self) -> ast.Array:
breakdown = BreakdownValues(
self.team, self._series_event_name(), self.query.breakdown.breakdown, self.query_date_range
)
breakdown_values = breakdown.get_breakdown_values()

return ast.Array(exprs=list(map(lambda v: ast.Constant(value=v), breakdown_values)))
def _breakdown(self):
    """Create the ``Breakdown`` helper for this builder's team, query, series and date range."""
    breakdown = Breakdown(
        team=self.team,
        query=self.query,
        series=self.series,
        query_date_range=self.query_date_range,
    )
    return breakdown
29 changes: 27 additions & 2 deletions posthog/hogql_queries/insights/trends/trends_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
from posthog.hogql_queries.utils.query_previous_period_date_range import QueryPreviousPeriodDateRange
from posthog.models import Team
from posthog.models.filters.mixins.utils import cached_property
from posthog.models.property_definition import PropertyDefinition
from posthog.schema import ActionsNode, EventsNode, HogQLQueryResponse, TrendsQuery, TrendsQueryResponse


Expand Down Expand Up @@ -131,8 +132,13 @@ def build_series_response(self, response: HogQLQueryResponse, series: SeriesWith

# Modifications for when breakdowns are active
if self.query.breakdown is not None and self.query.breakdown.breakdown is not None:
series_object["breakdown_value"] = val[2]
series_object["label"] = "{} - {}".format(series_object["label"], val[2])
if self._is_breakdown_field_boolean():
remapped_label = self._convert_boolean(val[2])
series_object["label"] = "{} - {}".format(series_object["label"], remapped_label)
series_object["breakdown_value"] = remapped_label
else:
series_object["label"] = "{} - {}".format(series_object["label"], val[2])
series_object["breakdown_value"] = val[2]

res.append(series_object)
return res
Expand Down Expand Up @@ -191,3 +197,22 @@ def apply_formula(self, formula: str, results: List[Dict[str, Any]]) -> List[Dic
new_result["label"] = f"Formula ({formula})"

return [new_result]

def _is_breakdown_field_boolean(self):
field_type = self._event_properties.get(self.query.breakdown.breakdown)
return field_type == "Boolean"

def _convert_boolean(self, value: any):
bool_map = {1: "true", 0: "false", "": ""}
return bool_map.get(value) or value

@cached_property
def _event_properties(self):
    """Map property name to property type for this team's property definitions.

    Definitions without a property type are left out of the mapping.
    """
    # NOTE(review): Django's `__in` lookup appears to ignore None entries, so
    # rows with a NULL `type` may not be matched here — confirm intent.
    rows = PropertyDefinition.objects.filter(
        team_id=self.team.pk,
        type__in=[None, PropertyDefinition.Type.EVENT],
    ).values_list("name", "property_type")

    typed_properties = {}
    for name, property_type in rows:
        if property_type:
            typed_properties[name] = property_type

    return typed_properties
7 changes: 7 additions & 0 deletions posthog/hogql_queries/insights/trends/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from posthog.schema import ActionsNode, EventsNode


def series_event_name(series: EventsNode | ActionsNode) -> str | None:
    """Return the event name of an ``EventsNode`` series, or ``None`` for any other series."""
    return series.event if isinstance(series, EventsNode) else None

0 comments on commit 4b57daf

Please sign in to comment.