Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hogql): time to convert funnel #20259

Merged
merged 11 commits into from
Feb 19, 2024
5 changes: 2 additions & 3 deletions frontend/src/queries/nodes/DataNode/dataNodeLogic.ts
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,6 @@ import {
InsightVizNode,
NodeKind,
PersonsNode,
QueryResponse,
QueryTiming,
} from '~/queries/schema'
import { isActorsQuery, isEventsQuery, isInsightActorsQuery, isInsightQueryNode, isPersonsNode } from '~/queries/utils'
Expand Down Expand Up @@ -218,7 +217,7 @@ export const dataNodeLogic = kea<dataNodeLogicType>([
if (isEventsQuery(props.query) || isActorsQuery(props.query)) {
const newResponse = (await query(values.nextQuery)) ?? null
actions.setElapsedTime(performance.now() - now)
const queryResponse = values.response as QueryResponse
const queryResponse = values.response as EventsQueryResponse | ActorsQueryResponse
return {
...queryResponse,
results: [...(queryResponse?.results ?? []), ...(newResponse?.results ?? [])],
Expand Down Expand Up @@ -394,7 +393,7 @@ export const dataNodeLogic = kea<dataNodeLogicType>([
if ((isEventsQuery(query) || isActorsQuery(query)) && !responseError && !dataLoading) {
if ((response as EventsQueryResponse | ActorsQueryResponse)?.hasMore) {
const sortKey = query.orderBy?.[0] ?? 'timestamp DESC'
const typedResults = (response as QueryResponse)?.results
const typedResults = (response as EventsQueryResponse | ActorsQueryResponse)?.results
if (isEventsQuery(query) && sortKey === 'timestamp DESC') {
const sortColumnIndex = query.select
.map((hql) => removeExpressionComment(hql))
Expand Down
59 changes: 43 additions & 16 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -1631,6 +1631,42 @@
"enum": ["total", "previous"],
"type": "string"
},
"FunnelStepsBreakdownResults": {
"items": {
"items": {
"type": "object"
},
"type": "array"
},
"type": "array"
},
"FunnelStepsResults": {
"items": {
"type": "object"
},
"type": "array"
},
"FunnelTimeToConvertResults": {
"additionalProperties": false,
"properties": {
"average_conversion_time": {
"type": "integer"
},
"bins": {
"items": {
"items": {
"type": "integer"
},
"maxItems": 2,
"minItems": 2,
"type": "array"
},
"type": "array"
}
},
"required": ["average_conversion_time", "bins"],
"type": "object"
},
"FunnelVizType": {
"enum": ["steps", "time_to_convert", "trends"],
"type": "string"
Expand All @@ -1639,7 +1675,7 @@
"additionalProperties": false,
"properties": {
"binCount": {
"$ref": "#/definitions/BinCountValue"
"type": "integer"
},
"breakdownAttributionType": {
"$ref": "#/definitions/BreakdownAttributionType"
Expand Down Expand Up @@ -1822,19 +1858,13 @@
"results": {
"anyOf": [
{
"items": {
"type": "object"
},
"type": "array"
"$ref": "#/definitions/FunnelStepsResults"
},
{
"items": {
"items": {
"type": "object"
},
"type": "array"
},
"type": "array"
"$ref": "#/definitions/FunnelStepsBreakdownResults"
},
{
"$ref": "#/definitions/FunnelTimeToConvertResults"
}
]
},
Expand Down Expand Up @@ -3191,10 +3221,7 @@
"next_allowed_client_refresh": {
"type": "string"
},
"results": {
"items": {},
"type": "array"
},
"results": {},
"timings": {
"items": {
"$ref": "#/definitions/QueryTiming"
Expand Down
14 changes: 12 additions & 2 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -675,6 +675,7 @@ export type FunnelExclusion = FunnelExclusionEventsNode | FunnelExclusionActions
export type FunnelsFilter = {
exclusions?: FunnelExclusion[]
layout?: FunnelsFilterLegacy['layout']
/** @asType integer */
binCount?: FunnelsFilterLegacy['bin_count']
breakdownAttributionType?: FunnelsFilterLegacy['breakdown_attribution_type']
/** @asType integer */
Expand Down Expand Up @@ -705,8 +706,17 @@ export interface FunnelsQuery extends InsightsQueryBase {
breakdownFilter?: BreakdownFilter
}

/** @asType integer */
type BinNumber = number
export type FunnelStepsResults = Record<string, any>[]
export type FunnelStepsBreakdownResults = Record<string, any>[][]
export type FunnelTimeToConvertResults = {
/** @asType integer */
average_conversion_time: number
bins: [BinNumber, BinNumber][]
}
export interface FunnelsQueryResponse extends QueryResponse {
results: Record<string, any>[] | Record<string, any>[][]
results: FunnelStepsResults | FunnelStepsBreakdownResults | FunnelTimeToConvertResults
}

/** `RetentionFilterType` minus everything inherited from `FilterType` */
Expand Down Expand Up @@ -853,7 +863,7 @@ export interface QueryRequest {
}

export interface QueryResponse {
results: unknown[]
results: unknown
timings?: QueryTiming[]
hogql?: string
is_cached?: boolean
Expand Down
2 changes: 2 additions & 0 deletions posthog/hogql/functions/mapping.py
Original file line number Diff line number Diff line change
Expand Up @@ -539,6 +539,8 @@ class HogQLFunctionMeta:
"formatReadableSize": HogQLFunctionMeta("formatReadableSize", 1, 1),
"formatReadableQuantity": HogQLFunctionMeta("formatReadableQuantity", 1, 1),
"formatReadableTimeDelta": HogQLFunctionMeta("formatReadableTimeDelta", 1, 2),
"least": HogQLFunctionMeta("least", 2, 2),
"greatest": HogQLFunctionMeta("greatest", 2, 2),
# time window
"tumble": HogQLFunctionMeta("tumble", 2, 2),
"hop": HogQLFunctionMeta("hop", 3, 3),
Expand Down
1 change: 1 addition & 0 deletions posthog/hogql_queries/insights/funnels/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,3 +2,4 @@
from .funnel import Funnel
from .funnel_strict import FunnelStrict
from .funnel_unordered import FunnelUnordered
from .funnel_time_to_convert import FunnelTimeToConvert
7 changes: 5 additions & 2 deletions posthog/hogql_queries/insights/funnels/base.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
from abc import ABC
from functools import cached_property
from typing import Any, Dict, List, Optional, Tuple, cast
from typing import Any, Dict, List, Optional, Tuple, Union, cast
import uuid
from posthog.clickhouse.materialized_columns.column import ColumnName
from posthog.constants import BREAKDOWN_VALUES_LIMIT
Expand All @@ -26,6 +26,7 @@
BreakdownType,
EventsNode,
FunnelExclusionActionsNode,
FunnelTimeToConvertResults,
StepOrderValue,
)
from posthog.types import EntityNode, ExclusionEntityNode
Expand Down Expand Up @@ -265,7 +266,9 @@ def _get_breakdown_expr(self) -> ast.Expr:
else:
raise ValidationError(detail=f"Unsupported breakdown type: {breakdownType}")

def _format_results(self, results) -> List[Dict[str, Any]] | List[List[Dict[str, Any]]]:
def _format_results(
self, results
) -> Union[FunnelTimeToConvertResults, List[Dict[str, Any]], List[List[Dict[str, Any]]]]:
breakdown = self.context.breakdown

if not results or len(results) == 0:
Expand Down
123 changes: 123 additions & 0 deletions posthog/hogql_queries/insights/funnels/funnel_time_to_convert.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
from rest_framework.exceptions import ValidationError

from posthog.constants import FUNNEL_TO_STEP
from posthog.hogql import ast
from posthog.hogql.parser import parse_select
from posthog.hogql_queries.insights.funnels.base import FunnelBase
from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext
from posthog.hogql_queries.insights.funnels.utils import get_funnel_order_class
from posthog.schema import FunnelTimeToConvertResults


class FunnelTimeToConvert(FunnelBase):
def __init__(
self,
context: FunnelQueryContext,
):
super().__init__(context)

self.funnel_order = get_funnel_order_class(self.context.funnelsFilter)(context=self.context)

def _format_results(self, results: list) -> FunnelTimeToConvertResults:
return FunnelTimeToConvertResults(
bins=[[bin_from_seconds, person_count] for bin_from_seconds, person_count, _ in results],
average_conversion_time=results[0][2],
)

def get_query(self) -> ast.SelectQuery:
query, funnelsFilter = self.context.query, self.context.funnelsFilter

steps_per_person_query = self.funnel_order.get_step_counts_query()
# expects 1 person per row, whatever their max step is, and the step conversion times for this person

# Conversion from which step should be calculated
from_step = funnelsFilter.funnelFromStep or 0
# Conversion to which step should be calculated
to_step = funnelsFilter.funnelToStep or len(query.series) - 1

# Use custom bin_count if provided by user, otherwise infer an automatic one based on the number of samples
binCount = funnelsFilter.binCount
if binCount is not None:
# Custom count is clamped between 1 and 90
if binCount < 1:
binCount = 1
elif binCount > 90:
binCount = 90
bin_count_identifier = str(binCount)
bin_count_expression = None
else:
# Auto count is clamped between 1 and 60
bin_count_identifier = "bin_count"
bin_count_expression = f"""
count() AS sample_count,
least(60, greatest(1, ceil(cbrt(ifNull(sample_count, 0))))) AS {bin_count_identifier},
"""

if not (0 < to_step < len(query.series)):
raise ValidationError(
f'Filter parameter {FUNNEL_TO_STEP} can only be one of {", ".join(map(str, range(1, len(query.series))))} for time to convert!'
)

steps_average_conversion_time_identifiers = [
f"step_{step+1}_average_conversion_time_inner" for step in range(from_step, to_step)
]
steps_average_conversion_time_expression_sum = " + ".join(steps_average_conversion_time_identifiers)

time_to_convert_query = parse_select(
f"""
WITH
step_runs AS (
{{steps_per_person_query}}
),
histogram_params AS (
/* Binning ensures that each sample belongs to a bin in results */
/* If bin_count is not a custom number, it's calculated in bin_count_expression */
SELECT
ifNull(floor(min({steps_average_conversion_time_expression_sum})), 0) AS from_seconds,
ifNull(ceil(max({steps_average_conversion_time_expression_sum})), 1) AS to_seconds,
round(avg({steps_average_conversion_time_expression_sum}), 2) AS average_conversion_time,
{bin_count_expression or ""}
ceil((to_seconds - from_seconds) / {bin_count_identifier}) AS bin_width_seconds_raw,
/* Use 60 seconds as fallback bin width in case of only one sample */
if(bin_width_seconds_raw > 0, bin_width_seconds_raw, 60) AS bin_width_seconds
FROM step_runs
-- We only need to check step to_step here, because it depends on all the other ones being NOT NULL too
WHERE step_{to_step}_average_conversion_time_inner IS NOT NULL
),
/* Below CTEs make histogram_params columns available to the query below as straightforward identifiers */
( SELECT bin_width_seconds FROM histogram_params ) AS bin_width_seconds,
/* bin_count is only made available as an identifier if it had to be calculated */
{
f"( SELECT {bin_count_identifier} FROM histogram_params ) AS {bin_count_identifier},"
if bin_count_expression else ""
}
( SELECT from_seconds FROM histogram_params ) AS histogram_from_seconds,
( SELECT to_seconds FROM histogram_params ) AS histogram_to_seconds,
( SELECT average_conversion_time FROM histogram_params ) AS histogram_average_conversion_time
SELECT
fill.bin_from_seconds,
person_count,
histogram_average_conversion_time AS average_conversion_time
FROM (
/* Calculating bins from step runs */
SELECT
histogram_from_seconds + floor(({steps_average_conversion_time_expression_sum} - histogram_from_seconds) / bin_width_seconds) * bin_width_seconds AS bin_from_seconds,
count() AS person_count
FROM step_runs
GROUP BY bin_from_seconds
) results
RIGHT OUTER JOIN (
/* Making sure bin_count bins are returned */
/* Those not present in the results query due to lack of data simply get person_count 0 */
SELECT histogram_from_seconds + number * bin_width_seconds AS bin_from_seconds FROM numbers(ifNull({bin_count_identifier}, 0) + 1)
) fill
-- USING (bin_from_seconds)
ON results.bin_from_seconds = fill.bin_from_seconds
-- ORDER BY bin_from_seconds
ORDER BY fill.bin_from_seconds
""",
placeholders={"steps_per_person_query": steps_per_person_query},
)

assert isinstance(time_to_convert_query, ast.SelectQuery)
return time_to_convert_query
18 changes: 16 additions & 2 deletions posthog/hogql_queries/insights/funnels/funnels_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,12 +15,14 @@
from posthog.hogql.query import execute_hogql_query
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.funnels.funnel_query_context import FunnelQueryContext
from posthog.hogql_queries.insights.funnels.funnel_time_to_convert import FunnelTimeToConvert
from posthog.hogql_queries.insights.funnels.utils import get_funnel_order_class
from posthog.hogql_queries.query_runner import QueryRunner
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models import Team
from posthog.models.filters.mixins.utils import cached_property
from posthog.schema import (
FunnelVizType,
FunnelsQuery,
FunnelsQueryResponse,
HogQLQueryModifiers,
Expand Down Expand Up @@ -69,7 +71,7 @@ def _refresh_frequency(self):
return refresh_frequency

def to_query(self) -> ast.SelectQuery:
return self.funnel_order_class.get_query()
return self.funnel_class.get_query()

def calculate(self):
query = self.to_query()
Expand All @@ -86,7 +88,7 @@ def calculate(self):
modifiers=self.modifiers,
)

results = self.funnel_order_class._format_results(response.results)
results = self.funnel_class._format_results(response.results)

if response.timings is not None:
timings.extend(response.timings)
Expand All @@ -97,6 +99,18 @@ def calculate(self):
def funnel_order_class(self):
return get_funnel_order_class(self.context.funnelsFilter)(context=self.context)

@cached_property
def funnel_class(self):
funnelVizType = self.context.funnelsFilter.funnelVizType

if funnelVizType == FunnelVizType.trends:
# return FunnelTrends(context=self.context)
return self.funnel_order_class
elif funnelVizType == FunnelVizType.time_to_convert:
return FunnelTimeToConvert(context=self.context)
else:
return self.funnel_order_class

@cached_property
def query_date_range(self):
return QueryDateRange(
Expand Down
Loading
Loading