Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(hogql): Add funnels to paths insight #21044

Closed
wants to merge 9 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -507,7 +507,25 @@ describe('filtersToQueryNode', () => {
end_point: 'b',
path_groupings: ['c', 'd'],
funnel_paths: FunnelPathType.between,
funnel_filter: { a: 1 },
funnel_filter: {
events: [
{
id: '$pageview',
name: '$pageview',
order: 0,
type: 'events',
},
{
id: null,
order: 1,
type: 'events',
},
],
exclusions: [],
funnel_step: 1,
funnel_viz_type: 'steps',
insight: 'FUNNELS',
},
exclude_events: ['e', 'f'],
step_limit: 1,
path_start_key: 'g',
Expand All @@ -530,7 +548,46 @@ describe('filtersToQueryNode', () => {
endPoint: 'b',
pathGroupings: ['c', 'd'],
funnelPaths: FunnelPathType.between,
funnelFilter: { a: 1 },
funnelFilter: {
events: [
{
id: '$pageview',
name: '$pageview',
order: 0,
type: 'events',
},
{
id: null,
order: 1,
type: 'events',
},
],
exclusions: [],
funnel_step: 1,
funnel_viz_type: 'steps',
insight: 'FUNNELS',
},
funnelActorsQuery: {
funnelStep: 1,
kind: NodeKind.FunnelsActorsQuery,
source: {
funnelsFilter: {
funnelVizType: FunnelVizType.Steps,
},
kind: NodeKind.FunnelsQuery,
series: [
{
event: '$pageview',
kind: NodeKind.EventsNode,
name: '$pageview',
},
{
event: null,
kind: NodeKind.EventsNode,
},
],
},
},
excludeEvents: ['e', 'f'],
stepLimit: 1,
pathReplacements: true,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import {
EventsNode,
FunnelExclusionActionsNode,
FunnelExclusionEventsNode,
FunnelsActorsQuery,
FunnelsFilter,
InsightNodeKind,
InsightQueryNode,
Expand Down Expand Up @@ -474,6 +475,14 @@ export const pathsFilterToQuery = (filters: Partial<PathsFilterType>): PathsFilt
pathGroupings: filters.path_groupings,
funnelPaths: filters.funnel_paths,
funnelFilter: filters.funnel_filter,
funnelActorsQuery:
filters.funnel_filter && Object.keys(filters.funnel_filter).length > 0
? ({
kind: NodeKind.FunnelsActorsQuery,
source: filtersToQueryNode(filters.funnel_filter),
funnelStep: filters.funnel_filter.funnel_step,
} as FunnelsActorsQuery)
: undefined,
excludeEvents: filters.exclude_events,
stepLimit: filters.step_limit,
pathReplacements: filters.path_replacements,
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -282,6 +282,7 @@ export const queryNodeToFilter = (query: InsightQueryNode): Partial<FilterType>
delete queryCopy.pathsFilter?.maxEdgeWeight
delete queryCopy.pathsFilter?.funnelPaths
delete queryCopy.pathsFilter?.funnelFilter
delete queryCopy.pathsFilter?.funnelActorsQuery
thmsobrmlr marked this conversation as resolved.
Show resolved Hide resolved
} else if (isStickinessQuery(queryCopy)) {
camelCasedStickinessProps.show_legend = queryCopy.stickinessFilter?.showLegend
camelCasedStickinessProps.show_values_on_series = queryCopy.stickinessFilter?.showValuesOnSeries
Expand Down
3 changes: 3 additions & 0 deletions frontend/src/queries/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -3240,6 +3240,9 @@
},
"type": "array"
},
"funnelActorsQuery": {
"$ref": "#/definitions/FunnelsActorsQuery"
},
"funnelFilter": {
"type": "object"
},
Expand Down
1 change: 1 addition & 0 deletions frontend/src/queries/schema.ts
Original file line number Diff line number Diff line change
Expand Up @@ -787,6 +787,7 @@ export type PathsFilter = {
maxEdgeWeight?: PathsFilterLegacy['max_edge_weight']
funnelPaths?: PathsFilterLegacy['funnel_paths']
funnelFilter?: PathsFilterLegacy['funnel_filter']
thmsobrmlr marked this conversation as resolved.
Show resolved Hide resolved
funnelActorsQuery?: FunnelsActorsQuery

/** Relevant only within actors query */
pathStartKey?: string
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql_queries/insights/funnels/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,7 +57,7 @@ def funnel_window_interval_unit_to_sql(
elif funnelWindowIntervalUnit == "day":
return "DAY"
else:
raise ValidationError("{funnelWindowIntervalUnit} not supported")
raise ValidationError(f"{funnelWindowIntervalUnit} not supported")
thmsobrmlr marked this conversation as resolved.
Show resolved Hide resolved


def get_breakdown_expr(
Expand Down
135 changes: 125 additions & 10 deletions posthog/hogql_queries/insights/paths_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
from posthog.hogql.property import property_to_expr
from posthog.hogql.query import execute_hogql_query
from posthog.hogql.timings import HogQLTimings
from posthog.hogql_queries.insights.funnels.utils import funnel_window_interval_unit_to_sql
from posthog.hogql_queries.query_runner import QueryRunner
from posthog.hogql_queries.utils.query_date_range import QueryDateRange
from posthog.models import Team
Expand All @@ -27,6 +28,8 @@
PathCleaningFilter,
PathsFilter,
PathType,
FunnelPathType,
FunnelConversionWindowTimeUnit,
)
from posthog.schema import PathsQuery

Expand Down Expand Up @@ -126,16 +129,113 @@

return event_hogql

def handle_funnel(self) -> tuple[list, Optional[ast.Expr]]:
if (
not self.query.pathsFilter.funnelActorsQuery
or not self.query.pathsFilter.funnelActorsQuery.source.funnelsFilter
):
raise ValueError("Funnel actors query is required for funnel paths")

if self.query.pathsFilter.funnelPaths in (
FunnelPathType.funnel_path_after_step,
FunnelPathType.funnel_path_before_step,
):
funnel_fields = [
ast.Alias(alias="target_timestamp", expr=ast.Field(chain=["funnel_actors", "timestamp"])),
]
interval = self.query.pathsFilter.funnelActorsQuery.source.funnelsFilter.funnelWindowInterval or 14
unit = self.query.pathsFilter.funnelActorsQuery.source.funnelsFilter.funnelWindowIntervalUnit
interval_unit = funnel_window_interval_unit_to_sql(unit)
operator = ">=" if self.query.pathsFilter.funnelPaths == FunnelPathType.funnel_path_after_step else "<="
default_case = f"events.timestamp {operator} {{target_timestamp}}"
if (
self.query.pathsFilter.funnelPaths == FunnelPathType.funnel_path_after_step
and self.query.pathsFilter.funnelActorsQuery.funnelStep
and self.query.pathsFilter.funnelActorsQuery.funnelStep < 0
):
default_case += f" + INTERVAL {interval} {interval_unit}"
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Just the other day I noticed an issue with HogQL and adding intervals. Basically ClickHouse requires the date to be in UTC, otherwise it will have an extra/missing hour for periods spanning a DST transition. See my fix and associated tests here: #21042. The solution for this is to wrap the relevant date in toTimeZone(mydate, 'UTC')

event_filter = parse_expr(
default_case, {"target_timestamp": ast.Field(chain=["funnel_actors", "timestamp"])}
)
return funnel_fields, event_filter
elif self.query.pathsFilter.funnelPaths == FunnelPathType.funnel_path_between_steps:
funnel_fields = [
ast.Alias(alias="min_timestamp", expr=ast.Field(chain=["funnel_actors", "min_timestamp"])),
ast.Alias(alias="max_timestamp", expr=ast.Field(chain=["funnel_actors", "max_timestamp"])),
]
event_filter = ast.And(
exprs=[
ast.CompareOperation(
op=ast.CompareOperationOp.GtEq,
left=ast.Field(chain=["events", "timestamp"]),
right=ast.Field(chain=["funnel_actors", "min_timestamp"]),
),
ast.CompareOperation(
op=ast.CompareOperationOp.LtEq,
left=ast.Field(chain=["events", "timestamp"]),
right=ast.Field(chain=["funnel_actors", "max_timestamp"]),
),
]
)
return funnel_fields, event_filter

return [], None

def funnel_join(self) -> ast.JoinExpr:
if not self.query.pathsFilter.funnelActorsQuery:
raise ValueError("Funnel actors query is required for funnel paths")

from posthog.hogql_queries.insights.insight_actors_query_runner import InsightActorsQueryRunner

actors_query_runner = InsightActorsQueryRunner(
query=self.query.pathsFilter.funnelActorsQuery,
team=self.team,
timings=self.timings,
modifiers=self.modifiers,
limit_context=self.limit_context,
)

assert actors_query_runner.source_runner.context is not None

Check failure on line 198 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

"QueryRunner" has no attribute "context"
actors_query_runner.source_runner.context.includeTimestamp = self.query.pathsFilter.funnelPaths in (

Check failure on line 199 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

"QueryRunner" has no attribute "context"
FunnelPathType.funnel_path_after_step,
FunnelPathType.funnel_path_before_step,
)
actors_query_runner.source_runner.context.includePrecedingTimestamp = (

Check failure on line 203 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

"QueryRunner" has no attribute "context"
self.query.pathsFilter.funnelPaths == FunnelPathType.funnel_path_between_steps
)
actors_query = actors_query_runner.to_query()

return ast.JoinExpr(
table=ast.Field(chain=["events"]),
next_join=ast.JoinExpr(
table=actors_query,
join_type="INNER JOIN",
alias="funnel_actors",
constraint=ast.JoinConstraint(
expr=ast.CompareOperation(
op=ast.CompareOperationOp.Eq,
left=ast.Field(chain=["events", "person_id"]),
right=ast.Field(chain=["funnel_actors", "actor_id"]),
),
),
),
)

def paths_events_query(self) -> ast.SelectQuery:
event_filters = []
pathReplacements: list[PathCleaningFilter] = []

event_hogql = self.construct_event_hogql()
event_conditional = parse_expr("ifNull({event_hogql}, '') AS path_item_ungrouped", {"event_hogql": event_hogql})

funnel_fields, funnel_event_filter = self.handle_funnel()
if funnel_event_filter:
event_filters.append(funnel_event_filter)

fields = [
ast.Field(chain=["events", "timestamp"]),
ast.Field(chain=["events", "person_id"]),
*funnel_fields,
event_conditional,
*[ast.Field(chain=["events", field]) for field in self.extra_event_fields],
*[
Expand Down Expand Up @@ -226,10 +326,13 @@
where=ast.And(exprs=event_filters + self._get_event_query()),
order_by=[
ast.OrderExpr(expr=ast.Field(chain=["person_id"])),
ast.OrderExpr(expr=ast.Field(chain=["timestamp"])),
ast.OrderExpr(expr=ast.Field(chain=["events", "timestamp"])),
],
)

if funnel_fields:
query.select_from = self.funnel_join()

if self.query.samplingFactor is not None and isinstance(self.query.samplingFactor, float) and query.select_from:
query.select_from.sample = ast.SampleExpr(
sample_value=ast.RatioExpr(left=ast.Constant(value=self.query.samplingFactor))
Expand Down Expand Up @@ -408,6 +511,25 @@
else:
return self.get_filtered_path_ordering()

def get_session_threshold_clause(self) -> ast.Expr:
if self.query.pathsFilter.funnelPaths:
interval = 14
interval_unit = FunnelConversionWindowTimeUnit.day

if self.query.pathsFilter.funnelActorsQuery.source.funnelsFilter.funnelWindowInterval:

Check failure on line 519 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "None" of "FunnelsActorsQuery | None" has no attribute "source"

Check failure on line 519 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "None" of "FunnelsFilter | Any | None" has no attribute "funnelWindowInterval"
interval = self.query.pathsFilter.funnelActorsQuery.source.funnelsFilter.funnelWindowInterval

Check failure on line 520 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "None" of "FunnelsActorsQuery | None" has no attribute "source"

Check failure on line 520 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "None" of "FunnelsFilter | Any | None" has no attribute "funnelWindowInterval"
unit = self.query.pathsFilter.funnelActorsQuery.source.funnelsFilter.funnelWindowIntervalUnit

Check failure on line 521 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "None" of "FunnelsActorsQuery | None" has no attribute "source"

Check failure on line 521 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Item "None" of "FunnelsFilter | Any | None" has no attribute "funnelWindowIntervalUnit"
interval_unit = funnel_window_interval_unit_to_sql(unit)

Check failure on line 522 in posthog/hogql_queries/insights/paths_query_runner.py

View workflow job for this annotation

GitHub Actions / Python code quality checks

Incompatible types in assignment (expression has type "Literal['DAY', 'SECOND', 'MINUTE', 'HOUR', 'WEEK', 'MONTH']", variable has type "FunnelConversionWindowTimeUnit")

return parse_expr(
f"arraySplit(x -> if(toDateTime('2018-01-01') + toIntervalSecond(_toInt64(x.3)) < toDateTime('2018-01-01') + INTERVAL {interval} {interval_unit}, 0, 1), paths_tuple)"
)

return parse_expr(
"arraySplit(x -> if(x.3 < ({session_time_threshold}), 0, 1), paths_tuple)",
{"session_time_threshold": ast.Constant(value=SESSION_TIME_THRESHOLD_DEFAULT_SECONDS)},
)

def paths_per_person_query(self) -> ast.SelectQuery:
target_point = self.query.pathsFilter.endPoint or self.query.pathsFilter.startPoint
target_point = (
Expand All @@ -431,15 +553,8 @@
"path_event_query": self.paths_events_query(),
"boundary_event_filter": ast.Constant(value=None),
"target_point": ast.Constant(value=target_point),
"session_threshold_clause": ast.Constant(value=None),
"session_time_threshold": ast.Constant(value=SESSION_TIME_THRESHOLD_DEFAULT_SECONDS),
"session_threshold_clause": self.get_session_threshold_clause(),
"path_tuples_expr": path_tuples_expr,
# TODO: "extra_final_select_statements": ast.Constant(value=None),
"extra_joined_path_tuple_select_statements": ast.Constant(value=None),
"extra_array_filter_select_statements": ast.Constant(value=None),
"extra_limited_path_tuple_elements": ast.Constant(value=None),
"extra_path_time_tuple_select_statements": ast.Constant(value=None),
"extra_group_array_select_statements": ast.Constant(value=None),
}
select = cast(
ast.SelectQuery,
Expand Down Expand Up @@ -475,7 +590,7 @@
/* path_time_tuple.x added below if required */
session_index,
{path_tuples_expr} as paths_tuple,
arraySplit(x -> if(x.3 < ({session_time_threshold}), 0, 1), paths_tuple) as session_paths
{session_threshold_clause} as session_paths
FROM (
SELECT
person_id,
Expand Down
Loading
Loading