Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(insights): Lifecycle insight to HogQL #17135

Closed
wants to merge 25 commits into from
Closed
Show file tree
Hide file tree
Changes from 8 commits
Commits
Show all changes
25 commits
Select commit Hold shift + click to select a range
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion frontend/src/queries/query.ts
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@ import {
isTimeToSeeDataSessionsNode,
isHogQLQuery,
isInsightVizNode,
isLifecycleQuery,
} from './utils'
import api, { ApiMethodOptions } from 'lib/api'
import { getCurrentTeamId } from 'lib/utils/logics'
Expand Down Expand Up @@ -107,7 +108,7 @@ export async function query<N extends DataNode = DataNode>(
try {
if (isPersonsNode(queryNode)) {
response = await api.get(getPersonsEndpoint(queryNode), methodOptions)
} else if (isInsightQueryNode(queryNode)) {
} else if (isInsightQueryNode(queryNode) && !isLifecycleQuery(queryNode)) {
const filters = queryNodeToFilter(queryNode)
const params = {
...filters,
Expand Down
13 changes: 11 additions & 2 deletions posthog/api/query.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,13 +26,15 @@
from posthog.hogql.metadata import get_hogql_metadata
from posthog.hogql.query import execute_hogql_query
from posthog.models import Team
from posthog.models.event.events_query import run_events_query
from posthog.models.user import User
from posthog.nodes.events_query import run_events_query

from posthog.nodes.lifecycle_hogql_query import run_lifecycle_query
from posthog.permissions import ProjectMembershipNecessaryPermissions, TeamMemberAccessPermission
from posthog.queries.time_to_see_data.serializers import SessionEventsQuerySerializer, SessionsQuerySerializer
from posthog.queries.time_to_see_data.sessions import get_session_events, get_sessions
from posthog.rate_limit import AIBurstRateThrottle, AISustainedRateThrottle, TeamRateThrottle
from posthog.schema import EventsQuery, HogQLQuery, HogQLMetadata
from posthog.schema import EventsQuery, HogQLQuery, HogQLMetadata, LifecycleQuery


class QueryThrottle(TeamRateThrottle):
Expand Down Expand Up @@ -215,6 +217,13 @@ def process_query(team: Team, query_json: Dict, default_limit: Optional[int] = N
metadata_query = HogQLMetadata.parse_obj(query_json)
response = get_hogql_metadata(query=metadata_query, team=team)
return _unwrap_pydantic_dict(response)
elif query_kind == "LifecycleQuery":
try:
lifecycle_query = LifecycleQuery.parse_obj(query_json)
response = run_lifecycle_query(query=lifecycle_query, team=team)
return _unwrap_pydantic_dict(response)
except Exception as e:
raise ValidationError(str(e))
Fixed Show fixed Hide fixed
elif query_kind == "DatabaseSchemaQuery":
database = create_hogql_database(team.pk)
return serialize_database(database)
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql/ast.py
Original file line number Diff line number Diff line change
Expand Up @@ -482,7 +482,7 @@ class JoinConstraint(Expr):
@dataclass(kw_only=True)
class JoinExpr(Expr):
# :TRICKY: When adding new fields, make sure they're handled in visitor.py and resolver.py
type: Optional[TableOrSelectType]
type: Optional[TableOrSelectType] = field(default=None)

join_type: Optional[str] = None
table: Optional[Union["SelectQuery", "SelectUnionQuery", Field]] = None
Expand Down
2 changes: 1 addition & 1 deletion posthog/hogql/placeholders.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ def __init__(self, placeholders: Optional[Dict[str, ast.Expr]]):
def visit_placeholder(self, node):
if not self.placeholders:
raise HogQLException(f"Placeholders, such as {{{node.field}}}, are not supported in this context")
if node.field in self.placeholders:
if node.field in self.placeholders and self.placeholders[node.field]:
new_node = self.placeholders[node.field]
new_node.start = node.start
new_node.end = node.end
Expand Down
File renamed without changes.
179 changes: 179 additions & 0 deletions posthog/nodes/lifecycle_hogql_query.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,179 @@
from typing import Dict, Any

from django.utils.timezone import datetime

from posthog.hogql import ast
from posthog.hogql.parser import parse_expr, parse_select
from posthog.hogql.query import execute_hogql_query
from posthog.models import Team
from posthog.nodes.query_date_range import QueryDateRange
from posthog.schema import LifecycleQuery


def create_time_filter(date_range: QueryDateRange, interval: str) -> (ast.Expr, ast.Expr, ast.Expr):
# don't need timezone here, as HogQL will use the project timezone automatically
date_from_s = f"assumeNotNull(toDateTime('{date_range.date_from}'))"
date_from = parse_expr(date_from_s)
date_to_s = f"assumeNotNull(toDateTime('{date_range.date_to}'))"
date_to = parse_expr(date_to_s)

# :TRICKY: We fetch all data even for the period before the graph starts up until the end of the last period
# TODO use placeholders, for some reason I couldn't get this to work properly
time_filter = parse_expr(
f"""
(timestamp >= dateTrunc('{interval}', {date_from_s}) - INTERVAL 1 {interval})
AND
(timestamp < dateTrunc('{interval}', {date_to_s}) + INTERVAL 1 {interval})
"""
)

return time_filter, date_from, date_to


def create_events_query(interval: str, event_filter: ast.Expr):
one_interval_period = parse_expr(f"toInterval{interval.capitalize()}(1)")

if not event_filter:
event_filter = ast.Constant(value=True)

placeholders = {
"event_filter": event_filter,
"interval": ast.Constant(value=interval),
"one_interval_period": one_interval_period,
}

events_query = parse_select(
"""
SELECT
events.person.id as person_id,
min(events.person.created_at) AS created_at,
arraySort(groupUniqArray(dateTrunc({interval}, events.timestamp))) AS all_activity,
arrayPopBack(arrayPushFront(all_activity, dateTrunc({interval}, created_at))) as previous_activity,
arrayPopFront(arrayPushBack(all_activity, dateTrunc({interval}, toDateTime('1970-01-01 00:00:00')))) as following_activity,
arrayMap((previous, current, index) -> (previous = current ? 'new' : ((current - {one_interval_period}) = previous AND index != 1) ? 'returning' : 'resurrecting'), previous_activity, all_activity, arrayEnumerate(all_activity)) as initial_status,
arrayMap((current, next) -> (current + {one_interval_period} = next ? '' : 'dormant'), all_activity, following_activity) as dormant_status,
arrayMap(x -> x + {one_interval_period}, arrayFilter((current, is_dormant) -> is_dormant = 'dormant', all_activity, dormant_status)) as dormant_periods,
arrayMap(x -> 'dormant', dormant_periods) as dormant_label,
arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) as temp_concat,
arrayJoin(temp_concat) as period_status_pairs,
period_status_pairs.1 as start_of_period,
period_status_pairs.2 as status
FROM events
WHERE {event_filter}
GROUP BY person_id
""",
placeholders=placeholders,
)
return events_query


def run_lifecycle_query(
team: Team,
query: LifecycleQuery,
) -> Dict[str, Any]:
now_dt = datetime.now()

try:
interval = query.interval.name
except AttributeError:
interval = "day"
if interval not in ["minute", "hour", "day", "week", "month", "quarter", "year"]:
raise ValueError(f"Invalid interval: {interval}")
one_interval_period = parse_expr(f"toInterval{interval.capitalize()}(1)")
number_interval_period = parse_expr(f"toInterval{interval.capitalize()}(number)")

query_date_range = QueryDateRange(date_range=query.dateRange, team=team, interval=query.interval, now=now_dt)

time_filter, date_from, date_to = create_time_filter(query_date_range, interval=interval)
event_filter = time_filter # TODO: add all other filters

placeholders = {
"interval": ast.Constant(value=interval),
"one_interval_period": one_interval_period,
"number_interval_period": number_interval_period,
"event_filter": event_filter,
"date_from": date_from,
"date_to": date_to,
}

events_query = create_events_query(interval=interval, event_filter=event_filter)

periods = parse_select(
"""
SELECT (
dateTrunc({interval}, {date_to}) - {number_interval_period}
) AS start_of_period
FROM numbers(
dateDiff(
{interval},
dateTrunc({interval}, {date_from}),
dateTrunc({interval}, {date_to} + {one_interval_period})
)
)
""",
placeholders=placeholders,
)

lifecycle_sql = parse_select(
"""
SELECT groupArray(start_of_period) AS date,
groupArray(counts) AS total,
status
FROM (
SELECT
status = 'dormant' ? negate(sum(counts)) : negate(negate(sum(counts))) as counts,
start_of_period,
status
FROM (
SELECT
periods.start_of_period as start_of_period,
0 AS counts,
status
FROM {periods} as periods
CROSS JOIN (
SELECT status
FROM (SELECT 1)
ARRAY JOIN ['new', 'returning', 'resurrecting', 'dormant'] as status
) as sec
ORDER BY status, start_of_period
UNION ALL
SELECT
start_of_period, count(DISTINCT person_id) AS counts, status
FROM {events_query}
GROUP BY start_of_period, status
)
WHERE start_of_period <= dateTrunc({interval}, {date_to})
AND start_of_period >= dateTrunc({interval}, {date_from})
GROUP BY start_of_period, status
ORDER BY start_of_period ASC
)
GROUP BY status
""",
{**placeholders, "periods": periods, "events_query": events_query},
)

response = execute_hogql_query(
team=team,
query=lifecycle_sql,
query_type="LifecycleQuery",
)

res = []
for val in response.results:
counts = val[1]
labels = [item.strftime("%-d-%b-%Y{}".format(" %H:%M" if interval == "hour" else "")) for item in val[0]]
days = [item.strftime("%Y-%m-%d{}".format(" %H:%M:%S" if interval == "hour" else "")) for item in val[0]]

label = "{} - {}".format("", val[2]) # entity.name
additional_values = {"label": label, "status": val[2]}
res.append(
{
"data": [float(c) for c in counts],
"count": float(sum(counts)),
"labels": labels,
"days": days,
**additional_values,
}
)

return {"result": res}
Loading