Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat(insights): HogQL stickiness comparisons #19947

Merged
merged 14 commits into from
Feb 6, 2024
1 change: 0 additions & 1 deletion mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -704,7 +704,6 @@ posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get
posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index]
posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined]
posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index]
posthog/management/commands/compare_hogql_insights.py:0: error: "BaseModel" has no attribute "results" [attr-defined]
posthog/hogql/test/test_query.py:0: error: Argument 1 to "len" has incompatible type "list[Any] | None"; expected "Sized" [arg-type]
posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index]
posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index]
Expand Down
17 changes: 15 additions & 2 deletions posthog/hogql_queries/insights/stickiness_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def to_query(self) -> List[ast.SelectQuery]: # type: ignore

interval_addition = ast.Call(
name=f"toInterval{date_range.interval_name.capitalize()}",
args=[ast.Constant(value=1)],
args=[ast.Constant(value=0 if date_range.interval_name == "week" else 1)],
)

select_query = parse_select(
Expand Down Expand Up @@ -288,6 +288,16 @@ def where_clause(self, series_with_extra: SeriesWithExtras) -> ast.Expr:
if series.properties is not None and series.properties != []:
filters.append(property_to_expr(series.properties, self.team))

# Ignore empty groups
if series.math == "unique_group" and series.math_group_type_index is not None:
filters.append(
ast.CompareOperation(
op=ast.CompareOperationOp.NotEq,
left=ast.Field(chain=["e", f"$group_{int(series.math_group_type_index)}"]),
right=ast.Constant(value=""),
)
)

if len(filters) == 0:
return ast.Constant(value=True)
elif len(filters) == 1:
Expand All @@ -311,7 +321,10 @@ def series_event(self, series: EventsNode | ActionsNode) -> str | None:

def intervals_num(self):
    """Return how many intervals the query's date range spans.

    The span is measured as the whole-day difference between
    ``self.query_date_range.date_to()`` and
    ``self.query_date_range.date_from()``.

    Returns:
        ``delta.days + 1`` when the interval name is ``"day"``, otherwise
        the plain ``delta.days`` difference.
    """
    delta = self.query_date_range.date_to() - self.query_date_range.date_from()
    # The interval check must come first: an unconditional
    # ``return delta.days + 1`` ahead of it made this branch unreachable.
    # NOTE(review): the +1 presumably counts both endpoints of a daily
    # range -- confirm against the date-range helper.
    if self.query_date_range.interval_name == "day":
        return delta.days + 1
    return delta.days

def setup_series(self) -> List[SeriesWithExtras]:
series_with_extras = [
Expand Down
109 changes: 69 additions & 40 deletions posthog/management/commands/compare_hogql_insights.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,84 @@
from django.core.management.base import BaseCommand

from posthog.schema import HogQLQueryModifiers, MaterializationMode


class Command(BaseCommand):
help = "Test if HogQL insights match their legacy counterparts"

def handle(self, *args, **options):
from posthog.models import Insight, Filter
from typing import cast
from posthog.schema import HogQLQueryModifiers, HogQLQueryResponse, MaterializationMode
from posthog.models import Insight, Filter, RetentionFilter
from posthog.models.filters import StickinessFilter
from posthog.queries.retention import Retention
from posthog.queries.trends.trends import Trends
from posthog.queries.stickiness.stickiness import Stickiness
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.hogql_queries.query_runner import get_query_runner

insights = (
Insight.objects.filter(filters__contains={"insight": "LIFECYCLE"}, saved=True, deleted=False)
Insight.objects.filter(filters__contains={"insight": "STICKINESS"}, saved=True, deleted=False)
.order_by("id")
.all()
)

for insight in insights[0:10]:
print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") # noqa: T201
print( # noqa: T201
f"Checking Lifecycle Insight {insight.id} {insight.short_id} - {insight.name} "
f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}"
)
if insight.filters.get("aggregation_group_type_index", None) is not None:
del insight.filters["aggregation_group_type_index"]
filter = Filter(insight.filters, team=insight.team)
legacy_results = Trends().run(filter, insight.team)
for row in legacy_results:
if row.get("persons_urls"):
del row["persons_urls"]
query = filter_to_query(insight.filters)
modifiers = HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_string)
query_runner = get_query_runner(query, insight.team, modifiers=modifiers)
hogql_results = query_runner.calculate().results
order = {"new": 1, "returning": 2, "resurrecting": 3, "dormant": 4}
legacy_results = sorted(legacy_results, key=lambda k: order[k["status"]])
hogql_results = sorted(hogql_results, key=lambda k: order[k["status"]])
all_ok = True
for legacy_result, hogql_result in zip(legacy_results, hogql_results):
fields = ["data", "days", "count", "labels", "label", "status"]
for field in fields:
if legacy_result.get(field) != hogql_result.get(field):
print( # noqa: T201
f"Insight https://app.posthog.com/insights/{insight.short_id}/edit"
f" ({insight.id}). MISMATCH in {legacy_result.get('status')} row, field {field}"
)
print("Legacy:", legacy_result.get(field)) # noqa: T201
print("HogQL:", hogql_result.get(field)) # noqa: T201
print("") # noqa: T201
all_ok = False
if all_ok:
print("ALL OK!") # noqa: T201
for insight in insights[200:300]:
try:
print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") # noqa: T201
insight_type = insight.filters.get("insight")
print( # noqa: T201
f"Checking {insight_type} Insight {insight.id} {insight.short_id} - {insight.name} "
f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}"
)
if insight.filters.get("aggregation_group_type_index", None) is not None:
del insight.filters["aggregation_group_type_index"]
if insight_type == "STICKINESS":
sticky_filter = StickinessFilter(insight.filters, team=insight.team)
legacy_results = Stickiness().run(sticky_filter, insight.team)
elif insight_type == "RETENTION":
retention_filter = RetentionFilter(insight.filters, team=insight.team)
legacy_results = Retention().run(retention_filter, insight.team)
else:
filter = Filter(insight.filters, team=insight.team)
legacy_results = Trends().run(filter, insight.team)
for row in legacy_results:
if row.get("persons_urls"):
del row["persons_urls"]
query = filter_to_query(insight.filters)
modifiers = HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_string)
query_runner = get_query_runner(query, insight.team, modifiers=modifiers)
hogql_results = cast(HogQLQueryResponse, query_runner.calculate()).results or []
all_ok = True
for legacy_result, hogql_result in zip(legacy_results, hogql_results):
if insight_type == "LIFECYCLE":
fields = ["data", "days", "count", "labels", "label", "status"]
elif insight_type == "RETENTION":
if legacy_result.get("date") != hogql_result.date:
all_ok = False
print("Date: ", legacy_result.get("date"), hogql_result.date) # noqa: T201
if legacy_result.get("label") != hogql_result.label:
all_ok = False
print("Label: ", legacy_result.get("label"), hogql_result.label) # noqa: T201
legacy_values = [c.get("count") for c in legacy_result.get("values") or []]
hogql_values = [c.count for c in hogql_result.values or []]
if legacy_values != hogql_values:
all_ok = False
print("Values: ", legacy_values, hogql_values) # noqa: T201
continue
elif insight_type == "STICKINESS":
fields = ["label", "count", "data", "days"]
else:
fields = ["label", "count", "data", "labels", "days"]
for field in fields:
if legacy_result.get(field) != hogql_result.get(field):
print( # noqa: T201
f"Insight https://us.posthog.com/project/{insight.team_id}/insights/{insight.short_id}/edit"
f" ({insight.id}). MISMATCH in {legacy_result.get('status')} row, field {field}"
)
print("Legacy:", legacy_result.get(field)) # noqa: T201
print("HogQL:", hogql_result.get(field)) # noqa: T201
print("") # noqa: T201
all_ok = False
if all_ok:
print("ALL OK!") # noqa: T201
except Exception as e:
url = f"https://us.posthog.com/project/{insight.team_id}/insights/{insight.short_id}/edit"
print(f"Insight {url} ({insight.id}). ERROR: {e}") # noqa: T201
Loading