Skip to content

Commit

Permalink
feat(insights): HogQL stickiness comparisons (#19947)
Browse files Browse the repository at this point in the history
  • Loading branch information
mariusandra authored Feb 6, 2024
1 parent 8d6ebe7 commit 9a38a3d
Show file tree
Hide file tree
Showing 3 changed files with 84 additions and 43 deletions.
1 change: 0 additions & 1 deletion mypy-baseline.txt
Original file line number Diff line number Diff line change
Expand Up @@ -703,7 +703,6 @@ posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get
posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index]
posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined]
posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index]
posthog/management/commands/compare_hogql_insights.py:0: error: "BaseModel" has no attribute "results" [attr-defined]
posthog/hogql/test/test_query.py:0: error: Argument 1 to "len" has incompatible type "list[Any] | None"; expected "Sized" [arg-type]
posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index]
posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index]
Expand Down
17 changes: 15 additions & 2 deletions posthog/hogql_queries/insights/stickiness_query_runner.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ def to_query(self) -> List[ast.SelectQuery]: # type: ignore

interval_addition = ast.Call(
name=f"toInterval{date_range.interval_name.capitalize()}",
args=[ast.Constant(value=1)],
args=[ast.Constant(value=0 if date_range.interval_name == "week" else 1)],
)

select_query = parse_select(
Expand Down Expand Up @@ -288,6 +288,16 @@ def where_clause(self, series_with_extra: SeriesWithExtras) -> ast.Expr:
if series.properties is not None and series.properties != []:
filters.append(property_to_expr(series.properties, self.team))

# Ignore empty groups
if series.math == "unique_group" and series.math_group_type_index is not None:
filters.append(
ast.CompareOperation(
op=ast.CompareOperationOp.NotEq,
left=ast.Field(chain=["e", f"$group_{int(series.math_group_type_index)}"]),
right=ast.Constant(value=""),
)
)

if len(filters) == 0:
return ast.Constant(value=True)
elif len(filters) == 1:
Expand All @@ -311,7 +321,10 @@ def series_event(self, series: EventsNode | ActionsNode) -> str | None:

def intervals_num(self):
delta = self.query_date_range.date_to() - self.query_date_range.date_from()
return delta.days + 1
if self.query_date_range.interval_name == "day":
return delta.days + 1
else:
return delta.days

def setup_series(self) -> List[SeriesWithExtras]:
series_with_extras = [
Expand Down
109 changes: 69 additions & 40 deletions posthog/management/commands/compare_hogql_insights.py
Original file line number Diff line number Diff line change
@@ -1,55 +1,84 @@
from django.core.management.base import BaseCommand

from posthog.schema import HogQLQueryModifiers, MaterializationMode


class Command(BaseCommand):
help = "Test if HogQL insights match their legacy counterparts"

def handle(self, *args, **options):
from posthog.models import Insight, Filter
from typing import cast
from posthog.schema import HogQLQueryModifiers, HogQLQueryResponse, MaterializationMode
from posthog.models import Insight, Filter, RetentionFilter
from posthog.models.filters import StickinessFilter
from posthog.queries.retention import Retention
from posthog.queries.trends.trends import Trends
from posthog.queries.stickiness.stickiness import Stickiness
from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query
from posthog.hogql_queries.query_runner import get_query_runner

insights = (
Insight.objects.filter(filters__contains={"insight": "LIFECYCLE"}, saved=True, deleted=False)
Insight.objects.filter(filters__contains={"insight": "STICKINESS"}, saved=True, deleted=False)
.order_by("id")
.all()
)

for insight in insights[0:10]:
print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") # noqa: T201
print( # noqa: T201
f"Checking Lifecycle Insight {insight.id} {insight.short_id} - {insight.name} "
f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}"
)
if insight.filters.get("aggregation_group_type_index", None) is not None:
del insight.filters["aggregation_group_type_index"]
filter = Filter(insight.filters, team=insight.team)
legacy_results = Trends().run(filter, insight.team)
for row in legacy_results:
if row.get("persons_urls"):
del row["persons_urls"]
query = filter_to_query(insight.filters)
modifiers = HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_string)
query_runner = get_query_runner(query, insight.team, modifiers=modifiers)
hogql_results = query_runner.calculate().results
order = {"new": 1, "returning": 2, "resurrecting": 3, "dormant": 4}
legacy_results = sorted(legacy_results, key=lambda k: order[k["status"]])
hogql_results = sorted(hogql_results, key=lambda k: order[k["status"]])
all_ok = True
for legacy_result, hogql_result in zip(legacy_results, hogql_results):
fields = ["data", "days", "count", "labels", "label", "status"]
for field in fields:
if legacy_result.get(field) != hogql_result.get(field):
print( # noqa: T201
f"Insight https://app.posthog.com/insights/{insight.short_id}/edit"
f" ({insight.id}). MISMATCH in {legacy_result.get('status')} row, field {field}"
)
print("Legacy:", legacy_result.get(field)) # noqa: T201
print("HogQL:", hogql_result.get(field)) # noqa: T201
print("") # noqa: T201
all_ok = False
if all_ok:
print("ALL OK!") # noqa: T201
for insight in insights[200:300]:
try:
print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") # noqa: T201
insight_type = insight.filters.get("insight")
print( # noqa: T201
f"Checking {insight_type} Insight {insight.id} {insight.short_id} - {insight.name} "
f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}"
)
if insight.filters.get("aggregation_group_type_index", None) is not None:
del insight.filters["aggregation_group_type_index"]
if insight_type == "STICKINESS":
sticky_filter = StickinessFilter(insight.filters, team=insight.team)
legacy_results = Stickiness().run(sticky_filter, insight.team)
elif insight_type == "RETENTION":
retention_filter = RetentionFilter(insight.filters, team=insight.team)
legacy_results = Retention().run(retention_filter, insight.team)
else:
filter = Filter(insight.filters, team=insight.team)
legacy_results = Trends().run(filter, insight.team)
for row in legacy_results:
if row.get("persons_urls"):
del row["persons_urls"]
query = filter_to_query(insight.filters)
modifiers = HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_string)
query_runner = get_query_runner(query, insight.team, modifiers=modifiers)
hogql_results = cast(HogQLQueryResponse, query_runner.calculate()).results or []
all_ok = True
for legacy_result, hogql_result in zip(legacy_results, hogql_results):
if insight_type == "LIFECYCLE":
fields = ["data", "days", "count", "labels", "label", "status"]
elif insight_type == "RETENTION":
if legacy_result.get("date") != hogql_result.date:
all_ok = False
print("Date: ", legacy_result.get("date"), hogql_result.date) # noqa: T201
if legacy_result.get("label") != hogql_result.label:
all_ok = False
print("Label: ", legacy_result.get("label"), hogql_result.label) # noqa: T201
legacy_values = [c.get("count") for c in legacy_result.get("values") or []]
hogql_values = [c.count for c in hogql_result.values or []]
if legacy_values != hogql_values:
all_ok = False
print("Values: ", legacy_values, hogql_values) # noqa: T201
continue
elif insight_type == "STICKINESS":
fields = ["label", "count", "data", "days"]
else:
fields = ["label", "count", "data", "labels", "days"]
for field in fields:
if legacy_result.get(field) != hogql_result.get(field):
print( # noqa: T201
f"Insight https://us.posthog.com/project/{insight.team_id}/insights/{insight.short_id}/edit"
f" ({insight.id}). MISMATCH in {legacy_result.get('status')} row, field {field}"
)
print("Legacy:", legacy_result.get(field)) # noqa: T201
print("HogQL:", hogql_result.get(field)) # noqa: T201
print("") # noqa: T201
all_ok = False
if all_ok:
print("ALL OK!") # noqa: T201
except Exception as e:
url = f"https://us.posthog.com/project/{insight.team_id}/insights/{insight.short_id}/edit"
print(f"Insight {url} ({insight.id}). ERROR: {e}") # noqa: T201

0 comments on commit 9a38a3d

Please sign in to comment.