From 9a38a3dfdeced21ad78b9239c22e50a5b08b4009 Mon Sep 17 00:00:00 2001 From: Marius Andra Date: Tue, 6 Feb 2024 17:05:32 +0100 Subject: [PATCH] feat(insights): HogQL stickiness comparisons (#19947) --- mypy-baseline.txt | 1 - .../insights/stickiness_query_runner.py | 17 ++- .../commands/compare_hogql_insights.py | 109 +++++++++++------- 3 files changed, 84 insertions(+), 43 deletions(-) diff --git a/mypy-baseline.txt b/mypy-baseline.txt index 91d6e4406701f..30af502f44d26 100644 --- a/mypy-baseline.txt +++ b/mypy-baseline.txt @@ -703,7 +703,6 @@ posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index] posthog/queries/trends/test/test_person.py:0: error: "str" has no attribute "get" [attr-defined] posthog/queries/trends/test/test_person.py:0: error: Invalid index type "int" for "HttpResponse"; expected type "str | bytes" [index] -posthog/management/commands/compare_hogql_insights.py:0: error: "BaseModel" has no attribute "results" [attr-defined] posthog/hogql/test/test_query.py:0: error: Argument 1 to "len" has incompatible type "list[Any] | None"; expected "Sized" [arg-type] posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index] posthog/hogql/test/test_query.py:0: error: Value of type "list[QueryTiming] | None" is not indexable [index] diff --git a/posthog/hogql_queries/insights/stickiness_query_runner.py b/posthog/hogql_queries/insights/stickiness_query_runner.py index 3b311789f9267..34478fac1dd32 100644 --- a/posthog/hogql_queries/insights/stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/stickiness_query_runner.py @@ -136,7 +136,7 @@ def to_query(self) -> List[ast.SelectQuery]: # type: ignore interval_addition = ast.Call( name=f"toInterval{date_range.interval_name.capitalize()}", - args=[ast.Constant(value=1)], + args=[ast.Constant(value=0 if date_range.interval_name == "week" else 1)], ) select_query = parse_select( @@ -288,6 +288,16 @@ def where_clause(self, series_with_extra: SeriesWithExtras) -> ast.Expr: if series.properties is not None and series.properties != []: filters.append(property_to_expr(series.properties, self.team)) + # Ignore empty groups + if series.math == "unique_group" and series.math_group_type_index is not None: + filters.append( + ast.CompareOperation( + op=ast.CompareOperationOp.NotEq, + left=ast.Field(chain=["e", f"$group_{int(series.math_group_type_index)}"]), + right=ast.Constant(value=""), + ) + ) + if len(filters) == 0: return ast.Constant(value=True) elif len(filters) == 1: @@ -311,7 +321,10 @@ def series_event(self, series: EventsNode | ActionsNode) -> str | None: def intervals_num(self): delta = self.query_date_range.date_to() - self.query_date_range.date_from() - return delta.days + 1 + if self.query_date_range.interval_name == "day": + return delta.days + 1 + else: + return delta.days def setup_series(self) -> List[SeriesWithExtras]: series_with_extras = [ diff --git a/posthog/management/commands/compare_hogql_insights.py b/posthog/management/commands/compare_hogql_insights.py index 0a1f071c2f38c..0d1cef6c67cc9 100644 --- a/posthog/management/commands/compare_hogql_insights.py +++ b/posthog/management/commands/compare_hogql_insights.py @@ -1,55 +1,84 @@ from django.core.management.base import BaseCommand -from posthog.schema import HogQLQueryModifiers, MaterializationMode - class Command(BaseCommand): help = "Test if HogQL insights match their legacy counterparts" def handle(self, *args, **options): - from posthog.models import Insight, Filter + from typing import cast + from posthog.schema import HogQLQueryModifiers, HogQLQueryResponse, MaterializationMode + from posthog.models import Insight, Filter, RetentionFilter + from posthog.models.filters import StickinessFilter + from posthog.queries.retention import Retention from posthog.queries.trends.trends import Trends + from posthog.queries.stickiness.stickiness import Stickiness from posthog.hogql_queries.legacy_compatibility.filter_to_query import filter_to_query from posthog.hogql_queries.query_runner import get_query_runner insights = ( - Insight.objects.filter(filters__contains={"insight": "LIFECYCLE"}, saved=True, deleted=False) + Insight.objects.filter(filters__contains={"insight": "STICKINESS"}, saved=True, deleted=False) .order_by("id") .all() ) - - for insight in insights[0:10]: - print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") # noqa: T201 - print( # noqa: T201 - f"Checking Lifecycle Insight {insight.id} {insight.short_id} - {insight.name} " - f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}" - ) - if insight.filters.get("aggregation_group_type_index", None) is not None: - del insight.filters["aggregation_group_type_index"] - filter = Filter(insight.filters, team=insight.team) - legacy_results = Trends().run(filter, insight.team) - for row in legacy_results: - if row.get("persons_urls"): - del row["persons_urls"] - query = filter_to_query(insight.filters) - modifiers = HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_string) - query_runner = get_query_runner(query, insight.team, modifiers=modifiers) - hogql_results = query_runner.calculate().results - order = {"new": 1, "returning": 2, "resurrecting": 3, "dormant": 4} - legacy_results = sorted(legacy_results, key=lambda k: order[k["status"]]) - hogql_results = sorted(hogql_results, key=lambda k: order[k["status"]]) - all_ok = True - for legacy_result, hogql_result in zip(legacy_results, hogql_results): - fields = ["data", "days", "count", "labels", "label", "status"] - for field in fields: - if legacy_result.get(field) != hogql_result.get(field): - print( # noqa: T201 - f"Insight https://app.posthog.com/insights/{insight.short_id}/edit" - f" ({insight.id}). MISMATCH in {legacy_result.get('status')} row, field {field}" - ) - print("Legacy:", legacy_result.get(field)) # noqa: T201 - print("HogQL:", hogql_result.get(field)) # noqa: T201 - print("") # noqa: T201 - all_ok = False - if all_ok: - print("ALL OK!") # noqa: T201 + for insight in insights[200:300]: + try: + print("++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++") # noqa: T201 + insight_type = insight.filters.get("insight") + print( # noqa: T201 + f"Checking {insight_type} Insight {insight.id} {insight.short_id} - {insight.name} " + f"(team {insight.team_id})... Interval: {insight.filters.get('interval')}" + ) + if insight.filters.get("aggregation_group_type_index", None) is not None: + del insight.filters["aggregation_group_type_index"] + if insight_type == "STICKINESS": + sticky_filter = StickinessFilter(insight.filters, team=insight.team) + legacy_results = Stickiness().run(sticky_filter, insight.team) + elif insight_type == "RETENTION": + retention_filter = RetentionFilter(insight.filters, team=insight.team) + legacy_results = Retention().run(retention_filter, insight.team) + else: + filter = Filter(insight.filters, team=insight.team) + legacy_results = Trends().run(filter, insight.team) + for row in legacy_results: + if row.get("persons_urls"): + del row["persons_urls"] + query = filter_to_query(insight.filters) + modifiers = HogQLQueryModifiers(materializationMode=MaterializationMode.legacy_null_as_string) + query_runner = get_query_runner(query, insight.team, modifiers=modifiers) + hogql_results = cast(HogQLQueryResponse, query_runner.calculate()).results or [] + all_ok = True + for legacy_result, hogql_result in zip(legacy_results, hogql_results): + if insight_type == "LIFECYCLE": + fields = ["data", "days", "count", "labels", "label", "status"] + elif insight_type == "RETENTION": + if legacy_result.get("date") != hogql_result.date: + all_ok = False + print("Date: ", legacy_result.get("date"), hogql_result.date) # noqa: T201 + if legacy_result.get("label") != hogql_result.label: + all_ok = False + print("Label: ", legacy_result.get("label"), hogql_result.label) # noqa: T201 + legacy_values = [c.get("count") for c in legacy_result.get("values") or []] + hogql_values = [c.count for c in hogql_result.values or []] + if legacy_values != hogql_values: + all_ok = False + print("Values: ", legacy_values, hogql_values) # noqa: T201 + continue + elif insight_type == "STICKINESS": + fields = ["label", "count", "data", "days"] + else: + fields = ["label", "count", "data", "labels", "days"] + for field in fields: + if legacy_result.get(field) != hogql_result.get(field): + print( # noqa: T201 + f"Insight https://us.posthog.com/project/{insight.team_id}/insights/{insight.short_id}/edit" + f" ({insight.id}). MISMATCH in {legacy_result.get('status')} row, field {field}" + ) + print("Legacy:", legacy_result.get(field)) # noqa: T201 + print("HogQL:", hogql_result.get(field)) # noqa: T201 + print("") # noqa: T201 + all_ok = False + if all_ok: + print("ALL OK!") # noqa: T201 + except Exception as e: + url = f"https://us.posthog.com/project/{insight.team_id}/insights/{insight.short_id}/edit" + print(f"Insight {url} ({insight.id}). ERROR: {e}") # noqa: T201