From 08e77ec5f6821ed960b2a88adc0702084d04ef63 Mon Sep 17 00:00:00 2001 From: Alexander Spicer Date: Sat, 22 Jun 2024 15:42:43 -0700 Subject: [PATCH] source id generalization --- posthog/hogql/test/test_resolver.py | 2 +- posthog/hogql_queries/actors_query_runner.py | 2 +- .../insights/stickiness_query_runner.py | 7 +- .../test_insight_actors_query_runner.ambr | 403 ------------------ 4 files changed, 5 insertions(+), 409 deletions(-) diff --git a/posthog/hogql/test/test_resolver.py b/posthog/hogql/test/test_resolver.py index c0555a8bebe5e..e6854e6730834 100644 --- a/posthog/hogql/test/test_resolver.py +++ b/posthog/hogql/test/test_resolver.py @@ -416,7 +416,7 @@ def test_visit_hogqlx_tag_source(self): "(SELECT id, properties.email AS email FROM " "(SELECT DISTINCT person_id FROM events) " "AS source INNER JOIN " - "persons ON equals(persons.id, source.person_id) ORDER BY id ASC) " + "filterable_persons ON equals(filterable_persons.id, source.person_id) ORDER BY id ASC) " f"LIMIT {MAX_SELECT_RETURNED_ROWS}" ) diff --git a/posthog/hogql_queries/actors_query_runner.py b/posthog/hogql_queries/actors_query_runner.py index c2104de70aab4..f40aeae63a89e 100644 --- a/posthog/hogql_queries/actors_query_runner.py +++ b/posthog/hogql_queries/actors_query_runner.py @@ -268,7 +268,7 @@ def to_query(self) -> ast.SelectQuery: NonSerializableTags.FILTERABLE_PERSONS.value: ast.CompareOperation( left=ast.Field(chain=["id"]), right=ast.SelectQuery( - select=[ast.Field(chain=[source_alias, "actor_id"])], + select=[ast.Field(chain=[source_alias, *self.source_id_column(source_query)])], select_from=ast.JoinExpr(table=source_query, alias=source_alias), ), op=ast.CompareOperationOp.In, diff --git a/posthog/hogql_queries/insights/stickiness_query_runner.py b/posthog/hogql_queries/insights/stickiness_query_runner.py index b38ef7e26a6c8..c885397f68d97 100644 --- a/posthog/hogql_queries/insights/stickiness_query_runner.py +++ b/posthog/hogql_queries/insights/stickiness_query_runner.py @@ -182,10 +182,9 @@ def to_actors_query(self, interval_num: Optional[int] = None) -> ast.SelectQuery for series in self.series: events_query = self._events_query(series) aggregation_alias = "actor_id" - # if series.series.math == "hogql" and series.series.math_hogql is not None: - # aggregation_alias = "actor_id" - # elif series.series.math == "unique_group" and series.series.math_group_type_index is not None: - if series.series.math == "unique_group" and series.series.math_group_type_index is not None: + if series.series.math == "hogql" and series.series.math_hogql is not None: + aggregation_alias = "actor_id" + elif series.series.math == "unique_group" and series.series.math_group_type_index is not None: aggregation_alias = "group_key" events_query.select = [ast.Alias(alias=aggregation_alias, expr=ast.Field(chain=["aggregation_target"]))] events_query.group_by = None diff --git a/posthog/hogql_queries/insights/test/__snapshots__/test_insight_actors_query_runner.ambr b/posthog/hogql_queries/insights/test/__snapshots__/test_insight_actors_query_runner.ambr index 2d7574764ccb2..c5fe45ec08e24 100644 --- a/posthog/hogql_queries/insights/test/__snapshots__/test_insight_actors_query_runner.ambr +++ b/posthog/hogql_queries/insights/test/__snapshots__/test_insight_actors_query_runner.ambr @@ -1,202 +1,4 @@ # serializer version: 1 -# name: TestInsightActorsQueryRunner.test_insight_persons_funnels_query - ''' - SELECT name AS name - FROM - (SELECT filterable_persons.properties___name AS name - FROM - (SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, - median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, - steps AS steps, - max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, - step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - latest_1 AS latest_1, - if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, - if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, - timestamp AS timestamp, - step_0 AS step_0, - latest_0 AS latest_0, - step_1 AS step_1, - min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, - e__pdi.person_id AS aggregation_target, - if(equals(e.event, '$pageview'), 1, 0) AS step_0, - if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, - if(equals(e.event, '$pageview'), 1, 0) AS step_1, - if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-19 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('$pageview'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, - steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [2]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, - query_cache_ttl=600) AS source - INNER JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE and(equals(person.team_id, 2), in(person.id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT aggregation_target AS actor_id - FROM - (SELECT aggregation_target AS aggregation_target, steps AS steps, avg(step_1_conversion_time) AS step_1_average_conversion_time_inner, median(step_1_conversion_time) AS step_1_median_conversion_time_inner - FROM - (SELECT aggregation_target AS aggregation_target, steps AS steps, max(steps) OVER (PARTITION BY aggregation_target) AS max_steps, step_1_conversion_time AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, latest_1 AS latest_1, if(and(ifNull(less(latest_0, latest_1), 0), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), 2, 1) AS steps, if(and(isNotNull(latest_1), ifNull(lessOrEquals(latest_1, plus(toTimeZone(latest_0, 'UTC'), toIntervalDay(14))), 0)), dateDiff('second', latest_0, latest_1), NULL) AS step_1_conversion_time - FROM - (SELECT aggregation_target AS aggregation_target, timestamp AS timestamp, step_0 AS step_0, latest_0 AS latest_0, step_1 AS step_1, min(latest_1) OVER (PARTITION BY aggregation_target - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 1 PRECEDING) AS latest_1 - FROM - (SELECT toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, e__pdi.person_id AS aggregation_target, if(equals(e.event, '$pageview'), 1, 0) AS step_0, if(ifNull(equals(step_0, 1), 0), timestamp, NULL) AS latest_0, if(equals(e.event, '$pageview'), 1, 0) AS step_1, if(ifNull(equals(step_1, 1), 0), timestamp, NULL) AS latest_1 - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), and(and(greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-01 00:00:00.000000', 6, 'US/Pacific')), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-19 23:59:59.999999', 6, 'US/Pacific'))), in(e.event, tuple('$pageview'))), or(ifNull(equals(step_0, 1), 0), ifNull(equals(step_1, 1), 0))))) - WHERE ifNull(equals(step_0, 1), 0))) - GROUP BY aggregation_target, steps - HAVING ifNull(equals(steps, max_steps), isNull(steps) - and isNull(max_steps))) - WHERE ifNull(in(steps, [2]), 0) - ORDER BY aggregation_target ASC SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0))))) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) - ORDER BY filterable_persons.properties___name ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestInsightActorsQueryRunner.test_insight_persons_lifecycle_query - ''' - SELECT n AS n - FROM - (SELECT filterable_persons.properties___name AS n - FROM - (SELECT DISTINCT actor_id AS actor_id - FROM - (SELECT min(events__pdi__person.created_at) AS created_at, - arraySort(groupUniqArray(toStartOfDay(toTimeZone(events.timestamp, 'US/Pacific')))) AS all_activity, - arrayPopBack(arrayPushFront(all_activity, toStartOfDay(created_at))) AS previous_activity, - arrayPopFront(arrayPushBack(all_activity, toStartOfDay(parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'US/Pacific')))) AS following_activity, - arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous) - and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1))) - and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status, - arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1))) - and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status, - arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods, - arrayMap(x -> 'dormant', dormant_periods) AS dormant_label, - arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat, - arrayJoin(temp_concat) AS period_status_pairs, - period_status_pairs.1 AS start_of_period, - period_status_pairs.2 AS status, - events__pdi.person_id AS actor_id - FROM events - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) - LEFT JOIN - (SELECT argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version) AS created_at, - person.id AS id - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) - WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'US/Pacific'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 00:00:00', 6, 'US/Pacific'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'US/Pacific'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), toIntervalDay(1))), equals(events.event, '$pageview')) - GROUP BY actor_id) - WHERE and(ifNull(equals(start_of_period, toStartOfDay(parseDateTime64BestEffortOrNull('2020-01-12', 6, 'US/Pacific'))), isNull(start_of_period) - and isNull(toStartOfDay(parseDateTime64BestEffortOrNull('2020-01-12', 6, 'US/Pacific')))), ifNull(equals(status, 'returning'), 0)) SETTINGS use_query_cache=1, - query_cache_ttl=600) AS source - INNER JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE and(equals(person.team_id, 2), in(person.id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT DISTINCT actor_id AS actor_id - FROM - (SELECT min(events__pdi__person.created_at) AS created_at, arraySort(groupUniqArray(toStartOfDay(toTimeZone(events.timestamp, 'US/Pacific')))) AS all_activity, arrayPopBack(arrayPushFront(all_activity, toStartOfDay(created_at))) AS previous_activity, arrayPopFront(arrayPushBack(all_activity, toStartOfDay(parseDateTime64BestEffortOrNull('1970-01-01 00:00:00', 6, 'US/Pacific')))) AS following_activity, arrayMap((previous, current, index) -> if(ifNull(equals(previous, current), isNull(previous) - and isNull(current)), 'new', if(and(ifNull(equals(minus(current, toIntervalDay(1)), previous), isNull(minus(current, toIntervalDay(1))) - and isNull(previous)), ifNull(notEquals(index, 1), 1)), 'returning', 'resurrecting')), previous_activity, all_activity, arrayEnumerate(all_activity)) AS initial_status, arrayMap((current, next) -> if(ifNull(equals(plus(current, toIntervalDay(1)), next), isNull(plus(current, toIntervalDay(1))) - and isNull(next)), '', 'dormant'), all_activity, following_activity) AS dormant_status, arrayMap(x -> plus(x, toIntervalDay(1)), arrayFilter((current, is_dormant) -> ifNull(equals(is_dormant, 'dormant'), 0), all_activity, dormant_status)) AS dormant_periods, arrayMap(x -> 'dormant', dormant_periods) AS dormant_label, arrayConcat(arrayZip(all_activity, initial_status), arrayZip(dormant_periods, dormant_label)) AS temp_concat, arrayJoin(temp_concat) AS period_status_pairs, period_status_pairs.1 AS start_of_period, period_status_pairs.2 AS status, events__pdi.person_id AS actor_id - FROM events - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS events__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS events__pdi ON equals(events.distinct_id, events__pdi.distinct_id) - LEFT JOIN - (SELECT argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version) AS created_at, person.id AS id - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS events__pdi__person ON equals(events__pdi.events__pdi___person_id, events__pdi__person.id) - WHERE and(equals(events.team_id, 2), greaterOrEquals(toTimeZone(events.timestamp, 'US/Pacific'), minus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 00:00:00', 6, 'US/Pacific'))), toIntervalDay(1))), less(toTimeZone(events.timestamp, 'US/Pacific'), plus(toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), toIntervalDay(1))), equals(events.event, '$pageview')) - GROUP BY actor_id) - WHERE and(ifNull(equals(start_of_period, toStartOfDay(parseDateTime64BestEffortOrNull('2020-01-12', 6, 'US/Pacific'))), isNull(start_of_period) - and isNull(toStartOfDay(parseDateTime64BestEffortOrNull('2020-01-12', 6, 'US/Pacific')))), ifNull(equals(status, 'returning'), 0)) SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0))))) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) - ORDER BY filterable_persons.properties___name ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288, - max_bytes_before_external_group_by=0 - ''' -# --- # name: TestInsightActorsQueryRunner.test_insight_persons_stickiness_groups_query ''' SELECT name AS name @@ -230,64 +32,6 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestInsightActorsQueryRunner.test_insight_persons_stickiness_query - ''' - SELECT name AS name - FROM - (SELECT filterable_persons.properties___name AS name - FROM - (SELECT aggregation_target AS actor_id - FROM - (SELECT e__pdi.person_id AS aggregation_target, - count(DISTINCT toStartOfDay(toTimeZone(e.timestamp, 'US/Pacific'))) AS num_intervals - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 00:00:00', 6, 'US/Pacific')))), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), equals(e.event, '$pageview')) - GROUP BY aggregation_target) - WHERE ifNull(equals(num_intervals, 2), 0) SETTINGS use_query_cache=1, - query_cache_ttl=600) AS source - INNER JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE and(equals(person.team_id, 2), in(person.id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT aggregation_target AS actor_id - FROM - (SELECT e__pdi.person_id AS aggregation_target, count(DISTINCT toStartOfDay(toTimeZone(e.timestamp, 'US/Pacific'))) AS num_intervals - FROM events AS e SAMPLE 1 - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - WHERE and(equals(e.team_id, 2), greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toStartOfDay(assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-09 00:00:00', 6, 'US/Pacific')))), lessOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), assumeNotNull(parseDateTime64BestEffortOrNull('2020-01-19 23:59:59', 6, 'US/Pacific'))), equals(e.event, '$pageview')) - GROUP BY aggregation_target) - WHERE ifNull(equals(num_intervals, 2), 0) SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0))))) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) - ORDER BY filterable_persons.properties___name ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288, - max_bytes_before_external_group_by=0 - ''' -# --- # name: TestInsightActorsQueryRunner.test_insight_persons_trends_groups_query ''' SELECT name AS name @@ -322,150 +66,3 @@ max_bytes_before_external_group_by=0 ''' # --- -# name: TestInsightActorsQueryRunner.test_insight_persons_trends_query_with_argmaxV1 - ''' - SELECT name AS name - FROM - (SELECT filterable_persons.properties___name AS name - FROM - (SELECT actor_id AS actor_id, - count() AS event_count - FROM - (SELECT e__pdi.person_id AS actor_id, - toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, - e.uuid AS uuid - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', ''), person.version) AS properties___email, - person.id AS id - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(notEquals(e__pdi__person.properties___email, 'tom@posthog.com'), 1), greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-09 00:00:00.000000', 6, 'US/Pacific')), less(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-10 00:00:00.000000', 6, 'US/Pacific')))) - GROUP BY actor_id SETTINGS use_query_cache=1, - query_cache_ttl=600) AS source - INNER JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', ''), person.version) AS properties___name, - person.id AS id - FROM person - WHERE and(equals(person.team_id, 2), in(id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT actor_id AS actor_id, count() AS event_count - FROM - (SELECT e__pdi.person_id AS actor_id, toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, e.uuid AS uuid - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT argMax(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', ''), person.version) AS properties___email, person.id AS id - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(notEquals(e__pdi__person.properties___email, 'tom@posthog.com'), 1), greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-09 00:00:00.000000', 6, 'US/Pacific')), less(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-10 00:00:00.000000', 6, 'US/Pacific')))) - GROUP BY actor_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0)) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) - ORDER BY filterable_persons.properties___name ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288, - max_bytes_before_external_group_by=0 - ''' -# --- -# name: TestInsightActorsQueryRunner.test_insight_persons_trends_query_with_argmaxV2 - ''' - SELECT name AS name - FROM - (SELECT filterable_persons.properties___name AS name - FROM - (SELECT actor_id AS actor_id, - count() AS event_count - FROM - (SELECT e__pdi.person_id AS actor_id, - toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, - e.uuid AS uuid - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, - argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, - person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(notEquals(e__pdi__person.properties___email, 'tom@posthog.com'), 1), greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-09 00:00:00.000000', 6, 'US/Pacific')), less(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-10 00:00:00.000000', 6, 'US/Pacific')))) - GROUP BY actor_id SETTINGS use_query_cache=1, - query_cache_ttl=600) AS source - INNER JOIN - (SELECT person.id AS id, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'name'), ''), 'null'), '^"|"$', '') AS properties___name - FROM person - WHERE and(equals(person.team_id, 2), in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE and(equals(person.team_id, 2), in(person.id, - (SELECT source.actor_id AS actor_id - FROM - (SELECT actor_id AS actor_id, count() AS event_count - FROM - (SELECT e__pdi.person_id AS actor_id, toTimeZone(e.timestamp, 'US/Pacific') AS timestamp, e.uuid AS uuid - FROM events AS e - INNER JOIN - (SELECT argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS person_id, argMax(person_distinct_id2.person_id, person_distinct_id2.version) AS e__pdi___person_id, person_distinct_id2.distinct_id AS distinct_id - FROM person_distinct_id2 - WHERE equals(person_distinct_id2.team_id, 2) - GROUP BY person_distinct_id2.distinct_id - HAVING ifNull(equals(argMax(person_distinct_id2.is_deleted, person_distinct_id2.version), 0), 0) SETTINGS optimize_aggregation_in_order=1) AS e__pdi ON equals(e.distinct_id, e__pdi.distinct_id) - LEFT JOIN - (SELECT person.id AS id, replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(person.properties, 'email'), ''), 'null'), '^"|"$', '') AS properties___email - FROM person - WHERE and(equals(person.team_id, 2), ifNull(in(tuple(person.id, person.version), - (SELECT person.id AS id, max(person.version) AS version - FROM person - WHERE equals(person.team_id, 2) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0)))), 0)) SETTINGS optimize_aggregation_in_order=1) AS e__pdi__person ON equals(e__pdi.e__pdi___person_id, e__pdi__person.id) - WHERE and(equals(e.team_id, 2), equals(e.event, '$pageview'), ifNull(notEquals(e__pdi__person.properties___email, 'tom@posthog.com'), 1), greaterOrEquals(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-09 00:00:00.000000', 6, 'US/Pacific')), less(toTimeZone(e.timestamp, 'US/Pacific'), toDateTime64('2020-01-10 00:00:00.000000', 6, 'US/Pacific')))) - GROUP BY actor_id SETTINGS use_query_cache=1, query_cache_ttl=600) AS source))) - GROUP BY person.id - HAVING and(ifNull(equals(argMax(person.is_deleted, person.version), 0), 0), ifNull(less(argMax(toTimeZone(person.created_at, 'US/Pacific'), person.version), plus(now64(6, 'US/Pacific'), toIntervalDay(1))), 0))))) SETTINGS optimize_aggregation_in_order=1) AS filterable_persons ON equals(filterable_persons.id, source.actor_id) - ORDER BY filterable_persons.properties___name ASC) - LIMIT 100 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1, - format_csv_allow_double_quotes=0, - max_ast_elements=1000000, - max_expanded_ast_elements=1000000, - max_query_size=524288, - max_bytes_before_external_group_by=0 - ''' -# ---