From 3caeab0d428e9ef2b4485bfe53eb87b6dc4fc628 Mon Sep 17 00:00:00 2001 From: Sandy Spicer Date: Wed, 3 Jul 2024 07:57:56 -0700 Subject: [PATCH] fix: strip uuid in schema_helpers with the other fe only fields (#23376) --- .../test_clickhouse_experiments.ambr | 206 +++--------------- posthog/schema_helpers.py | 4 + posthog/test/test_schema_helpers.py | 10 + 3 files changed, 46 insertions(+), 174 deletions(-) diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr index 072402ffe0638..370df601365f6 100644 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr +++ b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiments.ambr @@ -441,97 +441,26 @@ # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.1 ''' - /* user_id:0 request:_snapshot_ */ - SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, - count(*) as count - FROM events e - WHERE team_id = 2 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.2 ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([[''], ['test_1'], ['test'], ['control'], ['unknown_3'], ['unknown_2'], ['unknown_1'], ['test_2']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - pdi.person_id as aggregation_target, - pdi.person_id as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 2 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 2 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max_steps) - GROUP BY prop + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_for_three_test_variants.3 @@ -655,97 +584,26 @@ # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_with_hogql_aggregation.1 ''' - /* user_id:0 request:_snapshot_ */ - SELECT array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS value, - count(*) as count - FROM events e - WHERE team_id = 2 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - GROUP BY value - ORDER BY count DESC, value DESC - LIMIT 26 - OFFSET 0 + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_with_hogql_aggregation.2 ''' - /* user_id:0 request:_snapshot_ */ - SELECT countIf(steps = 1) step_1, - countIf(steps = 2) step_2, - avg(step_1_average_conversion_time_inner) step_1_average_conversion_time, - median(step_1_median_conversion_time_inner) step_1_median_conversion_time, - prop - FROM - (SELECT aggregation_target, - steps, - avg(step_1_conversion_time) step_1_average_conversion_time_inner, - median(step_1_conversion_time) step_1_median_conversion_time_inner , - prop - FROM - (SELECT aggregation_target, - steps, - max(steps) over (PARTITION BY aggregation_target, - prop) as max_steps, - step_1_conversion_time , - prop - FROM - (SELECT *, - if(latest_0 <= latest_1 - AND latest_1 <= latest_0 + INTERVAL 14 DAY, 2, 1) AS steps , - if(isNotNull(latest_1) - AND latest_1 <= latest_0 + INTERVAL 14 DAY, dateDiff('second', toDateTime(latest_0), toDateTime(latest_1)), NULL) step_1_conversion_time, - prop - FROM - (SELECT aggregation_target, timestamp, step_0, - latest_0, - step_1, - min(latest_1) over (PARTITION by aggregation_target, - prop - ORDER BY timestamp DESC ROWS BETWEEN UNBOUNDED PRECEDING AND 0 PRECEDING) latest_1 , - if(has([['test'], ['control'], ['']], prop), prop, ['Other']) as prop - FROM - (SELECT *, - if(notEmpty(arrayFilter(x -> notEmpty(x), prop_vals)), prop_vals, ['']) as prop - FROM - (SELECT e.timestamp as timestamp, - replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(properties, '$account_id'), ''), 'null'), '^"|"$', '') as aggregation_target, - pdi.person_id as person_id, - if(event = '$pageview', 1, 0) as step_0, - if(step_0 = 1, timestamp, null) as latest_0, - if(event = '$pageleave', 1, 0) as step_1, - if(step_1 = 1, timestamp, null) as latest_1, - array(replaceRegexpAll(JSONExtractRaw(properties, '$feature/a-b-test'), '^"|"$', '')) AS prop_basic, - prop_basic as prop, - argMinIf(prop, timestamp, notEmpty(arrayFilter(x -> notEmpty(x), prop))) over (PARTITION by aggregation_target) as prop_vals - FROM events e - INNER JOIN - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - AND distinct_id IN - (SELECT distinct_id - FROM events - WHERE team_id = 2 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') ) - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) AS pdi ON e.distinct_id = pdi.distinct_id - WHERE team_id = 2 - AND event IN ['$pageleave', '$pageview'] - AND toTimeZone(timestamp, 'UTC') >= toDateTime('2020-01-01 00:00:00', 'UTC') - AND toTimeZone(timestamp, 'UTC') <= toDateTime('2020-01-06 00:00:00', 'UTC') - AND (step_0 = 1 - OR step_1 = 1) ))) - WHERE step_0 = 1 )) - GROUP BY aggregation_target, - steps, - prop - HAVING steps = max_steps) - GROUP BY prop + /* celery:posthog.tasks.tasks.sync_insight_caching_state */ + SELECT team_id, + date_diff('second', max(timestamp), now()) AS age + FROM events + WHERE timestamp > date_sub(DAY, 3, now()) + AND timestamp < now() + GROUP BY team_id + ORDER BY age; ''' # --- # name: ClickhouseTestFunnelExperimentResults.test_experiment_flow_with_event_results_with_hogql_aggregation.3 diff --git a/posthog/schema_helpers.py b/posthog/schema_helpers.py index a3a96fec062cc..356e49c98a063 100644 --- a/posthog/schema_helpers.py +++ b/posthog/schema_helpers.py @@ -74,6 +74,10 @@ def serialize_query(self, next_serializer): ] } + for key in ("targetEntity", "returningEntity"): + if key in dumped[insightFilterKey] and "uuid" in dumped[insightFilterKey][key]: + del dumped[insightFilterKey][key]["uuid"] + # use a canonical value for each display category if "display" in dumped[insightFilterKey]: canonical_display = grouped_chart_display_types(dumped[insightFilterKey]["display"]) diff --git a/posthog/test/test_schema_helpers.py b/posthog/test/test_schema_helpers.py index a0ca3ec5d789d..a7a7303f57a20 100644 --- a/posthog/test/test_schema_helpers.py +++ b/posthog/test/test_schema_helpers.py @@ -17,6 +17,7 @@ StepOrderValue, BreakdownAttributionType, TrendsQuery, + RetentionQuery, ) from posthog.schema_helpers import to_dict @@ -91,6 +92,15 @@ def test_serializes_insight_filter_without_frontend_only_props(self): self.assertEqual(result_dict, {"kind": "TrendsQuery", "series": []}) + def test_serializes_retention_filter_without_frontend_only_props(self): + query = RetentionQuery(**{"retentionFilter": {"targetEntity": {"uuid": "1"}, "returningEntity": {"uuid": "2"}}}) + + result_dict = to_dict(query) + + self.assertEqual( + result_dict, {"kind": "RetentionQuery", "retentionFilter": {"targetEntity": {}, "returningEntity": {}}} + ) + def test_serializes_display_with_canonic_alternatives(self): # time series (gets removed as ActionsLineGraph is the default) query = TrendsQuery(**{**base_trends, "trendsFilter": {"display": "ActionsAreaGraph"}})