From 905ac6b3469fd3d1018b1453498a81de405f12cd Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Fri, 13 Dec 2024 12:47:47 -0800 Subject: [PATCH 1/6] Restore ability to filter out internal and test users --- .../experiments/Metrics/PrimaryGoalTrends.tsx | 80 ++++++++----------- .../test_experiment_trends_query_runner.py | 44 ++++++++-- 2 files changed, 70 insertions(+), 54 deletions(-) diff --git a/frontend/src/scenes/experiments/Metrics/PrimaryGoalTrends.tsx b/frontend/src/scenes/experiments/Metrics/PrimaryGoalTrends.tsx index 70e41c599abba..0ce1cb72e33da 100644 --- a/frontend/src/scenes/experiments/Metrics/PrimaryGoalTrends.tsx +++ b/frontend/src/scenes/experiments/Metrics/PrimaryGoalTrends.tsx @@ -24,10 +24,6 @@ export function PrimaryGoalTrends(): JSX.Element { const metricIdx = 0 const currentMetric = experiment.metrics[metricIdx] as ExperimentTrendsQuery - // :FLAG: CLEAN UP AFTER MIGRATION - const isDataWarehouseMetric = - featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL] && - currentMetric.count_query.series[0].kind === NodeKind.DataWarehouseNode return ( <> @@ -63,18 +59,10 @@ export function PrimaryGoalTrends(): JSX.Element { MathAvailability.All ) - if (series[0].kind === NodeKind.DataWarehouseNode) { - setTrendsMetric({ - metricIdx, - series, - filterTestAccounts: false, - }) - } else { - setTrendsMetric({ - metricIdx, - series, - }) - } + setTrendsMetric({ + metricIdx, + series, + }) } else { if (actions?.length) { setExperiment({ @@ -113,37 +101,35 @@ export function PrimaryGoalTrends(): JSX.Element { showNumericalPropsOnly={true} {...commonActionFilterProps} /> - {!isDataWarehouseMetric && ( -
- { - // :FLAG: CLEAN UP AFTER MIGRATION - if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) { - const val = currentMetric.count_query?.filterTestAccounts - return hasFilters ? !!val : false - } - return hasFilters ? !!experiment.filters.filter_test_accounts : false - })()} - onChange={(checked: boolean) => { - // :FLAG: CLEAN UP AFTER MIGRATION - if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) { - setTrendsMetric({ - metricIdx, - filterTestAccounts: checked, - }) - } else { - setExperiment({ - filters: { - ...experiment.filters, - filter_test_accounts: checked, - }, - }) - } - }} - fullWidth - /> -
- )} +
+ { + // :FLAG: CLEAN UP AFTER MIGRATION + if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) { + const val = currentMetric.count_query?.filterTestAccounts + return hasFilters ? !!val : false + } + return hasFilters ? !!experiment.filters.filter_test_accounts : false + })()} + onChange={(checked: boolean) => { + // :FLAG: CLEAN UP AFTER MIGRATION + if (featureFlags[FEATURE_FLAGS.EXPERIMENTS_HOGQL]) { + setTrendsMetric({ + metricIdx, + filterTestAccounts: checked, + }) + } else { + setExperiment({ + filters: { + ...experiment.filters, + filter_test_accounts: checked, + }, + }) + } + }} + fullWidth + /> +
{isExperimentRunning && ( Preview insights are generated based on {EXPERIMENT_DEFAULT_DURATION} days of data. This can cause a diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index cc18a27bd702d..39b7a821d36aa 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -17,7 +17,7 @@ OBJECT_STORAGE_SECRET_ACCESS_KEY, XDIST_SUFFIX, ) -from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, flush_persons_and_events +from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events from freezegun import freeze_time from typing import cast from django.utils import timezone @@ -198,10 +198,12 @@ def create_data_warehouse_table_with_usage(self): path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}" - id = pa.array(["1", "2", "3", "4", "5"]) - date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-06", "2023-01-07"]) - user_id = pa.array(["user_control_0", "user_test_1", "user_test_2", "user_test_3", "user_extra"]) - usage = pa.array([1000, 500, 750, 800, 900]) + id = pa.array(["1", "2", "3", "4", "5", "6"]) + date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-06", "2023-01-07"]) + user_id = pa.array( + ["user_control_0", "user_test_1", "user_test_2", "internal_test_1", "user_test_3", "user_extra"] + ) + usage = pa.array([1000, 500, 750, 100000, 800, 900]) names = ["id", "ds", "userid", "usage"] pq.write_to_dataset( @@ -244,6 +246,15 @@ def create_data_warehouse_table_with_usage(self): field_name="events", configuration={"experiments_optimized": True, "experiments_timestamp_key": "ds"}, ) + + DataWarehouseJoin.objects.create( + team=self.team, + source_table_name=table_name, + source_table_key="userid", + joining_table_name="persons", + joining_table_key="properties.$user_id", + field_name="person", + ) return table_name @freeze_time("2020-01-01T12:00:00Z") @@ -816,7 +827,8 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) math_property="usage", math_property_type="data_warehouse_properties", ) - ] + ], + filterTestAccounts=True, ) exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")]) @@ -846,6 +858,24 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) timestamp=datetime(2023, 1, i + 1), ) + _create_person( + team=self.team, + distinct_ids=["internal_test_1"], + properties={"$email": "internal_test_1@posthog.com"}, + ) + + _create_event( + team=self.team, + event="$feature_flag_called", + distinct_id="internal_test_1", + properties={ + feature_flag_property: "test", + "$feature_flag_response": "test", + "$feature_flag": feature_flag.key, + }, + timestamp=datetime(2023, 1, 3), + ) + # "user_test_3" first exposure (feature_flag_property="control") is on 2023-01-03 # "user_test_3" relevant exposure (feature_flag_property="test") is on 2023-01-04 # "user_test_3" other event (feature_flag_property="control" is on 2023-01-05 @@ -906,7 +936,7 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) ) # Assert the expected join condition in the clickhouse SQL - expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_8)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_9)s, 6, %(hogql_val_10)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_11)s, 6, %(hogql_val_12)s))))) AS e__events ON" + expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_12)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_13)s, 6, %(hogql_val_14)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_15)s, 6, %(hogql_val_16)s))))) AS e__events ON" self.assertIn(expected_join_condition, str(response.clickhouse)) result = query_runner.calculate() From a705f476fc9eecaff968093609c039da2ea4f4be Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Fri, 13 Dec 2024 12:56:47 -0800 Subject: [PATCH 2/6] Get to a properly failing test case --- .../experiments/test/test_experiment_trends_query_runner.py | 1 + 1 file changed, 1 insertion(+) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index 39b7a821d36aa..600eb85ee0bc0 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -872,6 +872,7 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) feature_flag_property: "test", "$feature_flag_response": "test", "$feature_flag": feature_flag.key, + "$user_id": "internal_test_1", }, timestamp=datetime(2023, 1, 3), ) From 725f3bcb5ac88227a7c3f4ca7994842768e26e89 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Fri, 13 Dec 2024 12:58:43 -0800 Subject: [PATCH 3/6] Bit more data that would be helpful --- .../experiments/test/test_experiment_trends_query_runner.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index 600eb85ee0bc0..f7b7592907198 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -861,7 +861,7 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) _create_person( team=self.team, distinct_ids=["internal_test_1"], - properties={"$email": "internal_test_1@posthog.com"}, + properties={"$email": "internal_test_1@posthog.com", "$user_id": "internal_test_1"}, ) _create_event( From 69ff3bf68059cf372e177a2c28bc3675cff2b997 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Fri, 13 Dec 2024 13:27:21 -0800 Subject: [PATCH 4/6] Fix test data again --- .../test/test_experiment_trends_query_runner.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index f7b7592907198..65f857b5129ac 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -815,6 +815,15 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) feature_flag_property = f"$feature/{feature_flag.key}" + self.team.test_account_filters = [ + { + "key": "email", + "value": "@posthog.com", + "operator": "not_icontains", + "type": "person", + } + ] + self.team.save() count_query = TrendsQuery( series=[ DataWarehouseNode( @@ -830,7 +839,7 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) ], filterTestAccounts=True, ) - exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")]) + exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=True) experiment_query = ExperimentTrendsQuery( experiment_id=experiment.id, @@ -861,7 +870,7 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) _create_person( team=self.team, distinct_ids=["internal_test_1"], - properties={"$email": "internal_test_1@posthog.com", "$user_id": "internal_test_1"}, + properties={"email": "internal_test_1@posthog.com", "$user_id": "internal_test_1"}, ) _create_event( From 9fb42106efccfb8f6904a2924feea0e81cedf6cc Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Mon, 16 Dec 2024 04:18:56 -0800 Subject: [PATCH 5/6] Provide a helpful message when the tests fail --- .../test/test_experiment_trends_query_runner.py | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index 65f857b5129ac..0b345bed0a931 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -947,7 +947,11 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) # Assert the expected join condition in the clickhouse SQL expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_12)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_13)s, 6, %(hogql_val_14)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_15)s, 6, %(hogql_val_16)s))))) AS e__events ON" - self.assertIn(expected_join_condition, str(response.clickhouse)) + self.assertIn( + expected_join_condition, + str(response.clickhouse), + "Please check to make sure the timestamp statements are included in the ASOF LEFT JOIN select statement. This may also fail if the placeholder numbers have changed.", + ) result = query_runner.calculate() @@ -1044,7 +1048,11 @@ def test_query_runner_with_data_warehouse_series_expected_query(self): # Assert the expected join condition in the clickhouse SQL expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_7)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_8)s, 6, %(hogql_val_9)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_10)s, 6, %(hogql_val_11)s))))) AS e__events ON" - self.assertIn(expected_join_condition, str(response.clickhouse)) + self.assertIn( + expected_join_condition, + str(response.clickhouse), + "Please check to make sure the timestamp statements are included in the ASOF LEFT JOIN select statement. This may also fail if the placeholder numbers have changed.", + ) result = query_runner.calculate() From 5ca3fd30d6e45e9000823e15679cede5139ddbd2 Mon Sep 17 00:00:00 2001 From: Daniel Bachhuber Date: Mon, 16 Dec 2024 05:12:49 -0800 Subject: [PATCH 6/6] Add a set of assertions for `filterTestAccounts=False` --- .../test_experiment_trends_query_runner.py | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) diff --git a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py index 0b345bed0a931..bba1366c33418 100644 --- a/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py +++ b/posthog/hogql_queries/experiments/test/test_experiment_trends_query_runner.py @@ -979,6 +979,65 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self) [0.0, 500.0, 1250.0, 1250.0, 1250.0, 2050.0, 2050.0, 2050.0, 2050.0, 2050.0], ) + # Run the query again with filter_test_accounts=False + # as a point of comparison to above + count_query = TrendsQuery( + series=[ + DataWarehouseNode( + id=table_name, + distinct_id_field="userid", + id_field="id", + table_name=table_name, + timestamp_field="ds", + math="avg", + math_property="usage", + math_property_type="data_warehouse_properties", + ) + ], + filterTestAccounts=False, + ) + exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=False) + + experiment_query = ExperimentTrendsQuery( + experiment_id=experiment.id, + kind="ExperimentTrendsQuery", + count_query=count_query, + exposure_query=exposure_query, + ) + + experiment.metrics = [{"type": "primary", "query": experiment_query.model_dump()}] + experiment.save() + + query_runner = ExperimentTrendsQueryRunner( + query=ExperimentTrendsQuery(**experiment.metrics[0]["query"]), team=self.team + ) + with freeze_time("2023-01-07"): + result = query_runner.calculate() + + trend_result = cast(ExperimentTrendsQueryResponse, result) + + self.assertEqual(len(result.variants), 2) + + control_result = next(variant for variant in trend_result.variants if variant.key == "control") + test_result = next(variant for variant in trend_result.variants if variant.key == "test") + + control_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "control") + test_insight = next(variant for variant in trend_result.insight if variant["breakdown_value"] == "test") + + self.assertEqual(control_result.count, 1000) + self.assertEqual(test_result.count, 102050) + self.assertEqual(control_result.absolute_exposure, 1) + self.assertEqual(test_result.absolute_exposure, 4) + + self.assertEqual( + control_insight["data"][:10], + [1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0, 1000.0], + ) + self.assertEqual( + test_insight["data"][:10], + [0.0, 500.0, 1250.0, 101250.0, 101250.0, 102050.0, 102050.0, 102050.0, 102050.0, 102050.0], + ) + def test_query_runner_with_data_warehouse_series_expected_query(self): table_name = self.create_data_warehouse_table_with_payments()