feat(experiments): Restore internal user filter for dw #26911
Changes from 4 commits: 905ac6b, a705f47, 725f3bc, 69ff3bf, 864dee5, 9fb4210, 5ca3fd3
@@ -17,7 +17,7 @@
     OBJECT_STORAGE_SECRET_ACCESS_KEY,
     XDIST_SUFFIX,
 )
-from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, flush_persons_and_events
+from posthog.test.base import APIBaseTest, ClickhouseTestMixin, _create_event, _create_person, flush_persons_and_events
 from freezegun import freeze_time
 from typing import cast
 from django.utils import timezone
@@ -198,10 +198,12 @@ def create_data_warehouse_table_with_usage(self):
 
         path_to_s3_object = "s3://" + OBJECT_STORAGE_BUCKET + f"/{TEST_BUCKET}"
 
-        id = pa.array(["1", "2", "3", "4", "5"])
-        date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-06", "2023-01-07"])
-        user_id = pa.array(["user_control_0", "user_test_1", "user_test_2", "user_test_3", "user_extra"])
-        usage = pa.array([1000, 500, 750, 800, 900])
+        id = pa.array(["1", "2", "3", "4", "5", "6"])
+        date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-06", "2023-01-07"])
+        user_id = pa.array(
+            ["user_control_0", "user_test_1", "user_test_2", "internal_test_1", "user_test_3", "user_extra"]
+        )
+        usage = pa.array([1000, 500, 750, 100000, 800, 900])
         names = ["id", "ds", "userid", "usage"]
 
         pq.write_to_dataset(
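For readers unfamiliar with this fixture, the hunk above truncates the `pq.write_to_dataset(` call. Here is a minimal, self-contained sketch of how arrays like these are typically assembled into a Parquet dataset with pyarrow; the local `root_path` is illustrative, while the actual test writes to the S3 path built from `OBJECT_STORAGE_BUCKET` and `TEST_BUCKET`:

```python
import pyarrow as pa
import pyarrow.parquet as pq

# Columns mirroring the updated test fixture, including the internal user's row (100000 usage).
id = pa.array(["1", "2", "3", "4", "5", "6"])
date = pa.array(["2023-01-01", "2023-01-02", "2023-01-03", "2023-01-04", "2023-01-06", "2023-01-07"])
user_id = pa.array(
    ["user_control_0", "user_test_1", "user_test_2", "internal_test_1", "user_test_3", "user_extra"]
)
usage = pa.array([1000, 500, 750, 100000, 800, 900])
names = ["id", "ds", "userid", "usage"]

# Assemble the columns into an Arrow table and write it out as a Parquet dataset.
table = pa.Table.from_arrays([id, date, user_id, usage], names=names)
pq.write_to_dataset(table, root_path="/tmp/usage_table")  # the real test targets the S3 bucket path
```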
@@ -244,6 +246,15 @@ def create_data_warehouse_table_with_usage(self):
             field_name="events",
             configuration={"experiments_optimized": True, "experiments_timestamp_key": "ds"},
         )
+
+        DataWarehouseJoin.objects.create(
+            team=self.team,
+            source_table_name=table_name,
+            source_table_key="userid",
+            joining_table_name="persons",
+            joining_table_key="properties.$user_id",
+            field_name="person",
+        )
         return table_name
 
     @freeze_time("2020-01-01T12:00:00Z")
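A toy illustration (plain Python, not the HogQL the query runner actually generates) of why this `DataWarehouseJoin` matters: it is what lets the team's "email does not contain @posthog.com" test-account filter reach warehouse rows through the matched person. The sample emails below are made up for the example:

```python
# Warehouse rows keyed by userid, matched to persons via person.properties["$user_id"].
warehouse_rows = [
    {"userid": "user_test_1", "usage": 500},
    {"userid": "internal_test_1", "usage": 100000},
]
persons_by_user_id = {
    "user_test_1": {"email": "someone@example.com"},      # external user: kept
    "internal_test_1": {"email": "someone@posthog.com"},  # internal user: filtered out
}

# The "not_icontains @posthog.com" person filter, applied through the join.
kept = [
    row
    for row in warehouse_rows
    if "@posthog.com" not in persons_by_user_id[row["userid"]].get("email", "").lower()
]
assert [row["userid"] for row in kept] == ["user_test_1"]
```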
@@ -804,6 +815,15 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
 
         feature_flag_property = f"$feature/{feature_flag.key}"
 
+        self.team.test_account_filters = [
+            {
+                "key": "email",
+                "value": "@posthog.com",
+                "operator": "not_icontains",
+                "type": "person",
+            }
+        ]
+        self.team.save()
         count_query = TrendsQuery(
             series=[
                 DataWarehouseNode(
@@ -816,9 +836,10 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
                     math_property="usage",
                     math_property_type="data_warehouse_properties",
                 )
-            ]
+            ],
+            filterTestAccounts=True,
         )
-        exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")])
+        exposure_query = TrendsQuery(series=[EventsNode(event="$feature_flag_called")], filterTestAccounts=True)
 
         experiment_query = ExperimentTrendsQuery(
             experiment_id=experiment.id,
@@ -846,6 +867,25 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
                 timestamp=datetime(2023, 1, i + 1),
             )
 
+        _create_person(
+            team=self.team,
+            distinct_ids=["internal_test_1"],
+            properties={"email": "[email protected]", "$user_id": "internal_test_1"},
+        )
+
+        _create_event(
+            team=self.team,
+            event="$feature_flag_called",
+            distinct_id="internal_test_1",
+            properties={
+                feature_flag_property: "test",
+                "$feature_flag_response": "test",
+                "$feature_flag": feature_flag.key,
+                "$user_id": "internal_test_1",
+            },
+            timestamp=datetime(2023, 1, 3),
+        )
+
         # "user_test_3" first exposure (feature_flag_property="control") is on 2023-01-03
         # "user_test_3" relevant exposure (feature_flag_property="test") is on 2023-01-04
         # "user_test_3" other event (feature_flag_property="control" is on 2023-01-05
@@ -906,7 +946,7 @@ def test_query_runner_with_data_warehouse_series_no_end_date_and_nested_id(self)
         )
 
         # Assert the expected join condition in the clickhouse SQL
-        expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_8)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_9)s, 6, %(hogql_val_10)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_11)s, 6, %(hogql_val_12)s))))) AS e__events ON"
+        expected_join_condition = f"and(equals(events.team_id, {query_runner.count_query_runner.team.id}), equals(event, %(hogql_val_12)s), greaterOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_13)s, 6, %(hogql_val_14)s))), lessOrEquals(timestamp, assumeNotNull(parseDateTime64BestEffortOrNull(%(hogql_val_15)s, 6, %(hogql_val_16)s))))) AS e__events ON"
         self.assertIn(expected_join_condition, str(response.clickhouse))
 
         result = query_runner.calculate()

Inline comments on the `expected_join_condition` assertion:

Reviewer: Just thinking out loud - how fragile is it to have a test set up like this, where we rely on checking the generated ClickHouse SQL? This could break if the hogql_val names change, even though it wouldn't actually affect our functionality.

Author: Correct: it does break when the hogql_val names change. I added a more descriptive exception message for anyone who runs into it though: 9fb4210
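On the fragility point raised above: the PR keeps the exact-string assertion and, per 9fb4210, adds a clearer failure message when the placeholders shift. As a separate, purely illustrative idea, an assertion like this could be made tolerant of `hogql_val` renumbering by normalizing the placeholders before comparing; the helper below is hypothetical and not part of the PR:

```python
import re

def normalize_hogql_placeholders(sql: str) -> str:
    """Collapse numbered %(hogql_val_N)s placeholders so a comparison ignores their ordering."""
    return re.sub(r"%\(hogql_val_\d+\)s", "%(hogql_val)s", sql)

# Hypothetical usage in the test (not what the PR does):
# self.assertIn(
#     normalize_hogql_placeholders(expected_join_condition),
#     normalize_hogql_placeholders(str(response.clickhouse)),
# )
```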
Reviewer: I'd prefer a clearer test to explicitly verify that this event is excluded from the returned data. It might be better to have a separate method specifically for testing the filtering of test users.

Author: How might you suggest constructing the test?

Reviewer: Something like:

My point is: we're creating a test user and an event generated by them. Am I missing something, or what exactly are we doing with this user and event later on? How are we ensuring that this event is excluded from the results?

Author: It's inferred. These values will change significantly if the event is included in the results:
https://github.com/PostHog/posthog/pull/26911/files#diff-36098f224702c73f16f7fce4f65403a8d9f840e079324042dc4dabeac1176f4dL924-R971

The way I went about writing the test was to add the filterTestAccounts=True statement so the test is passing with the original conditions again. I think what you suggest is marginally more explicit, but could also be unexpectedly erroneous in some other ways.

Author: I updated the test to also run the query with filterTestAccounts=False and assert the results: 5ca3fd3

Reviewer: Sounds good 👍
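As a rough illustration of the check the thread converges on (the exact assertions in 5ca3fd3 are not shown here): running the query with and without `filterTestAccounts` should differ by exactly the internal user's contribution. Using the usage values from the fixture above:

```python
# Illustrative numbers only, mirroring the warehouse rows created above:
# "internal_test_1" contributes 100000 usage, the other users contribute the rest.
internal_usage = 100000
other_usage = [1000, 500, 750, 800, 900]

total_without_filter = sum(other_usage) + internal_usage  # filterTestAccounts=False
total_with_filter = sum(other_usage)                      # filterTestAccounts=True

# The filtered total should be smaller by exactly the internal user's usage.
assert total_without_filter - total_with_filter == internal_usage
```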