diff --git a/posthog/hogql/test/utils.py b/posthog/hogql/test/utils.py index 2d2e2f4d27a33..305cdbd369cc5 100644 --- a/posthog/hogql/test/utils.py +++ b/posthog/hogql/test/utils.py @@ -5,6 +5,8 @@ from pydantic import BaseModel +from posthog.test.base import clean_varying_query_parts + def pretty_print_in_tests(query: str, team_id: int) -> str: query = ( @@ -27,7 +29,7 @@ def pretty_print_response_in_tests(response: Any, team_id: int) -> str: clickhouse = response.clickhouse hogql = response.hogql query = "-- ClickHouse\n" + clickhouse + "\n\n-- HogQL\n" + hogql - return pretty_print_in_tests(query, team_id) + return clean_varying_query_parts(pretty_print_in_tests(query, team_id), False) def pretty_dataclasses(obj, seen=None, indent=0): diff --git a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr index 06e46e0ca30b0..e4eec90a1a7d7 100644 --- a/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr +++ b/posthog/hogql/transforms/test/__snapshots__/test_in_cohort.ambr @@ -7,8 +7,8 @@ FROM events LEFT JOIN ( SELECT cohortpeople.person_id AS cohort_person_id, 1 AS matched, cohortpeople.cohort_id AS cohort_id FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, XX), equals(cohortpeople.version, 0))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) - WHERE and(equals(events.team_id, 420), and(1, equals(events.event, %(hogql_val_0)s)), ifNull(equals(__in_cohort.matched, 1), 0)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, XX), equals(cohortpeople.version, 0))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(events.team_id, 2), and(1, equals(events.event, %(hogql_val_0)s)), ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=4000000, max_expanded_ast_elements=4000000, max_bytes_before_external_group_by=0 @@ -31,8 +31,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [6]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) - WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 2), in(person_static_cohort.cohort_id, [1, 2, 3, 4, 5 /* ... */]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(events.team_id, 2), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=4000000, max_expanded_ast_elements=4000000, max_bytes_before_external_group_by=0 @@ -42,7 +42,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [6])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [2])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -55,8 +55,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS cohort_person_id, 1 AS matched, person_static_cohort.cohort_id AS cohort_id FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), in(person_static_cohort.cohort_id, [7]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) - WHERE and(equals(events.team_id, 420), 1, ifNull(equals(__in_cohort.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 2), in(person_static_cohort.cohort_id, [1, 2, 3, 4, 5 /* ... */]))) AS __in_cohort ON equals(__in_cohort.cohort_person_id, events.person_id) + WHERE and(equals(events.team_id, 2), 1, ifNull(equals(__in_cohort.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=4000000, max_expanded_ast_elements=4000000, max_bytes_before_external_group_by=0 @@ -66,7 +66,7 @@ FROM events LEFT JOIN ( SELECT person_id AS cohort_person_id, 1 AS matched, cohort_id FROM static_cohort_people - WHERE in(cohort_id, [7])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) + WHERE in(cohort_id, [3])) AS __in_cohort ON equals(__in_cohort.cohort_person_id, person_id) WHERE and(1, equals(__in_cohort.matched, 1)) LIMIT 100 ''' @@ -79,10 +79,10 @@ FROM events LEFT JOIN ( SELECT cohortpeople.person_id AS person_id, 1 AS matched FROM cohortpeople - WHERE and(equals(cohortpeople.team_id, 420), equals(cohortpeople.cohort_id, XX)) + WHERE and(equals(cohortpeople.team_id, 2), equals(cohortpeople.cohort_id, XX)) GROUP BY cohortpeople.person_id, cohortpeople.cohort_id, cohortpeople.version HAVING ifNull(greater(sum(cohortpeople.sign), 0), 0)) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__XX.matched, 1), 0), equals(events.event, %(hogql_val_0)s)) + WHERE and(equals(events.team_id, 2), ifNull(equals(in_cohort__XX.matched, 1), 0), equals(events.event, %(hogql_val_0)s)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=4000000, max_expanded_ast_elements=4000000, max_bytes_before_external_group_by=0 @@ -107,8 +107,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS person_id, 1 AS matched FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, XX))) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__XX.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, XX))) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) + WHERE and(equals(events.team_id, 2), ifNull(equals(in_cohort__XX.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=4000000, max_expanded_ast_elements=4000000, max_bytes_before_external_group_by=0 @@ -131,8 +131,8 @@ FROM events LEFT JOIN ( SELECT person_static_cohort.person_id AS person_id, 1 AS matched FROM person_static_cohort - WHERE and(equals(person_static_cohort.team_id, 420), equals(person_static_cohort.cohort_id, XX))) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) - WHERE and(equals(events.team_id, 420), ifNull(equals(in_cohort__XX.matched, 1), 0)) + WHERE and(equals(person_static_cohort.team_id, 2), equals(person_static_cohort.cohort_id, XX))) AS in_cohort__XX ON equals(in_cohort__XX.person_id, events.person_id) + WHERE and(equals(events.team_id, 2), ifNull(equals(in_cohort__XX.matched, 1), 0)) LIMIT 100 SETTINGS readonly=2, max_execution_time=60, allow_experimental_object_type=1, format_csv_allow_double_quotes=0, max_ast_elements=4000000, max_expanded_ast_elements=4000000, max_bytes_before_external_group_by=0 diff --git a/posthog/test/base.py b/posthog/test/base.py index 326a1f00b5674..b1d135969eb99 100644 --- a/posthog/test/base.py +++ b/posthog/test/base.py @@ -105,6 +105,166 @@ unittest.util._MAX_LENGTH = 2000 # type: ignore +def clean_varying_query_parts(query, replace_all_numbers): + # :TRICKY: team_id changes every test, avoid it messing with snapshots. + if replace_all_numbers: + query = re.sub(r"(\"?) = \d+", r"\1 = 2", query) + query = re.sub(r"(\"?) IN \(\d+(, ?\d+)*\)", r"\1 IN (1, 2, 3, 4, 5 /* ... */)", query) + query = re.sub(r"(\"?) IN \[\d+(, ?\d+)*\]", r"\1 IN [1, 2, 3, 4, 5 /* ... */]", query) + # replace "uuid" IN ('00000000-0000-4000-8000-000000000001'::uuid) effectively: + query = re.sub( + r"\"uuid\" IN \('[0-9a-f-]{36}'(::uuid)?(, '[0-9a-f-]{36}'(::uuid)?)*\)", + r""""uuid" IN ('00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000001'::uuid /* ... */)\n""", + query, + ) + + else: + query = re.sub(r"(team|cohort)_id(\"?) = \d+", r"\1_id\2 = 2", query) + query = re.sub(r"\d+ as (team|cohort)_id(\"?)", r"2 as \1_id\2", query) + # feature flag conditions use primary keys as columns in queries, so replace those always + query = re.sub(r"flag_\d+_condition", r"flag_X_condition", query) + query = re.sub(r"flag_\d+_super_condition", r"flag_X_super_condition", query) + # replace django cursors + query = re.sub(r"_django_curs_[0-9sync_]*\"", r'_django_curs_X"', query) + # hog ql checks some ids differently + query = re.sub( + r"equals\(([^.]+\.)?(team_id|cohort_id)?, \d+\)", + r"equals(\1\2, 2)", + query, + ) + # replace survey uuids + # replace arrays like "survey_id in ['017e12ef-9c00-0000-59bf-43ddb0bddea6', '017e12ef-9c00-0001-6df6-2cf1f217757f']" + query = re.sub( + r"survey_id in \['[0-9a-f-]{36}'(, '[0-9a-f-]{36}')*\]", + r"survey_id in ['00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000001' /* ... */]", + query, + ) + # replace arrays like "survey_id in ['017e12ef-9c00-0000-59bf-43ddb0bddea6', '017e12ef-9c00-0001-6df6-2cf1f217757f']" + query = re.sub( + r"\"posthog_survey_actions\".\"survey_id\" IN \('[^']+'::uuid, '[^']+'::uuid\)", + r"'posthog_survey_actions'.'survey_id' IN ('00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000001'::uuid)", + query, + ) + # replace session uuids + # replace arrays like "in(s.session_id, ['ea376ce0-d365-4c75-8015-0407e71a1a28'])" + query = re.sub( + r"in\((?:s\.)?session_id, \['[0-9a-f-]{36}'(, '[0-9a-f-]{36}')*\]\)", + r"in(s.session_id, ['00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000001' /* ... */]", + query, + ) + #### Cohort replacements + # replace cohort id lists in queries too + query = re.sub( + r"in\(([^,]+\.?cohort_id), \[(\d+(, ?\d+)*)]\)", + r"in(\1, [1, 2, 3, 4, 5 /* ... */])", + query, + ) + # replace explicit timestamps in cohort queries + query = re.sub(r"timestamp > '20\d\d-\d\d-\d\d \d\d:\d\d:\d\d'", r"timestamp > 'explicit_timestamp'", query) + # replace cohort generated conditions + query = re.sub( + r"_condition_\d+_level", + r"_condition_X_level", + query, + ) + # replace cohort tuples + # like (tuple(cohortpeople.cohort_id, cohortpeople.version), [(35, 0)]) + query = re.sub( + r"\(tuple\((.*)\.cohort_id, (.*)\.version\), \[\(\d+, \d+\)\]\)", + r"(tuple(\1.cohort_id, \2.version), [(2, 0)])", + query, + ) + #### Cohort replacements end + # Replace organization_id and notebook_id lookups, for postgres + query = re.sub( + rf"""("organization_id"|"posthog_organization"\."id"|"posthog_notebook"."id") = '[^']+'::uuid""", + r"""\1 = '00000000-0000-0000-0000-000000000000'::uuid""", + query, + ) + query = re.sub( + rf"""("organization_id"|"posthog_organization"\."id"|"posthog_notebook"."id") IN \('[^']+'::uuid\)""", + r"""\1 IN ('00000000-0000-0000-0000-000000000000'::uuid)""", + query, + ) + # Replace notebook short_id lookups, for postgres + query = re.sub( + r"\"posthog_notebook\".\"short_id\" = '[a-zA-Z0-9]{8}'", + '"posthog_notebook"."short_id" = \'00000000\'', + query, + ) + # Replace person id (when querying session recording replay events) + query = re.sub( + "and person_id = '[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}'", + r"AND person_id = '00000000-0000-0000-0000-000000000000'", + query, + flags=re.IGNORECASE, + ) + # HogQL person id in session recording queries + # ifNull(equals(s__pdi.person_id, '0176be33-0398-0091-ec89-570d7768f2f4'), 0)) + # ifNull(equals(person_distinct_ids__person.id, '0176be33-0398-000c-0772-f78c97593bdd'), 0)))) + # equals(events.person_id, '0176be33-0398-0060-abed-8da43384e020') + query = re.sub( + r"equals\(([^.]+[._])?person.id, '[0-9a-f-]{36}'\)", + r"equals(\1person_id, '00000000-0000-0000-0000-000000000000')", + query, + ) + # equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '0176be33-0398-0090-a0e7-7cd9139f8089') + query = re.sub( + r"events__override.person_id, events.person_id\), '[0-9a-f-]{36}'\)", + r"events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000')", + query, + ) + query = re.sub( + "and current_person_id = '[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}'", + r"AND current_person_id = '00000000-0000-0000-0000-000000000000'", + query, + flags=re.IGNORECASE, + ) + # Replace tag id lookups for postgres + query = re.sub( + rf"""("posthog_tag"\."id") IN \(('[^']+'::uuid)+(, ('[^']+'::uuid)+)*\)""", + r"""\1 IN ('00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000000'::uuid /* ... */)""", + query, + ) + query = re.sub( + rf"""user_id:([0-9]+) request:[a-zA-Z0-9-_]+""", + r"""user_id:0 request:_snapshot_""", + query, + ) + query = re.sub( + rf"""user_id:([0-9]+)""", + r"""user_id:0""", + query, + ) + # ee license check has varying datetime + # e.g. WHERE "ee_license"."valid_until" >= '2023-03-02T21:13:59.298031+00:00'::timestamptz + query = re.sub( + r"ee_license\"\.\"valid_until\" >= '\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d{6}\+\d\d:\d\d'::timestamptz", + '"ee_license"."valid_until">=\'LICENSE-TIMESTAMP\'::timestamptz"', + query, + ) + # insight cache key varies with team id + query = re.sub( + r"WHERE \(\"posthog_insightcachingstate\".\"cache_key\" = 'cache_\w{32}'", + """WHERE ("posthog_insightcachingstate"."cache_key" = 'cache_THE_CACHE_KEY'""", + query, + ) + # replace Savepoint numbers + query = re.sub(r"SAVEPOINT \".+\"", "SAVEPOINT _snapshot_", query) + # test_formula has some values that change on every run + query = re.sub( + r"\SELECT \[\d+, \d+] as breakdown_value", + "SELECT [1, 2] as breakdown_value", + query, + ) + query = re.sub( + r"SELECT distinct_id,[\n\r\s]+\d+ as value", + "SELECT distinct_id, 1 as value", + query, + ) + return query + + def _setup_test_data(klass): klass.organization = Organization.objects.create(name=klass.CONFIG_ORGANIZATION_NAME) klass.project = Project.objects.create(id=Team.objects.increment_id_sequence(), organization=klass.organization) @@ -507,184 +667,7 @@ class QueryMatchingTest: # :NOTE: Update snapshots by passing --snapshot-update to bin/tests def assertQueryMatchesSnapshot(self, query, params=None, replace_all_numbers=False): - # :TRICKY: team_id changes every test, avoid it messing with snapshots. - if replace_all_numbers: - query = re.sub(r"(\"?) = \d+", r"\1 = 2", query) - query = re.sub(r"(\"?) IN \(\d+(, ?\d+)*\)", r"\1 IN (1, 2, 3, 4, 5 /* ... */)", query) - query = re.sub(r"(\"?) IN \[\d+(, ?\d+)*\]", r"\1 IN [1, 2, 3, 4, 5 /* ... */]", query) - # replace "uuid" IN ('00000000-0000-4000-8000-000000000001'::uuid) effectively: - query = re.sub( - r"\"uuid\" IN \('[0-9a-f-]{36}'(::uuid)?(, '[0-9a-f-]{36}'(::uuid)?)*\)", - r""""uuid" IN ('00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000001'::uuid /* ... */)\n""", - query, - ) - - else: - query = re.sub(r"(team|cohort)_id(\"?) = \d+", r"\1_id\2 = 2", query) - query = re.sub(r"\d+ as (team|cohort)_id(\"?)", r"2 as \1_id\2", query) - - # feature flag conditions use primary keys as columns in queries, so replace those always - query = re.sub(r"flag_\d+_condition", r"flag_X_condition", query) - query = re.sub(r"flag_\d+_super_condition", r"flag_X_super_condition", query) - - # replace django cursors - query = re.sub(r"_django_curs_[0-9sync_]*\"", r'_django_curs_X"', query) - - # hog ql checks some ids differently - query = re.sub( - r"equals\(([^.]+\.)?(team_id|cohort_id)?, \d+\)", - r"equals(\1\2, 2)", - query, - ) - - # replace survey uuids - # replace arrays like "survey_id in ['017e12ef-9c00-0000-59bf-43ddb0bddea6', '017e12ef-9c00-0001-6df6-2cf1f217757f']" - query = re.sub( - r"survey_id in \['[0-9a-f-]{36}'(, '[0-9a-f-]{36}')*\]", - r"survey_id in ['00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000001' /* ... */]", - query, - ) - - # replace arrays like "survey_id in ['017e12ef-9c00-0000-59bf-43ddb0bddea6', '017e12ef-9c00-0001-6df6-2cf1f217757f']" - query = re.sub( - r"\"posthog_survey_actions\".\"survey_id\" IN \('[^']+'::uuid, '[^']+'::uuid\)", - r"'posthog_survey_actions'.'survey_id' IN ('00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000001'::uuid)", - query, - ) - - # replace session uuids - # replace arrays like "in(s.session_id, ['ea376ce0-d365-4c75-8015-0407e71a1a28'])" - query = re.sub( - r"in\((?:s\.)?session_id, \['[0-9a-f-]{36}'(, '[0-9a-f-]{36}')*\]\)", - r"in(s.session_id, ['00000000-0000-0000-0000-000000000000', '00000000-0000-0000-0000-000000000001' /* ... */]", - query, - ) - - #### Cohort replacements - # replace cohort id lists in queries too - query = re.sub( - r"in\(([^,]+\.?cohort_id), \[(\d+(, ?\d+)*)]\)", - r"in(\1, [1, 2, 3, 4, 5 /* ... */])", - query, - ) - # replace explicit timestamps in cohort queries - query = re.sub(r"timestamp > '20\d\d-\d\d-\d\d \d\d:\d\d:\d\d'", r"timestamp > 'explicit_timestamp'", query) - - # replace cohort generated conditions - query = re.sub( - r"_condition_\d+_level", - r"_condition_X_level", - query, - ) - - # replace cohort tuples - # like (tuple(cohortpeople.cohort_id, cohortpeople.version), [(35, 0)]) - query = re.sub( - r"\(tuple\((.*)\.cohort_id, (.*)\.version\), \[\(\d+, \d+\)\]\)", - r"(tuple(\1.cohort_id, \2.version), [(2, 0)])", - query, - ) - - #### Cohort replacements end - - # Replace organization_id and notebook_id lookups, for postgres - query = re.sub( - rf"""("organization_id"|"posthog_organization"\."id"|"posthog_notebook"."id") = '[^']+'::uuid""", - r"""\1 = '00000000-0000-0000-0000-000000000000'::uuid""", - query, - ) - query = re.sub( - rf"""("organization_id"|"posthog_organization"\."id"|"posthog_notebook"."id") IN \('[^']+'::uuid\)""", - r"""\1 IN ('00000000-0000-0000-0000-000000000000'::uuid)""", - query, - ) - - # Replace notebook short_id lookups, for postgres - query = re.sub( - r"\"posthog_notebook\".\"short_id\" = '[a-zA-Z0-9]{8}'", - '"posthog_notebook"."short_id" = \'00000000\'', - query, - ) - - # Replace person id (when querying session recording replay events) - query = re.sub( - "and person_id = '[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}'", - r"AND person_id = '00000000-0000-0000-0000-000000000000'", - query, - flags=re.IGNORECASE, - ) - - # HogQL person id in session recording queries - # ifNull(equals(s__pdi.person_id, '0176be33-0398-0091-ec89-570d7768f2f4'), 0)) - # ifNull(equals(person_distinct_ids__person.id, '0176be33-0398-000c-0772-f78c97593bdd'), 0)))) - # equals(events.person_id, '0176be33-0398-0060-abed-8da43384e020') - query = re.sub( - r"equals\(([^.]+[._])?person.id, '[0-9a-f-]{36}'\)", - r"equals(\1person_id, '00000000-0000-0000-0000-000000000000')", - query, - ) - - # equals(if(not(empty(events__override.distinct_id)), events__override.person_id, events.person_id), '0176be33-0398-0090-a0e7-7cd9139f8089') - query = re.sub( - r"events__override.person_id, events.person_id\), '[0-9a-f-]{36}'\)", - r"events__override.person_id, events.person_id), '00000000-0000-0000-0000-000000000000')", - query, - ) - - query = re.sub( - "and current_person_id = '[0-9A-Fa-f]{8}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{4}-[0-9A-Fa-f]{12}'", - r"AND current_person_id = '00000000-0000-0000-0000-000000000000'", - query, - flags=re.IGNORECASE, - ) - - # Replace tag id lookups for postgres - query = re.sub( - rf"""("posthog_tag"\."id") IN \(('[^']+'::uuid)+(, ('[^']+'::uuid)+)*\)""", - r"""\1 IN ('00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000000'::uuid, '00000000-0000-0000-0000-000000000000'::uuid /* ... */)""", - query, - ) - - query = re.sub( - rf"""user_id:([0-9]+) request:[a-zA-Z0-9-_]+""", - r"""user_id:0 request:_snapshot_""", - query, - ) - query = re.sub( - rf"""user_id:([0-9]+)""", - r"""user_id:0""", - query, - ) - - # ee license check has varying datetime - # e.g. WHERE "ee_license"."valid_until" >= '2023-03-02T21:13:59.298031+00:00'::timestamptz - query = re.sub( - r"ee_license\"\.\"valid_until\" >= '\d{4}-\d\d-\d\dT\d\d:\d\d:\d\d.\d{6}\+\d\d:\d\d'::timestamptz", - '"ee_license"."valid_until">=\'LICENSE-TIMESTAMP\'::timestamptz"', - query, - ) - - # insight cache key varies with team id - query = re.sub( - r"WHERE \(\"posthog_insightcachingstate\".\"cache_key\" = 'cache_\w{32}'", - """WHERE ("posthog_insightcachingstate"."cache_key" = 'cache_THE_CACHE_KEY'""", - query, - ) - - # replace Savepoint numbers - query = re.sub(r"SAVEPOINT \".+\"", "SAVEPOINT _snapshot_", query) - - # test_formula has some values that change on every run - query = re.sub( - r"\SELECT \[\d+, \d+] as breakdown_value", - "SELECT [1, 2] as breakdown_value", - query, - ) - query = re.sub( - r"SELECT distinct_id,[\n\r\s]+\d+ as value", - "SELECT distinct_id, 1 as value", - query, - ) + query = clean_varying_query_parts(query, replace_all_numbers) assert sqlparse.format(query, reindent=True) == self.snapshot, "\n".join(self.snapshot.get_assert_diff()) if params is not None: