From a2e8c7f8c3852765b805bfdabe1738ee66ff0284 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Thu, 21 Dec 2023 17:40:27 +0000 Subject: [PATCH 1/7] fix: re2 dot all flag for non-hogql filters --- posthog/models/property/util.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/posthog/models/property/util.py b/posthog/models/property/util.py index c67a3d8cf3154..4d7d129615193 100644 --- a/posthog/models/property/util.py +++ b/posthog/models/property/util.py @@ -489,7 +489,13 @@ def prop_filter_json_extract( params = { "k{}_{}".format(prepend, idx): prop.key, - "v{}_{}".format(prepend, idx): prop.value, + # we follow re2 regex syntax and so does ClickHouse **except** + # For example, the string a\nb shouldn't match the pattern a.b, but it does in CH + # this is because According to the re2 docs, the s flag is false by default, + # but in CH it seems to be true by default. + # prepending (?-s) to the regex string will make it work as expected + # see https://github.com/ClickHouse/ClickHouse/issues/34603 + "v{}_{}".format(prepend, idx): f"(?-s){prop.value}", } return ( From 1bde0fda7ae8365d2a1e3f5fc9d5121030325217 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Thu, 21 Dec 2023 17:43:35 +0000 Subject: [PATCH 2/7] add for hogql here? --- posthog/hogql/property.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/posthog/hogql/property.py b/posthog/hogql/property.py index ce4ea3bdfe14f..e65fe204cd216 100644 --- a/posthog/hogql/property.py +++ b/posthog/hogql/property.py @@ -202,12 +202,18 @@ def property_to_expr( left=field, right=ast.Constant(value=f"%{value}%"), ) + # we follow re2 regex syntax and so does ClickHouse **except** + # For example, the string a\nb shouldn't match the pattern a.b, but it does in CH + # this is because According to the re2 docs, the s flag is false by default, + # but in CH it seems to be true by default. + # prepending (?-s) to the regex string will make it work as expected + # see https://github.com/ClickHouse/ClickHouse/issues/34603 elif operator == PropertyOperator.regex: - return ast.Call(name="match", args=[field, ast.Constant(value=value)]) + return ast.Call(name="match", args=[field, ast.Constant(value=f"(?-s){value}")]) elif operator == PropertyOperator.not_regex: return ast.Call( name="not", - args=[ast.Call(name="match", args=[field, ast.Constant(value=value)])], + args=[ast.Call(name="match", args=[field, ast.Constant(value=f"(?-s){value}")])], ) elif operator == PropertyOperator.exact or operator == PropertyOperator.is_date_exact: op = ast.CompareOperationOp.Eq From c756fdac3af20eee4e03c12ecf650abee4e3dfaf Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Thu, 21 Dec 2023 18:19:17 +0000 Subject: [PATCH 3/7] some tests --- .../test/__snapshots__/test_property.ambr | 153 ------------------ .../api/test/__snapshots__/test_query.ambr | 26 +++ posthog/api/test/test_query.py | 57 +++++++ posthog/hogql/test/test_property.py | 6 +- 4 files changed, 86 insertions(+), 156 deletions(-) delete mode 100644 ee/clickhouse/models/test/__snapshots__/test_property.ambr diff --git a/ee/clickhouse/models/test/__snapshots__/test_property.ambr b/ee/clickhouse/models/test/__snapshots__/test_property.ambr deleted file mode 100644 index cc8e77f83a0dc..0000000000000 --- a/ee/clickhouse/models/test/__snapshots__/test_property.ambr +++ /dev/null @@ -1,153 +0,0 @@ -# name: TestPropFormat.test_parse_groups - ' - SELECT uuid - FROM events - WHERE team_id = 2 - AND ((has(['val_1'], replaceRegexpAll(JSONExtractRaw(properties, 'attr_1'), '^"|"$', '')) - AND has(['val_2'], replaceRegexpAll(JSONExtractRaw(properties, 'attr_2'), '^"|"$', ''))) - OR (has(['val_2'], replaceRegexpAll(JSONExtractRaw(properties, 'attr_1'), '^"|"$', '')))) - ' ---- -# name: TestPropFormat.test_parse_groups_persons - ' - SELECT uuid - FROM events - WHERE team_id = 2 - AND ((distinct_id IN - (SELECT distinct_id - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM - (SELECT id, - argMax(properties, person._timestamp) as properties, - max(is_deleted) as is_deleted - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING is_deleted = 0) - WHERE has(['1@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '')) ) )) - OR (distinct_id IN - (SELECT distinct_id - FROM - (SELECT distinct_id, - argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 2 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0) - WHERE person_id IN - (SELECT id - FROM - (SELECT id, - argMax(properties, person._timestamp) as properties, - max(is_deleted) as is_deleted - FROM person - WHERE team_id = 2 - GROUP BY id - HAVING is_deleted = 0) - WHERE has(['2@posthog.com'], replaceRegexpAll(JSONExtractRaw(properties, 'email'), '^"|"$', '')) ) ))) - ' ---- -# name: test_parse_groups_persons_edge_case_with_single_filter - ( - 'AND ( has(%(vglobalperson_0)s, replaceRegexpAll(JSONExtractRaw(person_props, %(kglobalperson_0)s), \'^"|"$\', \'\')))', - { - 'kglobalperson_0': 'email', - 'vglobalperson_0': [ - '1@posthog.com', - ], - }, - ) ---- -# name: test_parse_prop_clauses_defaults - ( - ' - AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), '^"|"$', '')) AND distinct_id IN ( - SELECT distinct_id - FROM ( - - SELECT distinct_id, argMax(person_id, version) as person_id - FROM person_distinct_id2 - WHERE team_id = 1 - GROUP BY distinct_id - HAVING argMax(is_deleted, version) = 0 - - ) - WHERE person_id IN - ( - SELECT id - FROM ( - SELECT id, argMax(properties, person._timestamp) as properties, max(is_deleted) as is_deleted - FROM person - WHERE team_id = %(team_id)s - GROUP BY id - HAVING is_deleted = 0 - ) - WHERE replaceRegexpAll(JSONExtractRaw(properties, %(kglobalperson_1)s), '^"|"$', '') ILIKE %(vglobalperson_1)s - ) - )) - ', - { - 'kglobal_0': 'event_prop', - 'kglobalperson_1': 'email', - 'vglobal_0': [ - 'value', - ], - 'vglobalperson_1': '%posthog%', - }, - ) ---- -# name: test_parse_prop_clauses_defaults.1 - ( - 'AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), \'^"|"$\', \'\')) AND replaceRegexpAll(JSONExtractRaw(person_props, %(kglobalperson_1)s), \'^"|"$\', \'\') ILIKE %(vglobalperson_1)s)', - { - 'kglobal_0': 'event_prop', - 'kglobalperson_1': 'email', - 'vglobal_0': [ - 'value', - ], - 'vglobalperson_1': '%posthog%', - }, - ) ---- -# name: test_parse_prop_clauses_defaults.2 - ( - 'AND ( has(%(vglobal_0)s, replaceRegexpAll(JSONExtractRaw(properties, %(kglobal_0)s), \'^"|"$\', \'\')) AND argMax(person."pmat_email", version) ILIKE %(vpersonquery_global_1)s)', - { - 'kglobal_0': 'event_prop', - 'kpersonquery_global_1': 'email', - 'vglobal_0': [ - 'value', - ], - 'vpersonquery_global_1': '%posthog%', - }, - ) ---- -# name: test_parse_prop_clauses_funnel_step_element_prepend_regression - ( - 'AND ( (match(elements_chain, %(PREPEND__text_0_attributes_regex)s)))', - { - 'PREPEND__text_0_attributes_regex': '(text="Insights1")', - }, - ) ---- -# name: test_parse_prop_clauses_precalculated_cohort - ( - ' - AND ( pdi.person_id IN ( - SELECT DISTINCT person_id FROM cohortpeople WHERE team_id = %(team_id)s AND cohort_id = %(global_cohort_id_0)s AND version = %(global_version_0)s - )) - ', - { - 'global_cohort_id_0': 47, - 'global_version_0': None, - }, - ) ---- diff --git a/posthog/api/test/__snapshots__/test_query.ambr b/posthog/api/test/__snapshots__/test_query.ambr index 8d9a9a5c2c4c8..e2f4d71d39a1f 100644 --- a/posthog/api/test/__snapshots__/test_query.ambr +++ b/posthog/api/test/__snapshots__/test_query.ambr @@ -100,6 +100,32 @@ allow_experimental_object_type=1 ' --- +# name: TestQuery.test_event_property_regex_is_patched_for_dotall_setting + ' + /* user_id:0 request:_snapshot_ */ + SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'example_value'), ''), 'null'), '^"|"$', '') AS example_value + FROM events + WHERE and(equals(events.team_id, 2), match(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'example_value'), ''), 'null'), '^"|"$', ''), '(?-s)a.b'), equals(events.event, 'demonstrate dot all'), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) + ORDER BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'example_value'), ''), 'null'), '^"|"$', '') ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- +# name: TestQuery.test_event_property_regex_is_patched_for_dotall_setting_materialized + ' + /* user_id:0 request:_snapshot_ */ + SELECT nullIf(nullIf(events.mat_example_value, ''), 'null') AS example_value + FROM events + WHERE and(equals(events.team_id, 2), match(nullIf(nullIf(events.mat_example_value, ''), 'null'), '(?-s)a.b'), equals(events.event, 'demonstrate dot all'), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) + ORDER BY nullIf(nullIf(events.mat_example_value, ''), 'null') ASC + LIMIT 101 + OFFSET 0 SETTINGS readonly=2, + max_execution_time=60, + allow_experimental_object_type=1 + ' +--- # name: TestQuery.test_events_query_all_time_date ' /* user_id:0 request:_snapshot_ */ diff --git a/posthog/api/test/test_query.py b/posthog/api/test/test_query.py index d8bdb746a67b4..f46ebf3903a9e 100644 --- a/posthog/api/test/test_query.py +++ b/posthog/api/test/test_query.py @@ -277,6 +277,63 @@ def test_event_property_filter(self): response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": query.dict()}).json() self.assertEqual(len(response["results"]), 1) + @also_test_with_materialized_columns(event_properties=["example_value"]) + @snapshot_clickhouse_queries + def test_event_property_regex_is_patched_for_dotall_setting(self): + with freeze_time("2020-01-10 12:00:00"): + _create_person( + properties={"email": "tom@posthog.com"}, + distinct_ids=["2", "some-random-uid"], + team=self.team, + immediate=True, + ) + _create_event( + team=self.team, + event="demonstrate dot all", + distinct_id="2", + properties={ + "example_value": """a +b""" + }, + ) + with freeze_time("2020-01-10 12:11:00"): + _create_event( + team=self.team, + event="demonstrate dot all", + distinct_id="2", + # should match /a.b/ + properties={"example_value": "aab"}, + ) + with freeze_time("2020-01-10 12:11:00"): + _create_event( + team=self.team, + event="demonstrate dot all", + distinct_id="2", + # should match /a.b/ + properties={"example_value": "abb"}, + ) + with freeze_time("2020-01-10 12:11:00"): + _create_event( + team=self.team, + event="demonstrate dot all", + distinct_id="2", + # won't match /a.b/ + properties={"example_value": "abc"}, + ) + flush_persons_and_events() + + with freeze_time("2020-01-10 12:14:00"): + query = EventsQuery( + event="demonstrate dot all", + select=[ + "properties.example_value", + ], + properties=[{"key": "example_value", "value": "a.b", "operator": "regex", "type": "event"}], + ) + response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": query.dict()}).json() + # assert [x[0] for x in response["results"]] == ['aab', 'abb'] + assert [x[0] for x in response["results"]] == [] + @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email"]) @snapshot_clickhouse_queries def test_person_property_filter(self): diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py index 52e57d9c61c76..7ab951cecf082 100644 --- a/posthog/hogql/test/test_property.py +++ b/posthog/hogql/test/test_property.py @@ -141,7 +141,7 @@ def test_property_to_expr_event(self): ) self.assertEqual( self._property_to_expr({"type": "event", "key": "a", "value": ".*", "operator": "regex"}), - self._parse_expr("match(properties.a, '.*')"), + self._parse_expr("match(properties.a, '(?-s).*')"), ) self.assertEqual( self._property_to_expr({"type": "event", "key": "a", "value": ".*", "operator": "not_regex"}), @@ -203,7 +203,7 @@ def test_property_to_expr_event_list(self): ) self.assertEqual( self._property_to_expr({"type": "event", "key": "a", "value": ["b", "c"], "operator": "regex"}), - self._parse_expr("match(properties.a, 'b') or match(properties.a, 'c')"), + self._parse_expr("match(properties.a, '(?-s)b') or match(properties.a, '(?-s)c')"), ) # negative self.assertEqual( @@ -230,7 +230,7 @@ def test_property_to_expr_event_list(self): "operator": "not_regex", } ), - self._parse_expr("not(match(properties.a, 'b')) and not(match(properties.a, 'c'))"), + self._parse_expr("not(match(properties.a, '(?-s)b')) and not(match(properties.a, '(?-s)c'))"), ) def test_property_to_expr_feature(self): From 3d4c1af04477463b04c2be0e43a1cb1bb6a50e0d Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 21 Dec 2023 18:24:52 +0000 Subject: [PATCH 4/7] Update query snapshots --- .../models/test/__snapshots__/test_property.ambr | 11 +++++++++++ 1 file changed, 11 insertions(+) create mode 100644 ee/clickhouse/models/test/__snapshots__/test_property.ambr diff --git a/ee/clickhouse/models/test/__snapshots__/test_property.ambr b/ee/clickhouse/models/test/__snapshots__/test_property.ambr new file mode 100644 index 0000000000000..206fcec7ddd02 --- /dev/null +++ b/ee/clickhouse/models/test/__snapshots__/test_property.ambr @@ -0,0 +1,11 @@ +# name: test_parse_groups_persons_edge_case_with_single_filter + ( + 'AND ( has(%(vglobalperson_0)s, replaceRegexpAll(JSONExtractRaw(person_props, %(kglobalperson_0)s), \'^"|"$\', \'\')))', + { + 'kglobalperson_0': 'email', + 'vglobalperson_0': [ + '1@posthog.com', + ], + }, + ) +--- From 9c351b7911337fd8334d0b4c862acf6822e62c39 Mon Sep 17 00:00:00 2001 From: James Greenhill Date: Thu, 21 Dec 2023 10:28:25 -0800 Subject: [PATCH 5/7] add tests for old query building --- ee/clickhouse/models/test/test_property.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/ee/clickhouse/models/test/test_property.py b/ee/clickhouse/models/test/test_property.py index f55578878f91a..dc17feeafbdbe 100644 --- a/ee/clickhouse/models/test/test_property.py +++ b/ee/clickhouse/models/test/test_property.py @@ -1511,6 +1511,12 @@ def test_events(db, team) -> List[UUID]: "date_exact_including_seconds_and_milliseconds": f"{datetime(2021, 3, 31, 23, 59, 59, 12):%d/%m/%Y %H:%M:%S.%f}" }, ), + _create_event( + event="$pageview", + team=team, + distinct_id="whatever", + properties={"email": "test@post\nhog.com"}, + ), ] @@ -1749,6 +1755,8 @@ def clean_up_materialised_columns(): [20, 21], id="can match before date only values", ), + # Regression test, we were previously matching on newline characters + pytest.param(Property(key="email", value=r"test@post.hog.com", operator="regex"), []), ] From fdae0df39db93f4aad94ffc3e627899c538afa0e Mon Sep 17 00:00:00 2001 From: github-actions <41898282+github-actions[bot]@users.noreply.github.com> Date: Thu, 21 Dec 2023 18:38:19 +0000 Subject: [PATCH 6/7] Update query snapshots --- .../test_clickhouse_experiment_secondary_results.ambr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr index 5fa656c60136d..ddaff244437e8 100644 --- a/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr +++ b/ee/clickhouse/views/test/__snapshots__/test_clickhouse_experiment_secondary_results.ambr @@ -1,6 +1,6 @@ # name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results ' - /* user_id:132 celery:posthog.celery.sync_insight_caching_state */ + /* user_id:135 celery:posthog.celery.sync_insight_caching_state */ SELECT team_id, date_diff('second', max(timestamp), now()) AS age FROM events From 3a512ab57e2f12127d2ecb89c675076f691890f5 Mon Sep 17 00:00:00 2001 From: Paul D'Ambra Date: Thu, 21 Dec 2023 19:48:30 +0000 Subject: [PATCH 7/7] failing tests --- .../test/__snapshots__/test_property.ambr | 11 -------- ee/clickhouse/models/test/test_property.py | 11 +++++++- .../api/test/__snapshots__/test_query.ambr | 26 ------------------- posthog/api/test/test_query.py | 4 +-- posthog/hogql/printer.py | 8 +++--- posthog/hogql/test/test_property.py | 2 +- 6 files changed, 17 insertions(+), 45 deletions(-) delete mode 100644 ee/clickhouse/models/test/__snapshots__/test_property.ambr diff --git a/ee/clickhouse/models/test/__snapshots__/test_property.ambr b/ee/clickhouse/models/test/__snapshots__/test_property.ambr deleted file mode 100644 index 206fcec7ddd02..0000000000000 --- a/ee/clickhouse/models/test/__snapshots__/test_property.ambr +++ /dev/null @@ -1,11 +0,0 @@ -# name: test_parse_groups_persons_edge_case_with_single_filter - ( - 'AND ( has(%(vglobalperson_0)s, replaceRegexpAll(JSONExtractRaw(person_props, %(kglobalperson_0)s), \'^"|"$\', \'\')))', - { - 'kglobalperson_0': 'email', - 'vglobalperson_0': [ - '1@posthog.com', - ], - }, - ) ---- diff --git a/ee/clickhouse/models/test/test_property.py b/ee/clickhouse/models/test/test_property.py index dc17feeafbdbe..ee3a768b37733 100644 --- a/ee/clickhouse/models/test/test_property.py +++ b/ee/clickhouse/models/test/test_property.py @@ -1515,8 +1515,16 @@ def test_events(db, team) -> List[UUID]: event="$pageview", team=team, distinct_id="whatever", + # new line character shouldn't be matched by a single regex dot properties={"email": "test@post\nhog.com"}, ), + _create_event( + event="$pageview", + team=team, + distinct_id="whatever", + # not a new line character - instead a single character - should match + properties={"email": "test@postnhog.com"}, + ), ] @@ -1756,7 +1764,8 @@ def clean_up_materialised_columns(): id="can match before date only values", ), # Regression test, we were previously matching on newline characters - pytest.param(Property(key="email", value=r"test@post.hog.com", operator="regex"), []), + # this should match one of two possibles (how are you supposed to figure out the expected index 🙈) + pytest.param(Property(key="email", value=r"test@post.hog.com", operator="regex"), [28]), ] diff --git a/posthog/api/test/__snapshots__/test_query.ambr b/posthog/api/test/__snapshots__/test_query.ambr index e2f4d71d39a1f..8d9a9a5c2c4c8 100644 --- a/posthog/api/test/__snapshots__/test_query.ambr +++ b/posthog/api/test/__snapshots__/test_query.ambr @@ -100,32 +100,6 @@ allow_experimental_object_type=1 ' --- -# name: TestQuery.test_event_property_regex_is_patched_for_dotall_setting - ' - /* user_id:0 request:_snapshot_ */ - SELECT replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'example_value'), ''), 'null'), '^"|"$', '') AS example_value - FROM events - WHERE and(equals(events.team_id, 2), match(replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'example_value'), ''), 'null'), '^"|"$', ''), '(?-s)a.b'), equals(events.event, 'demonstrate dot all'), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) - ORDER BY replaceRegexpAll(nullIf(nullIf(JSONExtractRaw(events.properties, 'example_value'), ''), 'null'), '^"|"$', '') ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- -# name: TestQuery.test_event_property_regex_is_patched_for_dotall_setting_materialized - ' - /* user_id:0 request:_snapshot_ */ - SELECT nullIf(nullIf(events.mat_example_value, ''), 'null') AS example_value - FROM events - WHERE and(equals(events.team_id, 2), match(nullIf(nullIf(events.mat_example_value, ''), 'null'), '(?-s)a.b'), equals(events.event, 'demonstrate dot all'), less(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-10 12:14:05.000000', 6, 'UTC')), greater(toTimeZone(events.timestamp, 'UTC'), toDateTime64('2020-01-09 12:14:00.000000', 6, 'UTC'))) - ORDER BY nullIf(nullIf(events.mat_example_value, ''), 'null') ASC - LIMIT 101 - OFFSET 0 SETTINGS readonly=2, - max_execution_time=60, - allow_experimental_object_type=1 - ' ---- # name: TestQuery.test_events_query_all_time_date ' /* user_id:0 request:_snapshot_ */ diff --git a/posthog/api/test/test_query.py b/posthog/api/test/test_query.py index f46ebf3903a9e..45e3570ffca71 100644 --- a/posthog/api/test/test_query.py +++ b/posthog/api/test/test_query.py @@ -331,8 +331,8 @@ def test_event_property_regex_is_patched_for_dotall_setting(self): properties=[{"key": "example_value", "value": "a.b", "operator": "regex", "type": "event"}], ) response = self.client.post(f"/api/projects/{self.team.id}/query/", {"query": query.dict()}).json() - # assert [x[0] for x in response["results"]] == ['aab', 'abb'] - assert [x[0] for x in response["results"]] == [] + assert "(?-s)" in response["hogql"] + assert [x[0] for x in response["results"]] == ["aab", "abb"] @also_test_with_materialized_columns(event_properties=["key"], person_properties=["email"]) @snapshot_clickhouse_queries diff --git a/posthog/hogql/printer.py b/posthog/hogql/printer.py index acade9b195878..367a323697da9 100644 --- a/posthog/hogql/printer.py +++ b/posthog/hogql/printer.py @@ -542,16 +542,16 @@ def visit_compare_operation(self, node: ast.CompareOperation): elif node.op == ast.CompareOperationOp.GlobalNotIn: op = f"globalNotIn({left}, {right})" elif node.op == ast.CompareOperationOp.Regex: - op = f"match({left}, {right})" + op = f"match({left}, concat('(?-s)', {right}))" value_if_both_sides_are_null = True elif node.op == ast.CompareOperationOp.NotRegex: - op = f"not(match({left}, {right}))" + op = f"not(match({left}, concat('(?-s)', {right})))" value_if_one_side_is_null = True elif node.op == ast.CompareOperationOp.IRegex: - op = f"match({left}, concat('(?i)', {right}))" + op = f"match({left}, concat('(?i-s)', {right}))" value_if_both_sides_are_null = True elif node.op == ast.CompareOperationOp.NotIRegex: - op = f"not(match({left}, concat('(?i)', {right})))" + op = f"not(match({left}, concat('(?i-s)', {right})))" value_if_one_side_is_null = True elif node.op == ast.CompareOperationOp.Gt: op = f"greater({left}, {right})" diff --git a/posthog/hogql/test/test_property.py b/posthog/hogql/test/test_property.py index 7ab951cecf082..7f7b179ee306e 100644 --- a/posthog/hogql/test/test_property.py +++ b/posthog/hogql/test/test_property.py @@ -145,7 +145,7 @@ def test_property_to_expr_event(self): ) self.assertEqual( self._property_to_expr({"type": "event", "key": "a", "value": ".*", "operator": "not_regex"}), - self._parse_expr("not(match(properties.a, '.*'))"), + self._parse_expr("not(match(properties.a, '(?-s).*'))"), ) self.assertEqual( self._property_to_expr({"type": "event", "key": "a", "value": [], "operator": "exact"}),