Skip to content

Commit

Permalink
fix(elements): Avoid matching element tag on classes (#20533)
Browse files Browse the repository at this point in the history
* fix(elements): Avoid matching element tag on classes

* Update test_property.py

* Make test a bit more robust

* Update query snapshots

* Update query snapshots

---------

Co-authored-by: github-actions <41898282+github-actions[bot]@users.noreply.github.com>
  • Loading branch information
Twixes and github-actions[bot] authored Mar 19, 2024
1 parent a6eb82c commit b6162a6
Show file tree
Hide file tree
Showing 4 changed files with 64 additions and 21 deletions.
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
# serializer version: 1
# name: ClickhouseTestExperimentSecondaryResults.test_basic_secondary_metric_results
'''
/* user_id:107 celery:posthog.tasks.tasks.sync_insight_caching_state */
/* user_id:108 celery:posthog.tasks.tasks.sync_insight_caching_state */
SELECT team_id,
date_diff('second', max(timestamp), now()) AS age
FROM events
Expand Down
18 changes: 10 additions & 8 deletions posthog/hogql/test/test_property.py
Original file line number Diff line number Diff line change
Expand Up @@ -431,37 +431,39 @@ def test_tag_name_to_expr(self):
def test_selector_to_expr(self):
self.assertEqual(
self._selector_to_expr("div"),
clear_locations(elements_chain_match('div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')),
clear_locations(elements_chain_match('(^|;)div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')),
)
self.assertEqual(
self._selector_to_expr("div > div"),
clear_locations(
elements_chain_match(
'div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s))).*'
'(^|;)div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))div([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s))).*'
)
),
)
self.assertEqual(
self._selector_to_expr("a[href='boo']"),
clear_locations(
elements_chain_match('a.*?href="boo".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')
elements_chain_match('(^|;)a.*?href="boo".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')
),
)
self.assertEqual(
self._selector_to_expr(".class"),
clear_locations(elements_chain_match('.*?\\.class([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')),
clear_locations(
elements_chain_match('(^|;).*?\\.class([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')
),
)
self.assertEqual(
self._selector_to_expr("#withid"),
clear_locations(
elements_chain_match('.*?attr_id="withid".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')
elements_chain_match('(^|;).*?attr_id="withid".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))')
),
)
self.assertEqual(
self._selector_to_expr("#with-dashed-id"),
clear_locations(
elements_chain_match(
'.*?attr_id="with\\-dashed\\-id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'
'(^|;).*?attr_id="with\\-dashed\\-id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'
)
),
)
Expand All @@ -473,7 +475,7 @@ def test_selector_to_expr(self):
self._selector_to_expr("#with\\slashed\\id"),
clear_locations(
elements_chain_match(
'.*?attr_id="with\\\\slashed\\\\id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'
'(^|;).*?attr_id="with\\\\slashed\\\\id".*?([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'
)
),
)
Expand Down Expand Up @@ -526,7 +528,7 @@ def test_action_to_expr(self):
"event = '$autocapture' and elements_chain =~ {regex1} and elements_chain =~ {regex2}",
{
"regex1": ast.Constant(
value='a.*?\\.active\\..*?nav\\-link([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'
value='(^|;)a.*?\\.active\\..*?nav\\-link([-_a-zA-Z0-9\\.:"= ]*?)?($|;|:([^;^\\s]*(;|$|\\s)))'
),
"regex2": ast.Constant(value="(^|;)a(\\.|$|;|:)"),
},
Expand Down
23 changes: 13 additions & 10 deletions posthog/models/property/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -847,21 +847,24 @@ def process_ok_values(ok_values: Any, operator: OperatorType) -> List[str]:
def build_selector_regex(selector: Selector) -> str:
regex = r""
for tag in selector.parts:
if tag.data.get("tag_name") and isinstance(tag.data["tag_name"], str):
if tag.data["tag_name"] == "*":
regex += ".+"
else:
regex += re.escape(tag.data["tag_name"])
if tag.data.get("tag_name") and isinstance(tag.data["tag_name"], str) and tag.data["tag_name"] != "*":
# The elements in the elements_chain are separated by the semicolon
regex += re.escape(tag.data["tag_name"])
if tag.data.get("attr_class__contains"):
regex += r".*?\.{}".format(r"\..*?".join([re.escape(s) for s in sorted(tag.data["attr_class__contains"])]))
regex += r".*?\." + r"\..*?".join([re.escape(s) for s in sorted(tag.data["attr_class__contains"])])
if tag.ch_attributes:
regex += ".*?"
regex += r".*?"
for key, value in sorted(tag.ch_attributes.items()):
regex += '{}="{}".*?'.format(re.escape(key), re.escape(str(value)))
regex += rf'{re.escape(key)}="{re.escape(str(value))}".*?'
regex += r'([-_a-zA-Z0-9\.:"= ]*?)?($|;|:([^;^\s]*(;|$|\s)))'
if tag.direct_descendant:
regex += ".*"
return regex
regex += r".*"
if regex:
# Always start matching at the beginning of an element in the chain string
# This is to avoid issues like matching elements with class "foo" when looking for elements with tag name "foo"
return r"(^|;)" + regex
else:
return r""


class HogQLPropertyChecker(TraversingVisitor):
Expand Down
42 changes: 40 additions & 2 deletions posthog/models/test/test_event_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,15 @@ def _setup_action_selector_events(self):
attr_class=["one-class"],
),
Element(tag_name="button", nth_child=0, nth_of_type=0),
Element(tag_name="div", nth_child=0, nth_of_type=0),
Element(tag_name="div", nth_child=0, nth_of_type=0, attr_id="nested"),
Element(
# Important that in this hierarchy the div is sandwiched between button and section.
# This way makes sure that any conditions which should match this element also work
# if the element is neither first nor last in the hierarchy.
tag_name="div",
nth_child=0,
nth_of_type=0,
),
Element(tag_name="section", nth_child=0, nth_of_type=0, attr_id="nested"),
],
)

Expand Down Expand Up @@ -417,6 +424,37 @@ def test_with_class_with_escaped_slashes(self):
self.assertEqual(events[0].uuid, event1_uuid)
self.assertEqual(len(events), 1)

def test_with_tag_matching_class_selector(self):
_create_person(distinct_ids=["whatever"], team=self.team)
action1 = Action.objects.create(team=self.team)
ActionStep.objects.create(
event="$autocapture",
action=action1,
selector="input", # This should ONLY match the tag, but not a class named `input`
)
event_matching_tag_uuid = _create_event(
event="$autocapture",
team=self.team,
distinct_id="whatever",
elements=[
Element(tag_name="span", attr_class=None),
Element(tag_name="input", attr_class=["button"]), # Should match
],
)
_create_event(
event="$autocapture",
team=self.team,
distinct_id="whatever",
elements=[
Element(tag_name="span", attr_class=None),
Element(tag_name="button", attr_class=["input"]), # Cannot match
],
)

events = _get_events_for_action(action1)
self.assertEqual(len(events), 1)
self.assertEqual(events[0].uuid, event_matching_tag_uuid)

def test_attributes(self):
_create_person(distinct_ids=["whatever"], team=self.team)
event1_uuid = _create_event(
Expand Down

0 comments on commit b6162a6

Please sign in to comment.