From 4bfab8490cfd9b6af30478f0de58fe52d1d790e1 Mon Sep 17 00:00:00 2001
From: xxyzz
Date: Fri, 21 Jun 2024 14:11:19 +0800
Subject: [PATCH] Only parse `----` as horizontal rule if it's at the start of line

https://en.wikipedia.org/wiki/Help:Wikitext#Horizontal_rule

GitHub issue tatuylonen/wiktextract#536
---
 src/wikitextprocessor/parser.py |  2 +-
 tests/test_parser.py            | 12 ++++++++++++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/src/wikitextprocessor/parser.py b/src/wikitextprocessor/parser.py
index b9bcc243..782c7830 100644
--- a/src/wikitextprocessor/parser.py
+++ b/src/wikitextprocessor/parser.py
@@ -2256,7 +2256,7 @@ def process_text(ctx: "Wtp", text: str) -> None:
             subtitle_end_fn(ctx, token)
         elif token.startswith("<"):  # HTML tag like construct
             tag_fn(ctx, token)
-        elif token.startswith("----"):
+        elif token.startswith("----") and ctx.beginning_of_line:
             hline_fn(ctx, token)
         elif re.match(list_prefix_re, token):
             list_fn(ctx, token)
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 1782a655..cbf53a53 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -2920,6 +2920,18 @@ def test_zh_x_html(self):
         self.assertEqual(span_text, "example text")
         self.assertEqual(dd_text, "translation text")
 
+    def test_horizontal_rule_in_template_arg(self):
+        # GitHub issue tatuylonen/wiktextract#536
+        self.ctx.start_page("shithole")
+        root = self.ctx.parse("{{alt|en|—hole|----hole}}")
+        template_node = root.children[0]
+        self.assertIsInstance(template_node, TemplateNode)
+        self.assertEqual(len(root.children), 1)
+        self.assertEqual(
+            template_node.template_parameters,
+            {1: "en", 2: "—hole", 3: "----hole"},
+        )
+
     # XXX implement marking for links, templates
     # - https://en.wikipedia.org/wiki/Help:Wikitext#Nowiki