From cd183df181339a34d7649e77ee731861b5ad2a50 Mon Sep 17 00:00:00 2001 From: xxyzz Date: Wed, 11 Sep 2024 10:34:23 +0800 Subject: [PATCH] Only parse external link as text if `<nowiki/>` directly after `[` Putting `<nowiki/>` at any other place expands to a normal external URL link. --- src/wikitextprocessor/core.py | 6 +++++- tests/test_parser.py | 15 +++++++++++++++ 2 files changed, 20 insertions(+), 1 deletion(-) diff --git a/src/wikitextprocessor/core.py b/src/wikitextprocessor/core.py index d9b1bbfb..3b37ce95 100644 --- a/src/wikitextprocessor/core.py +++ b/src/wikitextprocessor/core.py @@ -767,7 +767,11 @@ def repl_link(m: re.Match) -> CookieChar: def repl_extlink(m: re.Match) -> CookieChar: """Replacement function for external links [...]. This is also used to replace bracketed sections, such as [...].""" - nowiki = MAGIC_NOWIKI_CHAR in m.group(0) + + # parse as text if tag at the start + nowiki = ( + re.match(r"\[\s*" + MAGIC_NOWIKI_CHAR, m.group(0)) is not None + ) orig = m.group(1) if not orig.startswith(URL_STARTS): return MAGIC_LBRACKET_CHAR + orig + MAGIC_RBRACKET_CHAR diff --git a/tests/test_parser.py b/tests/test_parser.py index 7cecc950..5262a2f8 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2947,6 +2947,21 @@ def test_nowiki_in_html_attr_value(self): self.assertIsInstance(span_node, HTMLNode) self.assertEqual(span_node.tag, "span") + def test_nowiki_tag_in_external_link(self): + # https://zh.wiktionary.org/wiki/Template:RQ:Qur'an + self.ctx.start_page("محمد") + root = self.ctx.parse("[https://quran.com/3/144 3:144]") + url_node = root.children[0] + self.assertIsInstance(url_node, WikiNode) + self.assertEqual(url_node.kind, NodeKind.URL) + self.assertEqual( + url_node.largs, [["https://quran.com/3/144"], ["3:144"]] + ) + + root = self.ctx.parse("[ https://quran.com/3/144 3:144]") + text_node = root.children[0] + self.assertEqual(text_node, "[ ") + # XXX implement marking for links, templates # - https://en.wikipedia.org/wiki/Help:Wikitext#Nowiki