Skip to content

Commit

Permalink
Merge pull request #734 from xxyzz/en
Browse files Browse the repository at this point in the history
[en] don't process soft redirect templates in `parse_language()`
  • Loading branch information
xxyzz authored Jul 22, 2024
2 parents ffd396d + 5e81ec9 commit 59f77f9
Show file tree
Hide file tree
Showing 2 changed files with 35 additions and 9 deletions.
20 changes: 12 additions & 8 deletions src/wiktextract/extractor/en/page.py
Original file line number Diff line number Diff line change
Expand Up @@ -1242,7 +1242,7 @@ def parse_part_of_speech(posnode: WikiNode, pos: str) -> None:
# to identify head templates. Too bad it's None.

# ignore {{category}}, {{cat}}... etc.
if node.largs[0][0] in stop_head_at_these_templates:
if node.template_name in stop_head_at_these_templates:
# we've reached a template that should be at the end; skip it
continue

Expand All @@ -1251,7 +1251,7 @@ def parse_part_of_speech(posnode: WikiNode, pos: str) -> None:
# head parsing quite well.
# node.largs[0][0] should always be str, but can't type-check
# that.
if is_panel_template(wxr, node.largs[0][0]): # type: ignore[arg-type]
if is_panel_template(wxr, node.template_name):
continue
# skip these templates
# if node.largs[0][0] in skip_these_templates_in_head:
Expand Down Expand Up @@ -1408,7 +1408,7 @@ def parse_part_of_speech(posnode: WikiNode, pos: str) -> None:
# If there are no senses extracted, add a dummy sense. We want to
# keep tags extracted from the head for the dummy sense.
push_sense() # Make sure unfinished data pushed, and start clean sense
if not pos_datas:
if len(pos_datas) == 0:
data_extend(sense_data, "tags", header_tags)
data_append(sense_data, "tags", "no-gloss")
push_sense()
Expand Down Expand Up @@ -1514,11 +1514,15 @@ def process_gloss_without_list(
for node in strip_nodes(nodes):
if isinstance(node, WikiNode):
if node.kind == NodeKind.TEMPLATE:
template_name = node.largs[0][0]
if TYPE_CHECKING:
assert isinstance(template_name, str)
if template_name == "head" or template_name.startswith(
f"{lang_code}-"
if node.template_name in (
"zh-see",
"ja-see",
"ja-see-kango",
):
continue # soft redirect
elif (
node.template_name == "head"
or node.template_name.startswith(f"{lang_code}-")
):
header_nodes.append(node)
continue
Expand Down
24 changes: 23 additions & 1 deletion tests/test_page.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,7 +271,7 @@ def test_page5(self):
"examples": [
{
"text": "example 1 causes sense "
"1 to get pushed"
"1 to get pushed"
}
],
},
Expand Down Expand Up @@ -775,3 +775,25 @@ def test_plusobj_template(self, mock_get_page):
}
],
)

def test_zh_see_under_pos_title(self):
    """Check a soft-redirect template placed directly under a POS title.

    Regression test for GH issue #730.  The page consists of a language
    heading, a "Definitions" heading and a lone ``{{zh-see}}`` template.
    The expected entry carries the redirect target in ``redirects`` and a
    single dummy sense tagged ``no-gloss``.
    """
    # The language heading must be balanced ("==Chinese==").  The wikitext
    # lines inside the literal stay at column 0 because headings are only
    # recognised at the start of a line.
    self.assertEqual(
        parse_page(
            self.wxr,
            "马",
            """==Chinese==
===Definitions===
{{zh-see|馬}}""",
        ),
        [
            {
                "lang": "Chinese",
                "lang_code": "zh",
                "pos": "character",
                "redirects": ["馬"],
                "senses": [{"tags": ["no-gloss"]}],
                "word": "马",
            }
        ],
    )

0 comments on commit 59f77f9

Please sign in to comment.