diff --git a/README.md b/README.md index 7beecc70..1927d239 100644 --- a/README.md +++ b/README.md @@ -53,22 +53,14 @@ cd wikitextprocessor python -m venv .venv source .venv/bin/activate python -m pip install -U pip -python -m pip install --use-pep517 . +python -m pip install -e . ``` -Alternatively, you can install from pypi.org: - -``` -python -m pip install wikitextprocessor -``` - -If you are installing wiktextract from source, you also need to install wikitextprocessor from source separately; otherwise, a newer wiktextract version will be installed alongside an older pypi version of wikitextprocessor, which will not work out. - ### Running tests This package includes tests written using the `unittest` framework. The test dependencies can be installed with command -`python -m pip install --use-pep517 -e ".[dev]"`. +`python -m pip install -e ".[dev]"`. To run the tests, use the following command in the top-level directory: diff --git a/src/wikitextprocessor/wikihtml.py b/src/wikitextprocessor/wikihtml.py index 1f0a7748..8bb9f284 100644 --- a/src/wikitextprocessor/wikihtml.py +++ b/src/wikitextprocessor/wikihtml.py @@ -60,7 +60,7 @@ "del": {"parents": ["phrasing"], "content": ["phrasing"]}, "dfn": {"parents": ["phrasing"], "content": ["phrasing"]}, "div": {"parents": ["flow", "dl"], "content": ["flow"]}, - "dl": {"parents": ["flow"], "content": []}, + "dl": {"parents": ["flow"], "content": ["flow"]}, "dt": { "parents": ["dl", "div"], "close-next": ["dd", "dt"], diff --git a/tests/test_parser.py b/tests/test_parser.py index a6c9c0c8..1782a655 100644 --- a/tests/test_parser.py +++ b/tests/test_parser.py @@ -2903,6 +2903,23 @@ def test_html_end_tag_slash_after_attr(self): self.assertEqual(root.children[2], "\n") self.assertEqual(root.children[3].kind, NodeKind.LIST) + def test_zh_x_html(self): + # https://zh.wiktionary.org/wiki/大家 + # https://zh.wiktionary.org/wiki/Template:Zh-x + self.ctx.start_page("大家") + root = self.ctx.parse( + """
example text
translation text
""" # noqa: E501 + ) + span_text = "" + dd_text = "" + for dl_tag in root.find_html("dl"): + for span_tag in dl_tag.find_html("span"): + span_text = span_tag.children[0] + for dd_tag in dl_tag.find_html("dd"): + dd_text = dd_tag.children[0] + self.assertEqual(span_text, "example text") + self.assertEqual(dd_text, "translation text") + # XXX implement marking for links, templates # - https://en.wikipedia.org/wiki/Help:Wikitext#Nowiki