From 1ccba65c9aeda9e3dd95aad5762fc5aa304914f4 Mon Sep 17 00:00:00 2001 From: Torben <59419684+entorb@users.noreply.github.com> Date: Sat, 23 Nov 2024 09:07:46 +0100 Subject: [PATCH] check_chapters --- scripts/check_chapters.py | 24 ++++++++++- scripts/check_chapters_test.py | 75 ++++++++++++++++++++++------------ 2 files changed, 72 insertions(+), 27 deletions(-) diff --git a/scripts/check_chapters.py b/scripts/check_chapters.py index bf323664b..fc9142196 100755 --- a/scripts/check_chapters.py +++ b/scripts/check_chapters.py @@ -184,8 +184,17 @@ def fix_ellipsis(s: str) -> str: # remove all spaces around ellipsis s = re.sub(r" *… *", r"…", s) - # after punctuation: add space - s = re.sub(r"(?<=[\.\?!:,;])…", r" …", s) + if settings["lang"] != "DE": + # after punctuation: add space + s = re.sub(r"(?<=[\.\?!:,;])…", r" …", s) + + # new rule for German + if settings["lang"] == "DE": + # before: add space if not at start of line or quote + s = re.sub(r"(? str: s = re.sub(r"(\d)\-(?=\d)", r"\1–", s) # NOT: mid-length dash -> em dash (caution: false positives!) # s = s.replace("–", "—") + + # new rule for German + if settings["lang"] == "DE": + # remove all spaces around hyphens + s = re.sub(r" *— *", r"—", s) + + # before: add space if not at start of line or quote + s = re.sub(r"(? None: @pytest.mark.parametrize("lang", ["EN", "DE"]) def test_fix_ellipsis(lang: str) -> None: settings["lang"] = lang - pairs = [ - ("foo…bar", "foo…bar"), - ("foo … bar", "foo…bar"), - ("foo… bar", "foo…bar"), - ("foo …bar", "foo…bar"), - ("foo, …", "foo, …"), - ] + pairs = [] + if lang != "DE": + pairs.extend( + [ + ("foo...bar", "foo…bar"), + ("foo…bar", "foo…bar"), + ("foo … bar", "foo…bar"), + ("foo… bar", "foo…bar"), + ("foo …bar", "foo…bar"), + ("foo, …", "foo, …"), + ("foo …! bar", "foo…! bar"), + ] + ) + if lang == "DE": + pairs.extend( + [ + ("foo...bar", "foo … bar"), + ("foo…bar", "foo … bar"), + ("foo … bar", "foo … bar"), + ("foo… bar", "foo … bar"), + ("foo …bar", "foo … bar"), + ("foo, …“", "foo, …“"), + ("foo,…“", "foo, …“"), + ("foo …! bar", "foo …! bar"), + ] + ) + checkit(fix_ellipsis, pairs) @@ -90,26 +110,31 @@ def test_fix_hyphens(lang: str) -> None: settings["lang"] = lang pairs = [ ("2-3-4", "2–3–4"), - (" —,", "—,"), - (" —.", "—."), - (" —!", "—!"), - (" —?", "—?"), - ("— asdf", "—asdf"), - ("- asdf", "—asdf"), - ("-asdf", "—asdf"), ] + if lang != "DE": + pairs.extend( + ( + (" —,", "—,"), + (" —.", "—."), + (" —!", "—!"), + (" —?", "—?"), + ("— asdf", "—asdf"), + ("- asdf", "—asdf"), + ("-asdf", "—asdf"), + ) + ) if lang == "DE": pairs.extend( - [ - ("Text —", "Text—"), - ("Text—„", "Text— „"), - ("Text —„", "Text— „"), - ("Text „ —Quote", "Text „—Quote"), - ("Text „ — Quote", "Text „—Quote"), - ("Text—„— Quote", "Text— „—Quote"), - ("Text -“asdf", "Text—“ asdf"), - ("Text —“", "Text—“"), - ] + ( + ("Text — Text", "Text — Text"), + ("Text—„", "Text — „"), + ("Text —„", "Text — „"), + ("Text „ —Quote", "Text „— Quote"), + ("Text „ — Quote", "Text „— Quote"), + ("Text—„— Quote", "Text — „— Quote"), + ("Text -“asdf", "Text —“ asdf"), + ("Text —“", "Text —“"), + ) ) checkit(fix_hyphens, pairs) @@ -217,4 +242,4 @@ def checkit(fct: Callable, pairs: list[tuple[str, str]]) -> None: # test in complete fix_line context assert ( fix_line(text) == expected_output - ), f"'{fix_line(text)}' != '{expected_output}'" + ), f"'{fix_line(text)}' != '{expected_output}' (fix_line)"