Skip to content

Commit

Permalink
check_chapters
Browse files Browse the repository at this point in the history
  • Loading branch information
entorb committed Nov 23, 2024
1 parent 2f0f7b1 commit 1ccba65
Show file tree
Hide file tree
Showing 2 changed files with 72 additions and 27 deletions.
24 changes: 22 additions & 2 deletions scripts/check_chapters.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,8 +184,17 @@ def fix_ellipsis(s: str) -> str:
# remove all spaces around ellipsis
s = re.sub(r" *… *", r"…", s)

# after punctuation: add space
s = re.sub(r"(?<=[\.\?!:,;])…", r" …", s)
if settings["lang"] != "DE":
# after punctuation: add space
s = re.sub(r"(?<=[\.\?!:,;])…", r" …", s)

# new rule for German
if settings["lang"] == "DE":
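# before: add space unless preceded by a space, an opening quote or a newline
# (NOTE: `^` inside [...] is a literal caret, so the start of the text is not excluded)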
s = re.sub(r"(?<![ „‚\n^])…", r" …", s)
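# after: add space unless followed by a space, punctuation, a closing quote or a newline
# (NOTE: `$` inside [...] is a literal dollar sign, so the end of the text is not excluded)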
s = re.sub(r"…(?![ \.\?!:,;“‘\n$])", r"… ", s)

return s


Expand Down Expand Up @@ -474,6 +483,17 @@ def fix_hyphens(s: str) -> str:
s = re.sub(r"(\d)\-(?=\d)", r"\1–", s)
# NOT: mid-length dash -> em dash (caution: false positives!)
# s = s.replace("–", "—")

# new rule for German
if settings["lang"] == "DE":
# remove all spaces around em dashes
s = re.sub(r" *— *", r"—", s)

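# before: add space unless preceded by a space, an opening quote or a newline
# (NOTE: `^` inside [...] is a literal caret, so the start of the text is not excluded)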
s = re.sub(r"(?<![ „‚\n^])—", r" —", s)
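# after: add space unless followed by a space, punctuation, a closing quote or a newline
# (NOTE: `$` inside [...] is a literal dollar sign, so the end of the text is not excluded)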
s = re.sub(r"—(?![ \.\?!:,;“‘\n$])", r"— ", s)

return s


Expand Down
75 changes: 50 additions & 25 deletions scripts/check_chapters_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,13 +50,33 @@ def test_fix_common_typos(lang: str) -> None:
@pytest.mark.parametrize("lang", ["EN", "DE"])
def test_fix_ellipsis(lang: str) -> None:
settings["lang"] = lang
pairs = [
("foo…bar", "foo…bar"),
("foo … bar", "foo…bar"),
("foo… bar", "foo…bar"),
("foo …bar", "foo…bar"),
("foo, …", "foo, …"),
]
pairs = []
if lang != "DE":
pairs.extend(
[
("foo...bar", "foo…bar"),
("foo…bar", "foo…bar"),
("foo … bar", "foo…bar"),
("foo… bar", "foo…bar"),
("foo …bar", "foo…bar"),
("foo, …", "foo, …"),
("foo …! bar", "foo…! bar"),
]
)
if lang == "DE":
pairs.extend(
[
("foo...bar", "foo … bar"),
("foo…bar", "foo … bar"),
("foo … bar", "foo … bar"),
("foo… bar", "foo … bar"),
("foo …bar", "foo … bar"),
("foo, …“", "foo, …“"),
("foo,…“", "foo, …“"),
("foo …! bar", "foo …! bar"),
]
)

checkit(fix_ellipsis, pairs)


Expand Down Expand Up @@ -90,26 +110,31 @@ def test_fix_hyphens(lang: str) -> None:
settings["lang"] = lang
pairs = [
("2-3-4", "2–3–4"),
(" —,", "—,"),
(" —.", "—."),
(" —!", "—!"),
(" —?", "—?"),
("— asdf", "—asdf"),
("- asdf", "—asdf"),
("-asdf", "—asdf"),
]
if lang != "DE":
pairs.extend(
(
(" —,", "—,"),
(" —.", "—."),
(" —!", "—!"),
(" —?", "—?"),
("— asdf", "—asdf"),
("- asdf", "—asdf"),
("-asdf", "—asdf"),
)
)
if lang == "DE":
pairs.extend(
[
("Text —", "Text"),
("Text—„", "Text— „"),
("Text —„", "Text— „"),
("Text „ —Quote", "Text „—Quote"),
("Text „ — Quote", "Text „—Quote"),
("Text—„— Quote", "Text— „—Quote"),
("Text -“asdf", "Text—“ asdf"),
("Text —“", "Text—“"),
]
(
("Text — Text", "Text — Text"),
("Text—„", "Text — „"),
("Text —„", "Text — „"),
("Text „ —Quote", "Text „— Quote"),
("Text „ — Quote", "Text „— Quote"),
("Text—„— Quote", "Text — „— Quote"),
("Text -“asdf", "Text —“ asdf"),
("Text —“", "Text —“"),
)
)
checkit(fix_hyphens, pairs)

Expand Down Expand Up @@ -217,4 +242,4 @@ def checkit(fct: Callable, pairs: list[tuple[str, str]]) -> None:
# test in complete fix_line context
assert (
fix_line(text) == expected_output
), f"'{fix_line(text)}' != '{expected_output}'"
), f"'{fix_line(text)}' != '{expected_output}' (fix_line)"

0 comments on commit 1ccba65

Please sign in to comment.