From a5533c25d7e1e4861f07e52dcd2f106138a0ed39 Mon Sep 17 00:00:00 2001
From: Torben <59419684+entorb@users.noreply.github.com>
Date: Sat, 27 Apr 2024 02:18:01 +0200
Subject: [PATCH] ebook script renaming and unit tests

---
 scripts/ebook/3.py                | 156 ------------------------------
 scripts/ebook/6.py                | 131 -------------------------
 scripts/ebook/{1.sh => step_1.sh} |   0
 scripts/ebook/{2.sh => step_2.sh} |   0
 scripts/ebook/step_3.py           | 155 +++++++++++++++++++++++++++++
 scripts/ebook/{4.py => step_4.py} |  24 ++---
 scripts/ebook/step_4_test.py      |  23 +++++
 scripts/ebook/{5.sh => step_5.sh} |   0
 scripts/ebook/step_6.py           | 131 +++++++++++++++++++++++++
 scripts/ebook/{7.sh => step_7.sh} |   0
 scripts/make_ebooks.sh            |  14 +--
 11 files changed, 328 insertions(+), 306 deletions(-)
 delete mode 100755 scripts/ebook/3.py
 delete mode 100755 scripts/ebook/6.py
 rename scripts/ebook/{1.sh => step_1.sh} (100%)
 rename scripts/ebook/{2.sh => step_2.sh} (100%)
 create mode 100755 scripts/ebook/step_3.py
 rename scripts/ebook/{4.py => step_4.py} (63%)
 create mode 100644 scripts/ebook/step_4_test.py
 rename scripts/ebook/{5.sh => step_5.sh} (100%)
 create mode 100755 scripts/ebook/step_6.py
 rename scripts/ebook/{7.sh => step_7.sh} (100%)

diff --git a/scripts/ebook/3.py b/scripts/ebook/3.py
deleted file mode 100755
index 861d2c82c..000000000
--- a/scripts/ebook/3.py
+++ /dev/null
@@ -1,156 +0,0 @@
-#!/usr/bin/env python3
-# by Torben Menke https://entorb.net
-
-"""
-Modify flattened .tex file.
-"""
-
-import datetime as dt
-import os
-import re
-from pathlib import Path
-
-os.chdir(Path(__file__).parent.parent.parent)
-
-source_file = Path("tmp/hpmor-epub-2-flatten.tex")
-target_file = Path("tmp/hpmor-epub-3-flatten-mod.tex")
-
-print("=== 3. modify flattened file ===")
-
-
-with source_file.open(encoding="utf-8", newline="\n") as fh_in:
-    cont = fh_in.read()
-
-# \today
-date_str = dt.datetime.now(dt.timezone.utc).date().strftime("%d.%m.%Y")
-cont = cont.replace("\\today{}", date_str)
-
-# writtenNote env -> \writtenNoteA
-cont = re.sub(
-    r"\s*\\begin\{writtenNote\}\s*(.*?)\s*\\end\{writtenNote\}",
-    r"\\writtenNoteA{\1}",
-    cont,
-    flags=re.DOTALL,
-)
-
-# fix chapterOpeningAuthorNote
-cont = re.sub(
-    r"(\\begin\{chapterOpeningAuthorNote\}\n)(.*?\n)(\\end\{chapterOpeningAuthorNote\}\n)",
-    r"\1E.~Y.:~\2\\newline\\rule[1ex]{\\textwidth}{.1pt}\\newline%\n\3",
-    cont,
-    flags=re.DOTALL,
-)
-
-# some cleanup
-# TODO: removed when switching to Ubuntu >= 23.04,
-#   since it let to a problem
-#  in line 31 of tmp/hpmor-epub-3-flatten-mod.tex
-# cont = cont.replace("\\hplettrineextrapara", "")
-
-# additional linebreaks in verses of chapter 64
-cont = cont.replace("\\\\\n\n", "\n\n")
-
-# manual pagebreaks
-cont = re.sub(r"\\clearpage(\{\}|)\n?", "", cont)
-
-# \vskip 1\baselineskip plus .5\textheight minus 1\baselineskip
-cont = re.sub(r"\\vskip .*?\\baselineskip", "", cont)
-
-# remove \settowidth{\versewidth}... \begin{verse}[\versewidth]
-cont = re.sub(
-    r"\n[^\n]*?\\settowidth\{\\versewidth\}[^\n]*?\n(\\begin\{verse\}\[\\versewidth\])",
-    r"\n\\begin{verse}",
-    cont,
-)
-
-# remove \settowidth
-cont = re.sub(
-    r"\\settowidth\{[^\}]*\}\{([^\}]*)\}",
-    r"\1",
-    cont,
-    flags=re.DOTALL,
-)
-
-# fix „ at start of chapter
-# \lettrine[ante=„] -> „\lettrine
-# \lettrinepara[ante=„] -> „\lettrine
-cont = re.sub(
-    r"\\(lettrine|lettrinepara)\[ante=(.)\]",
-    r"\2\\lettrine",
-    cont,
-)
-
-# OMakeIV sections
-# \OmakeIVsection{My Little Pony: Friendship is Science}
-cont = re.sub(r"\\OmakeIVsection(\[[^\]]*\]|)\{(.*)\}\n+", r"\\section{\2}\n", cont)
-
-cont = re.sub(
-    r"\\OmakeIVspecialsection[^\n]+\{RingBearer\}.*?\n\n",
-    r"\\section{Lord of the Rationality}\n",
-    cont,
-    flags=re.DOTALL,
-    count=1,
-)
-cont = re.sub(
-    r"\\OmakeIVspecialsection[^\n]+\{NarniaBLL\}.*?\n\n",
-    r"\\section{The Witch and the Wardrobe}\n",
-    cont,
-    flags=re.DOTALL,
-    count=1,
-)
-cont = re.sub(
-    r"\\OmakeIVspecialsection[^\n]+\{Thundercats\}.*?\n\n",
-    r"\\section{ThunderSmarts}\n",
-    cont,
-    flags=re.DOTALL,
-    count=1,
-)
-
-cont = re.sub(
-    r"\\OmakeIVspecialsection[^\n]+\{Twilight\}.*?\n\n",
-    r"\\section{Utilitarian Twilight}\n",
-    cont,
-    flags=re.DOTALL,
-    count=1,
-)
-
-# \censor
-cont = re.sub(r"\\censor\{.*?\}", r"xxxxxx", cont)
-
-
-# # remove Deathly_Hallows_Sign.pdf and other pdf images
-# # \includegraphics[scale=0.125]{images/Deathly_Hallows_Sign.pdf}
-# cont = re.sub(
-#     # r"\\includegraphics.*?\{images/Deathly_Hallows_Sign.*?\}",
-#     r"\\includegraphics.*?\.pdf\}",
-#     "",
-#     cont,
-# )
-
-# remove all images
-cont = re.sub(
-    r"\\includegraphics\[.*?\]\{.*?\}",
-    "",
-    cont,
-    flags=re.DOTALL,
-)
-
-# remove empty envs
-cont = re.sub(
-    r"\\begin\{([^\}]*)\}\s*\\end\{\1}",
-    "",
-    cont,
-    flags=re.DOTALL,
-)
-
-# remove end stuff
-cont = re.sub(
-    r"(.*)\\end\{chapterOpeningAuthorNote\}.*?\\end\{document\}",
-    r"\1\\end{chapterOpeningAuthorNote}\n\\end{document}",
-    cont,
-    flags=re.DOTALL,
-    count=1,
-)
-
-with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
-    fh_out.write(cont)
diff --git a/scripts/ebook/6.py b/scripts/ebook/6.py
deleted file mode 100755
index 6c03d53b5..000000000
--- a/scripts/ebook/6.py
+++ /dev/null
@@ -1,131 +0,0 @@
-#!/usr/bin/env python3
-# by Torben Menke https://entorb.net
-# ruff: noqa: RUF001
-
-"""
-HTML modifications.
-"""
-
-import os
-import re
-from pathlib import Path
-
-os.chdir(Path(__file__).parent.parent.parent)
-
-source_file = Path("tmp/hpmor-epub-5-html-unmod.html")
-target_file = Path("hpmor.html")
-
-print("=== 6. HTML modifications ===")
-
-
-with source_file.open(encoding="utf-8", newline="\n") as fh_in:
-    cont = fh_in.read()
-
-# remove strange leftovers from tex -> html conversion
-cont = re.sub(
-    r"(</header>).*?<p>Book :</p>\n",
-    r"\1",
-    cont,
-    flags=re.DOTALL | re.IGNORECASE,
-    count=1,
-)
-
-# cleanup hp-intro leftovers
-cont = re.sub(
-    """<p>Fanfiction based on the characters of</p>
-<p>J. K. ROWLING</p>
-<p>and her books:</p>""",
-    "<p>Fanfiction based on the characters of J. K. Rowling and her books:</p>",
-    cont,
-    count=1,
-)
-
-cont = re.sub("<p>Year at Hogwarts</p>\n", "", cont, count=7)
-cont = re.sub(
-    "</em></p>\n<p><em>Harry Potter and the",
-    "<br>\nHarry Potter and the",
-    cont,
-    count=7,
-)
-
-# now done via pandoc -V lang=en in 5.sh
-# # set language
-# cont = re.sub(
-#     r'(<html [^>]*) lang="" xml:lang=""',
-#     r'\1 lang="en" xml:lang="en"',
-#     cont,
-#     count=1,
-# )
-
-# remove training slashes to satisfy https://validator.w3.org
-cont = cont.replace("<br />", "<br>")
-cont = cont.replace("<hr />", "<hr>")
-
-cont = re.sub(
-    r"(<meta [^>]*) />",
-    r"\1>",
-    cont,
-)
-
-# remove bad span ids (containing spaces) from newspaper spans
-cont = re.sub(r'<span id="[^"]+" label="[^"]+">', r"<span>", cont, count=5)
-
-# doc structure (not needed any more, using calibi --level1-toc flag instead)
-# sed -i 's/<h1 /<h1 class="part"/g' $target_file
-# sed -i 's/<h2 /<h2 class="chapter"/g' $target_file
-# sed -i 's/<h3 /<h3 class="section"/g' $target_file
-
-# remove ids from chapters since umlaute cause problem
-cont = re.sub(
-    r'(<h\d) id="[^"]+"',
-    r"\1",
-    cont,
-    flags=re.DOTALL | re.IGNORECASE,
-)
-cont = re.sub(
-    r'(<h\d class="unnumbered") id="[^"]+"',
-    r"\1",
-    cont,
-    flags=re.DOTALL | re.IGNORECASE,
-)
-
-# add part numbers
-part_no = 0
-while "<h1>" in cont:
-    part_no += 1
-    cont = cont.replace("<h1>", f"<h1_DONE>{part_no}. ", 1)
-cont = cont.replace("<h1_DONE>", "<h1>")
-
-# add chapter numbers
-chapter_no = 0
-while "<h2>" in cont:
-    chapter_no += 1
-    cont = cont.replace("<h2>", f"<h2_DONE>{chapter_no}. ", 1)
-cont = cont.replace("<h2_DONE>", "<h2>")
-
-# fix double rules
-# cont = cont.replace("<hr />\n<hr />", "<hr />")
-cont = re.sub(
-    r"<hr */>\n<hr */>",
-    r"<hr />",
-    cont,
-    flags=re.DOTALL | re.IGNORECASE,
-)
-# fixing linebreak at author's comment
-cont = cont.replace("<p>E. Y.: </p>\n<p>", "<p>E.Y.: ")
-
-# converting "color-marked" styles of 1.sh back to proper style classes
-cont = re.sub(
-    r'<(div|span) style="color: (parsel|writtenNote|McGonagallWhiteBoard|headline)"',
-    r'<\1 class="\2"',
-    cont,
-)
-
-# add css style file format for \emph in \emph
-with Path("scripts/ebook/html.css").open(encoding="utf-8", newline="\n") as fh_in:
-    css = fh_in.read()
-cont = cont.replace("</style>\n", css + "\n</style>\n")
-
-
-with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
-    fh_out.write(cont)
diff --git a/scripts/ebook/1.sh b/scripts/ebook/step_1.sh
similarity index 100%
rename from scripts/ebook/1.sh
rename to scripts/ebook/step_1.sh
diff --git a/scripts/ebook/2.sh b/scripts/ebook/step_2.sh
similarity index 100%
rename from scripts/ebook/2.sh
rename to scripts/ebook/step_2.sh
diff --git a/scripts/ebook/step_3.py b/scripts/ebook/step_3.py
new file mode 100755
index 000000000..21da654a1
--- /dev/null
+++ b/scripts/ebook/step_3.py
@@ -0,0 +1,155 @@
+#!/usr/bin/env python3
+# by Torben Menke https://entorb.net
+
+"""
+Modify flattened .tex file.
+"""
+
+import datetime as dt
+import os
+import re
+from pathlib import Path
+
+os.chdir(Path(__file__).parent.parent.parent)
+
+source_file = Path("tmp/hpmor-epub-2-flatten.tex")
+target_file = Path("tmp/hpmor-epub-3-flatten-mod.tex")
+
+if __name__ == "__main__":
+    print("=== 3. modify flattened file ===")
+
+    with source_file.open(encoding="utf-8", newline="\n") as fh_in:
+        cont = fh_in.read()
+
+    # \today
+    date_str = dt.datetime.now(dt.timezone.utc).date().strftime("%d.%m.%Y")
+    cont = cont.replace("\\today{}", date_str)
+
+    # writtenNote env -> \writtenNoteA
+    cont = re.sub(
+        r"\s*\\begin\{writtenNote\}\s*(.*?)\s*\\end\{writtenNote\}",
+        r"\\writtenNoteA{\1}",
+        cont,
+        flags=re.DOTALL,
+    )
+
+    # fix chapterOpeningAuthorNote
+    cont = re.sub(
+        r"(\\begin\{chapterOpeningAuthorNote\}\n)(.*?\n)(\\end\{chapterOpeningAuthorNote\}\n)",
+        r"\1E.~Y.:~\2\\newline\\rule[1ex]{\\textwidth}{.1pt}\\newline%\n\3",
+        cont,
+        flags=re.DOTALL,
+    )
+
+    # some cleanup
+    # TODO: removed when switching to Ubuntu >= 23.04,
+    #   since it led to a problem
+    #  in line 31 of tmp/hpmor-epub-3-flatten-mod.tex
+    # cont = cont.replace("\\hplettrineextrapara", "")
+
+    # additional linebreaks in verses of chapter 64
+    cont = cont.replace("\\\\\n\n", "\n\n")
+
+    # manual pagebreaks
+    cont = re.sub(r"\\clearpage(\{\}|)\n?", "", cont)
+
+    # \vskip 1\baselineskip plus .5\textheight minus 1\baselineskip
+    cont = re.sub(r"\\vskip .*?\\baselineskip", "", cont)
+
+    # remove \settowidth{\versewidth}... \begin{verse}[\versewidth]
+    cont = re.sub(
+        r"\n[^\n]*?\\settowidth\{\\versewidth\}[^\n]*?\n(\\begin\{verse\}\[\\versewidth\])",
+        r"\n\\begin{verse}",
+        cont,
+    )
+
+    # remove \settowidth
+    cont = re.sub(
+        r"\\settowidth\{[^\}]*\}\{([^\}]*)\}",
+        r"\1",
+        cont,
+        flags=re.DOTALL,
+    )
+
+    # fix „ at start of chapter
+    # \lettrine[ante=„] -> „\lettrine
+    # \lettrinepara[ante=„] -> „\lettrine
+    cont = re.sub(
+        r"\\(lettrine|lettrinepara)\[ante=(.)\]",
+        r"\2\\lettrine",
+        cont,
+    )
+
+    # OMakeIV sections
+    # \OmakeIVsection{My Little Pony: Friendship is Science}
+    cont = re.sub(r"\\OmakeIVsection(\[[^\]]*\]|)\{(.*)\}\n+", r"\\section{\2}\n", cont)
+
+    cont = re.sub(
+        r"\\OmakeIVspecialsection[^\n]+\{RingBearer\}.*?\n\n",
+        r"\\section{Lord of the Rationality}\n",
+        cont,
+        flags=re.DOTALL,
+        count=1,
+    )
+    cont = re.sub(
+        r"\\OmakeIVspecialsection[^\n]+\{NarniaBLL\}.*?\n\n",
+        r"\\section{The Witch and the Wardrobe}\n",
+        cont,
+        flags=re.DOTALL,
+        count=1,
+    )
+    cont = re.sub(
+        r"\\OmakeIVspecialsection[^\n]+\{Thundercats\}.*?\n\n",
+        r"\\section{ThunderSmarts}\n",
+        cont,
+        flags=re.DOTALL,
+        count=1,
+    )
+
+    cont = re.sub(
+        r"\\OmakeIVspecialsection[^\n]+\{Twilight\}.*?\n\n",
+        r"\\section{Utilitarian Twilight}\n",
+        cont,
+        flags=re.DOTALL,
+        count=1,
+    )
+
+    # \censor
+    cont = re.sub(r"\\censor\{.*?\}", r"xxxxxx", cont)
+
+    # # remove Deathly_Hallows_Sign.pdf and other pdf images
+    # # \includegraphics[scale=0.125]{images/Deathly_Hallows_Sign.pdf}
+    # cont = re.sub(
+    #     # r"\\includegraphics.*?\{images/Deathly_Hallows_Sign.*?\}",
+    #     r"\\includegraphics.*?\.pdf\}",
+    #     "",
+    #     cont,
+    # )
+
+    # remove all images
+    cont = re.sub(
+        r"\\includegraphics\[.*?\]\{.*?\}",
+        "",
+        cont,
+        flags=re.DOTALL,
+    )
+
+    # remove empty envs
+    cont = re.sub(
+        r"\\begin\{([^\}]*)\}\s*\\end\{\1}",
+        "",
+        cont,
+        flags=re.DOTALL,
+    )
+
+    # remove end stuff
+    cont = re.sub(
+        r"(.*)\\end\{chapterOpeningAuthorNote\}.*?\\end\{document\}",
+        r"\1\\end{chapterOpeningAuthorNote}\n\\end{document}",
+        cont,
+        flags=re.DOTALL,
+        count=1,
+    )
+
+    with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
+        fh_out.write(cont)
diff --git a/scripts/ebook/4.py b/scripts/ebook/step_4.py
similarity index 63%
rename from scripts/ebook/4.py
rename to scripts/ebook/step_4.py
index b1f35d2e1..e4c9ccd50 100755
--- a/scripts/ebook/4.py
+++ b/scripts/ebook/step_4.py
@@ -14,8 +14,6 @@
 source_file = Path("tmp/hpmor-epub-3-flatten-mod.tex")
 target_file = Path("tmp/hpmor-epub-4-flatten-parsel.tex")
 
-print("=== 4. parselify flattened file in python ===")
-
 
 def convert_parsel(s: str) -> str:
     """Convert text to Parsel."""
@@ -34,16 +32,18 @@ def convert_parsel(s: str) -> str:
     return s
 
 
-with source_file.open(encoding="utf-8", newline="\n") as fh_in:
-    cont = fh_in.read()
+if __name__ == "__main__":
+    print("=== 4. parselify flattened file in python ===")
 
+    with source_file.open(encoding="utf-8", newline="\n") as fh_in:
+        cont = fh_in.read()
 
-# \parsel
-my_matches = re.finditer(r"(\\parsel\{([^\}\\]+)\})", cont)
-for my_match in my_matches:
-    was = my_match.group(1)
-    womit = convert_parsel(my_match.group(2))
-    cont = cont.replace(was, "\\parsel{" + womit + "}")
+    # \parsel{...}: convert the argument text to Parsel
+    my_matches = re.finditer(r"(\\parsel\{([^\}\\]+)\})", cont)
+    for my_match in my_matches:
+        was = my_match.group(1)
+        womit = convert_parsel(my_match.group(2))
+        cont = cont.replace(was, "\\parsel{" + womit + "}")
 
-with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
-    fh_out.write(cont)
+    with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
+        fh_out.write(cont)
diff --git a/scripts/ebook/step_4_test.py b/scripts/ebook/step_4_test.py
new file mode 100644
index 000000000..c2b027112
--- /dev/null
+++ b/scripts/ebook/step_4_test.py
@@ -0,0 +1,23 @@
+"""Unit tests for convert_parsel."""  # noqa: INP001
+# ruff: noqa: S101
+
+from step_4 import convert_parsel
+
+assert convert_parsel("foo") == "foo"
+# s -> ss (a capital S becomes Ss)
+assert convert_parsel("house") == "housse"
+assert convert_parsel("Special") == "Sspecial"
+# ss and ß -> sss
+assert convert_parsel("Professor") == "Professsor"
+assert convert_parsel("muß") == "musss"
+# z -> zz (a capital Z becomes Zz)
+assert convert_parsel("zero") == "zzero"
+assert convert_parsel("Zero") == "Zzero"
+# zz -> zzz
+assert convert_parsel("puzzled") == "puzzzled"
+# x -> xs
+assert convert_parsel("Bellatrix") == "Bellatrixs"
+
+# several rules combined in one word
+assert convert_parsel("expression") == "exspresssion"
+assert convert_parsel("Salazar") == "Ssalazzar"
diff --git a/scripts/ebook/5.sh b/scripts/ebook/step_5.sh
similarity index 100%
rename from scripts/ebook/5.sh
rename to scripts/ebook/step_5.sh
diff --git a/scripts/ebook/step_6.py b/scripts/ebook/step_6.py
new file mode 100755
index 000000000..6f1edcae0
--- /dev/null
+++ b/scripts/ebook/step_6.py
@@ -0,0 +1,131 @@
+#!/usr/bin/env python3
+# by Torben Menke https://entorb.net
+# ruff: noqa: RUF001
+
+"""
+HTML modifications.
+"""
+
+import os
+import re
+from pathlib import Path
+
+os.chdir(Path(__file__).parent.parent.parent)
+
+source_file = Path("tmp/hpmor-epub-5-html-unmod.html")
+target_file = Path("hpmor.html")
+
+
+if __name__ == "__main__":
+    print("=== 6. HTML modifications ===")
+
+    with source_file.open(encoding="utf-8", newline="\n") as fh_in:
+        cont = fh_in.read()
+
+    # remove strange leftovers from tex -> html conversion
+    cont = re.sub(
+        r"(</header>).*?<p>Book :</p>\n",
+        r"\1",
+        cont,
+        flags=re.DOTALL | re.IGNORECASE,
+        count=1,
+    )
+
+    # cleanup hp-intro leftovers (the three <p> lines are unindented in the HTML)
+    cont = re.sub(
+        "<p>Fanfiction based on the characters of</p>\n"
+        "<p>J. K. ROWLING</p>\n"
+        "<p>and her books:</p>",
+        "<p>Fanfiction based on the characters of J. K. Rowling and her books:</p>",
+        cont,
+        count=1,
+    )
+
+    cont = re.sub("<p>Year at Hogwarts</p>\n", "", cont, count=7)
+    cont = re.sub(
+        "</em></p>\n<p><em>Harry Potter and the",
+        "<br>\nHarry Potter and the",
+        cont,
+        count=7,
+    )
+
+    # now done via pandoc -V lang=en in step_5.sh
+    # # set language
+    # cont = re.sub(
+    #     r'(<html [^>]*) lang="" xml:lang=""',
+    #     r'\1 lang="en" xml:lang="en"',
+    #     cont,
+    #     count=1,
+    # )
+
+    # remove trailing slashes to satisfy https://validator.w3.org
+    cont = cont.replace("<br />", "<br>")
+    cont = cont.replace("<hr />", "<hr>")
+
+    cont = re.sub(
+        r"(<meta [^>]*) />",
+        r"\1>",
+        cont,
+    )
+
+    # remove bad span ids (containing spaces) from newspaper spans
+    cont = re.sub(r'<span id="[^"]+" label="[^"]+">', r"<span>", cont, count=5)
+
+    # doc structure (not needed any more, using calibre --level1-toc flag instead)
+    # sed -i 's/<h1 /<h1 class="part"/g' $target_file
+    # sed -i 's/<h2 /<h2 class="chapter"/g' $target_file
+    # sed -i 's/<h3 /<h3 class="section"/g' $target_file
+
+    # remove ids from chapters since umlauts cause problems
+    cont = re.sub(
+        r'(<h\d) id="[^"]+"',
+        r"\1",
+        cont,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    cont = re.sub(
+        r'(<h\d class="unnumbered") id="[^"]+"',
+        r"\1",
+        cont,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+
+    # add part numbers
+    part_no = 0
+    while "<h1>" in cont:
+        part_no += 1
+        cont = cont.replace("<h1>", f"<h1_DONE>{part_no}. ", 1)
+    cont = cont.replace("<h1_DONE>", "<h1>")
+
+    # add chapter numbers
+    chapter_no = 0
+    while "<h2>" in cont:
+        chapter_no += 1
+        cont = cont.replace("<h2>", f"<h2_DONE>{chapter_no}. ", 1)
+    cont = cont.replace("<h2_DONE>", "<h2>")
+
+    # fix double rules
+    # (runs after "<hr />" was normalized to "<hr>" above, so accept both forms)
+    cont = re.sub(
+        r"<hr */?>\n<hr */?>",
+        r"<hr>",
+        cont,
+        flags=re.DOTALL | re.IGNORECASE,
+    )
+    # fixing linebreak at author's comment
+    cont = cont.replace("<p>E. Y.: </p>\n<p>", "<p>E.Y.: ")
+
+    # converting "color-marked" styles of step_1.sh back to proper style classes
+    cont = re.sub(
+        r'<(div|span) style="color: (parsel|writtenNote|McGonagallWhiteBoard|headline)"',  # noqa: E501
+        r'<\1 class="\2"',
+        cont,
+    )
+
+    # add css style file format for \emph in \emph
+    with Path("scripts/ebook/html.css").open(encoding="utf-8", newline="\n") as fh_in:
+        css = fh_in.read()
+    cont = cont.replace("</style>\n", css + "\n</style>\n")
+
+    with target_file.open(mode="w", encoding="utf-8", newline="\n") as fh_out:
+        fh_out.write(cont)
diff --git a/scripts/ebook/7.sh b/scripts/ebook/step_7.sh
similarity index 100%
rename from scripts/ebook/7.sh
rename to scripts/ebook/step_7.sh
diff --git a/scripts/make_ebooks.sh b/scripts/make_ebooks.sh
index a4e7d7572..eee9eb24e 100755
--- a/scripts/make_ebooks.sh
+++ b/scripts/make_ebooks.sh
@@ -7,10 +7,10 @@ cd $script_dir/..
 # TODO:
 # image on last page
 
-sh scripts/ebook/1.sh
-sh scripts/ebook/2.sh
-python3 scripts/ebook/3.py
-python3 scripts/ebook/4.py
-sh scripts/ebook/5.sh
-python3 scripts/ebook/6.py
-sh scripts/ebook/7.sh
+sh scripts/ebook/step_1.sh
+sh scripts/ebook/step_2.sh
+python3 scripts/ebook/step_3.py
+python3 scripts/ebook/step_4.py
+sh scripts/ebook/step_5.sh
+python3 scripts/ebook/step_6.py
+sh scripts/ebook/step_7.sh