diff --git a/scripts/ebook/3.py b/scripts/ebook/3.py index 220b31ebf..e614d0352 100755 --- a/scripts/ebook/3.py +++ b/scripts/ebook/3.py @@ -3,6 +3,7 @@ """ Modify flattened .tex file. """ + import datetime as dt import os import re @@ -87,18 +88,21 @@ r"\\section{Lord of the Rationality}\n", cont, flags=re.DOTALL, + count=1, ) cont = re.sub( r"\\OmakeIVspecialsection[^\n]+\{NarniaBLL\}.*?\n\n", r"\\section{The Witch and the Wardrobe}\n", cont, flags=re.DOTALL, + count=1, ) cont = re.sub( r"\\OmakeIVspecialsection[^\n]+\{Thundercats\}.*?\n\n", r"\\section{ThunderSmarts}\n", cont, flags=re.DOTALL, + count=1, ) cont = re.sub( @@ -106,6 +110,41 @@ r"\\section{Utilitarian Twilight}\n", cont, flags=re.DOTALL, + count=1, +) + +# # remove Deathly_Hallows_Sign.pdf and other pdf images +# # \includegraphics[scale=0.125]{images/Deathly_Hallows_Sign.pdf} +# cont = re.sub( +# # r"\\includegraphics.*?\{images/Deathly_Hallows_Sign.*?\}", +# r"\\includegraphics.*?\.pdf\}", +# "", +# cont, +# ) + +# remove all images +cont = re.sub( + r"\\includegraphics\[.*?\]\{.*?\}", + "", + cont, + flags=re.DOTALL, +) + +# remove empty envs +cont = re.sub( + r"\\begin\{([^\}]*)\}\s*\\end\{\1}", + "", + cont, + flags=re.DOTALL, +) + +# remove end stuff +cont = re.sub( + r"(.*)\\end\{chapterOpeningAuthorNote\}.*?\\end\{document\}", + r"\1\\end{chapterOpeningAuthorNote}\n\\end{document}", + cont, + flags=re.DOTALL, + count=1, ) with open(target_file, mode="w", encoding="utf-8", newline="\n") as fhOut: diff --git a/scripts/ebook/6.py b/scripts/ebook/6.py index 6ef0862c0..60d51b2af 100755 --- a/scripts/ebook/6.py +++ b/scripts/ebook/6.py @@ -3,6 +3,7 @@ """ HTML modifications. """ + import os import re import sys @@ -24,21 +25,48 @@ r"\1", cont, flags=re.DOTALL | re.IGNORECASE, + count=1, ) # cleanup hp-intro leftovers -cont = cont.replace( +cont = re.sub( """
Fanfiction based on the characters of
J. K. ROWLING
and her books:
""", "Fanfiction based on the characters of J. K. Rowling and her books:
", + cont, + count=1, ) -cont = cont.replace("Year at Hogwarts
\n", "") -cont = cont.replace( + +cont = re.sub("Year at Hogwarts
\n", "", cont, count=7) +cont = re.sub( "\nHarry Potter and the",
- "
\nHarry Potter and the",
+ "
\nHarry Potter and the",
+ cont,
+ count=7,
+)
+
+# set language
+cont = re.sub(
+ r'(]*) lang="" xml:lang=""',
+ r'\1 lang="en" xml:lang="en"',
+ cont,
+ count=1,
+)
+
+# remove trailing slashes to satisfy https://validator.w3.org
+cont = cont.replace("
", "
")
+cont = cont.replace("
", "
")
+
+cont = re.sub(
+ r"(]*) />",
+ r"\1>",
+ cont,
)
+# remove bad span ids (containing spaces) from newspaper spans
+cont = re.sub(r'', r"", cont, count=5)
+
# doc structure (not needed any more, using calibi --level1-toc flag instead)
# sed -i 's/