diff --git a/scripts/ebook/3.py b/scripts/ebook/3.py index 220b31ebf..e614d0352 100755 --- a/scripts/ebook/3.py +++ b/scripts/ebook/3.py @@ -3,6 +3,7 @@ """ Modify flattened .tex file. """ + import datetime as dt import os import re @@ -87,18 +88,21 @@ r"\\section{Lord of the Rationality}\n", cont, flags=re.DOTALL, + count=1, ) cont = re.sub( r"\\OmakeIVspecialsection[^\n]+\{NarniaBLL\}.*?\n\n", r"\\section{The Witch and the Wardrobe}\n", cont, flags=re.DOTALL, + count=1, ) cont = re.sub( r"\\OmakeIVspecialsection[^\n]+\{Thundercats\}.*?\n\n", r"\\section{ThunderSmarts}\n", cont, flags=re.DOTALL, + count=1, ) cont = re.sub( @@ -106,6 +110,41 @@ r"\\section{Utilitarian Twilight}\n", cont, flags=re.DOTALL, + count=1, +) + +# # remove Deathly_Hallows_Sign.pdf and other pdf images +# # \includegraphics[scale=0.125]{images/Deathly_Hallows_Sign.pdf} +# cont = re.sub( +# # r"\\includegraphics.*?\{images/Deathly_Hallows_Sign.*?\}", +# r"\\includegraphics.*?\.pdf\}", +# "", +# cont, +# ) + +# remove all images +cont = re.sub( + r"\\includegraphics\[.*?\]\{.*?\}", + "", + cont, + flags=re.DOTALL, +) + +# remove empty envs +cont = re.sub( + r"\\begin\{([^\}]*)\}\s*\\end\{\1}", + "", + cont, + flags=re.DOTALL, +) + +# remove end stuff +cont = re.sub( + r"(.*)\\end\{chapterOpeningAuthorNote\}.*?\\end\{document\}", + r"\1\\end{chapterOpeningAuthorNote}\n\\end{document}", + cont, + flags=re.DOTALL, + count=1, ) with open(target_file, mode="w", encoding="utf-8", newline="\n") as fhOut: diff --git a/scripts/ebook/6.py b/scripts/ebook/6.py index 6ef0862c0..60d51b2af 100755 --- a/scripts/ebook/6.py +++ b/scripts/ebook/6.py @@ -3,6 +3,7 @@ """ HTML modifications. """ + import os import re import sys @@ -24,21 +25,48 @@ r"\1", cont, flags=re.DOTALL | re.IGNORECASE, + count=1, ) # cleanup hp-intro leftovers -cont = cont.replace( +cont = re.sub( """

Fanfiction based on the characters of

J. K. ROWLING

and her books:

""", "

Fanfiction based on the characters of J. K. Rowling and her books:

", + cont, + count=1, ) -cont = cont.replace("

Year at Hogwarts

\n", "") -cont = cont.replace( + +cont = re.sub("

Year at Hogwarts

\n", "", cont, count=7) +cont = re.sub( "

\n

Harry Potter and the", - "
\nHarry Potter and the", + "
\nHarry Potter and the", + cont, + count=7, +) + +# set language +cont = re.sub( + r'(]*) lang="" xml:lang=""', + r'\1 lang="en" xml:lang="en"', + cont, + count=1, +) + +# remove trailing slashes to satisfy https://validator.w3.org +cont = cont.replace("
", "
") +cont = cont.replace("


", "
") + +cont = re.sub( + r"(]*) />", + r"\1>", + cont, ) +# remove bad span ids (containing spaces) from newspaper spans +cont = re.sub(r'', r"", cont, count=5) + # doc structure (not needed any more, using calibi --level1-toc flag instead) # sed -i 's/