-
Notifications
You must be signed in to change notification settings - Fork 17
/
Copy pathstrparse.py
103 lines (96 loc) · 3.69 KB
/
strparse.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
import re
# Demacro
def demacro(text: str) -> str:
    """Expand shorthand LaTeX macros in *text* into their long-form equivalents.

    The replacements are applied one after another in table order, so a later
    entry sees the output of earlier ones (``\\epsilon`` is first turned into
    ``\\eps`` and then into ``\\varepsilon``).

    Macros whose name ends in a letter are only replaced when they form a
    complete control word, i.e. when they are not immediately followed by
    another ASCII letter.  This keeps unrelated commands that merely share a
    prefix (``\\idotsint``, ``\\oldstylenums``, ``\\epsfbox``, ...) intact.

    Known limitation: backslash escaping is not tracked, so the ``\\id`` inside
    a line break followed by text (``\\\\id``) is still rewritten.

    Args:
        text: LaTeX source possibly containing the shorthand macros.

    Returns:
        The text with every listed macro expanded.
    """
    replacements: list[tuple[str, str]] = [
        # These two carry their own terminator so that e.g. \iint is untouched.
        (r"\ii ", r"\item "),
        (r"\ii[", r"\item["),
        (r"\wh", r"\widehat"),
        (r"\wt", r"\widetilde"),
        (r"\ol", r"\overline"),
        # \epsilon is funnelled through \eps, which the next entry expands.
        (r"\epsilon", r"\eps"),
        (r"\eps", r"\varepsilon"),
        (r"\dang", r"\measuredangle"),
        (r"\dg", r"^{\circ}"),
        (r"\inv", r"^{-1}"),
        (r"\half", r"\frac{1}{2}"),
        (r"\GL", r"\operatorname{GL}"),
        (r"\SL", r"\operatorname{SL}"),
        (r"\NN", r"{\mathbb N}"),
        (r"\ZZ", r"{\mathbb Z}"),
        (r"\CC", r"{\mathbb C}"),
        (r"\RR", r"{\mathbb R}"),
        (r"\QQ", r"{\mathbb Q}"),
        (r"\FF", r"{\mathbb F}"),
        (r"\ts", r"\textsuperscript"),
        (r"\opname", r"\operatorname"),
        (r"\defeq", r"\overset{\text{def}}{=}"),
        (r"\id", r"\operatorname{id}"),
        (r"\ord", r"\operatorname{ord}"),
        (r"\injto", r"\hookrightarrow"),
        (r"\vdotswithin=", r"\vdots"),
    ]
    s = text
    for short, full in replacements:
        if short[-1].isalpha():
            # Whole-control-word match: refuse to fire when more letters follow.
            pattern = re.escape(short) + r"(?![A-Za-z])"
            # A callable replacement avoids re.sub interpreting the backslashes
            # in the LaTeX expansion as regex template escapes.
            s = re.sub(pattern, lambda _m, full=full: full, s)
        else:
            # The entry already ends in an explicit delimiter; match literally.
            s = s.replace(short, full)
    return s
def remove_soft_newlines(text: str) -> str:
    """Replace every newline that sits directly between two ASCII letters with a space.

    Newlines next to anything else (punctuation, digits, whitespace, another
    newline) are left alone.

    The neighbouring letters are checked with lookbehind/lookahead instead of
    being consumed by the match, so a one-letter line such as the ``a`` in
    ``"is\\na\\ntest"`` can serve as the right neighbour of one newline and the
    left neighbour of the next.

    Args:
        text: Input text with hard-wrapped lines.

    Returns:
        The text with letter-to-letter line breaks joined by a single space.
    """
    return re.sub(r"(?<=[a-zA-Z])\n(?=[a-zA-Z])", " ", text)
def toAOPS(text: str) -> str:
    """Convert LaTeX source into the BBCode-style markup used for AoPS posts.

    Steps, in order:

    1. Expand shorthand macros with ``demacro``.
    2. Apply literal LaTeX -> markup substitutions (asy blocks, lists,
       theorem-like environments, proofs, skips, escaped ``#``, trailing ``%``).
    3. Apply regex substitutions (Asymptote opacity pens, emphasis, bold,
       paragraph headings, URLs).
    4. Collapse each blank-line-separated paragraph onto a single line and
       join the paragraphs with single newlines.

    The order matters: later rules operate on the output of earlier ones.

    Args:
        text: LaTeX source text.

    Returns:
        The converted text, one paragraph per line.
    """
    DIVIDER = "\n" + r"-------------------" + "\n\n"
    text = demacro(text)

    # Literal substitutions, applied strictly top to bottom.
    literal_rules: list[tuple[str, str]] = [
        (r"\qedhere", ""),
        (r"\begin{asy}", "\n" + "[asy]" + "\n"),
        (r"\end{asy}", "\n" + "[/asy]"),
        (r"\begin{center}", ""),
        (r"\end{center}", ""),
        (r"\par ", "\n"),
        (r"\item ", "[*]"),
        (r"\begin{enumerate}", "[list=1]"),
        (r"\end{enumerate}", "[/list]"),
        (r"\begin{itemize}", "[list]"),
        (r"\end{itemize}", "[/list]"),
    ]
    for old, new in literal_rules:
        text = text.replace(old, new)

    # Theorem-like environments become a red bold heading; the closing tag is
    # simply dropped.  Starred and unstarred variants are treated alike.
    for env in [
        "theorem",
        "claim",
        "lemma",
        "proposition",
        "corollary",
        "definition",
        "remark",
    ]:
        heading = "\n\n" + "[b][color=red]" + env.title() + ":[/color][/b] "
        for name in (env + "*", env):
            text = text.replace(r"\begin{" + name + "}", heading)
            text = text.replace(r"\end{" + name + "}", "")

    text = text.replace(r"\begin{proof}", "[i]Proof.[/i] ")
    text = text.replace(r"\end{proof}", r"$\blacksquare$" + "\n")
    text = text.replace(r"\bigskip", DIVIDER)
    text = text.replace(r"\medskip", DIVIDER)
    text = text.replace(r"\#", "#")
    text = text.replace("%\n", "\n")  # strip trailing percent signs

    # Remove Asy opacities, doesn't work on AoPS.
    # The decimal point is escaped so that only a literal "0.<digits>" matches.
    text = re.sub(r"opacity\(0\.[0-9]+\)+([^,]+), ", "invisible, ", text)

    # Replace \emph, \textit, et al.  These patterns stop at the first "}",
    # so arguments containing nested braces are not handled.
    text = re.sub(r"\\emph{([^}]*)}", r"[i]\1[/i]", text)
    text = re.sub(r"\\textit{([^}]*)}", r"[i]\1[/i]", text)
    text = re.sub(r"\\textbf{([^}]*)}", r"[b]\1[/b]", text)
    text = re.sub(
        r"\\paragraph{([^}]*)}", DIVIDER + r"[color=blue][b]\1[/b][/color]", text
    )
    text = re.sub(r"\\subparagraph{([^}]*)}", DIVIDER + r"[b]\1[/b]", text)
    text = re.sub(r"\\url{([^}]*)}", r"[url]\1[/url]", text)
    text = re.sub(r"\\href{([^}]*)}{([^}]*)}", r"[url=\1]\2[/url]", text)

    # Rewrite \arc only as a complete control word; a plain substring replace
    # would also corrupt \arcsin, \arccos, \arctan, ...
    text = re.sub(r"\\arc(?![A-Za-z])", r"\\widehat", text)

    # Join together newlines: each paragraph (blank-line separated) is
    # flattened to one line with its inner lines stripped and space-joined.
    paragraphs = [
        " ".join([line.strip() for line in paragraph.splitlines()]).strip()
        for paragraph in text.split("\n\n")
    ]
    return "\n".join(paragraphs)