-
Notifications
You must be signed in to change notification settings - Fork 0
/
transform.py
194 lines (167 loc) · 8.63 KB
/
transform.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
from pathlib import Path
import re
from pathlib import Path
import re
def process_latex_content(latex_content):
"""
Processes the LaTeX content to replace \dfn, \prop, and \cor commands, using a stack to efficiently handle nested curly brackets
and dropping empty optional arguments.
"""
# Mapping of commands to their corresponding LaTeX environment names
command_to_env = {
'\\dfn': 'definition',
'\\prop': 'proposition',
'\\cor': 'corollary',
'\\lemm': 'lemma',
'\\thm': 'theorem',
'\\ex': 'example',
'\\pf': 'prf',
}
stack = [] # Stack to keep track of the positions of opening curly brackets
output = [] # Output list to build the processed content
i = 0 # Current position in the input content
while i < len(latex_content):
char = latex_content[i]
if char == '\\': # Escape sequence (e.g., LaTeX command)
# Find the end of the command (space or a curly bracket)
end_pos = i + 1
# if char is the last character, or the next character is not a letter, then break
if end_pos >= len(latex_content) or not latex_content[end_pos].isalpha():
cmd = 'not a command'
else:
while end_pos < len(latex_content) and latex_content[end_pos] not in (' ', '{', '[', '\\'):
end_pos += 1
cmd = latex_content[i:end_pos] # Extracted command
if cmd in command_to_env:
# Process the recognized command
env_name = command_to_env[cmd] # Get the corresponding environment name
i = end_pos # Move past the command
opt_arg = "" # Optional argument (e.g., the label)
if i < len(latex_content) and latex_content[i] == '[':
opt_arg_start = i
i = latex_content.find(']', i) + 1
if i == 0: # No closing ']' found or it's the last character
break
opt_arg = latex_content[opt_arg_start + 1:i - 1]
# Now, handle the curly brackets with the stack
args = [] # List to keep the extracted arguments
max_arg_count = 1 if cmd == '\\pf' else 2 # number of arguments
while i < len(latex_content) and len(args) < max_arg_count: # Ensure i is in bounds
current_char = latex_content[i]
# Check for math environments and skip them, because they can contain unpaired '{'
if latex_content[i:i + 1] == '$': # Single dollar math environment
end_pos = latex_content.find('$', i + 1) + 1
elif latex_content[i:i + 2] == '$$': # Double dollar math environment
end_pos = latex_content.find('$$', i + 2) + 2
elif latex_content[i:i + 2] == '\\[': # Display math environment
end_pos = latex_content.find('\\]', i + 2) + 2
elif latex_content[i:i + 14] == r'\begin{align*}': # Generic \begin \end block
end_pos = latex_content.find(r'\end{align*}', i + 14) + 12
elif latex_content[i:i + 14] == r'\begin{tikzcd}':
end_pos = latex_content.find(r'\end{tikzcd}', i + 14) + 12
elif latex_content[i:i + 17] == r'\begin{equation*}':
end_pos = latex_content.find(r'\end{equation*}', i + 17) + 15
if end_pos > i:
i = end_pos # Skip the entire math environment or \begin \end block
continue
if current_char == '{':
stack.append(i)
elif current_char == '}' and stack:
start = stack.pop()
if not stack: # If the stack is empty, we've found an outermost argument
args.append(latex_content[start + 1:i])
i += 1
if max_arg_count == 1 and len(args) == 1:
# For the \pf command, we don't need to handle the last argument
replacement = f"\\begin{{{env_name}}}{args[0]}\\end{{{env_name}}}\n"
output.append(replacement)
elif len(args) >= 2:
# Construct the replacement string, checking for an empty optional argument
# opt_arg = f"{{{opt_arg}}}" if opt_arg else ""
content = args[1] if len(args) == 2 else ""
replacement = f"\\begin{{{env_name}}}{{{args[0]}}}{{{opt_arg}}}{content}\\end{{{env_name}}}\n"
output.append(replacement)
continue # Skip the rest of the loop and continue processing
else:
# Not one of the recognized commands, just copy the command to the output
output.append(latex_content[i:end_pos])
i = end_pos
continue
# If not part of a command, or not handling brackets, just add the character to the output
if not stack and i < len(latex_content):
output.append(char)
i += 1
return ''.join(output)
def expand_dfn_macro(input_tex_path):
input_path = Path(input_tex_path)
output_path = 'macro_expanded' / input_path.with_stem(f"{input_path.stem}")
# create the output directory if it doesn't exist with pathlib
output_path.parent.mkdir(parents=True, exist_ok=True)
with open(input_tex_path, 'r', encoding='utf-8') as file:
content = file.read()
processed_content = process_latex_content(content)
with open(output_path, 'w', encoding='utf-8') as file:
file.write(processed_content)
return output_path
test_input = r'''
\[
\overline{a_1\cdots a_n}\diamond \overline{b_1\cdots b_m}=\overline{a_1\cdots a_nb_1\cdots b_m}.
\]
\dfn{Word}{
Let $S$ be a set. Define $S^{-1}=\left\{s^{-1}\midv s\in S\right\}$. A \textbf{word} on $S$ is a string over $S\sqcup S^{-1}\sqcup\{1\}$. $\overline{1}$ is called an \textbf{empty word}. The \textbf{length} of a word $w$ is the number of letters in $w$.
}
Associatedness can also be described in terms of the action of $R^\times$ on $R$ via multiplication: two elements of $R$ are associates if they are in the same $R^\times$-orbit.
\dfn{Irreducible Element}{
Let $R$ be an integal domain. An element $a\in R$ is called \textbf{irreducible} if
\begin{enumerate}[(i)]
\item $a\notin R^\times$, i.e. $a$ is not a unit.
\item $a=bc\implies b\in R^\times\text{ or }c\in R^\times$.
\end{enumerate}
}
\[
\left\{\text{maximal ideals of }R\right\} \subseteq \left\{\text{prime ideals of }R\right\} \subseteq \left\{\text{radical ideals of }R\right\} \subseteq \left\{\text{ideals of }R\right\}.
\]
\prop{Quotient Preserves Radical, Prime, Maximal Ideals}{
Let $R$ be a commutative ring and $I\subseteq R$ be a proper ideal. Then we have bijections between the following sets:
\begin{align*}
\left\{\text{ideals of }R\text{ containing }I\right\}&\longleftrightarrow\left\{\text{ideals of }R/I\right\}\\
J&\longmapsto J/I
\end{align*}
The ideal $J\supseteq I$ is radical, prime, or maximal if and only if $J/I$ is radical, prime, or maximal respectively.
}
\[
\mathsf{Ring}
\]
\dfn{Unit Group of a Ring}{
Let $R$ be a ring. The \textbf{unit group} of $R$ is the group of invertible elements of $R$ under multiplication, denoted by $R^\times$. We can define a functor $(-)^\times:\mathsf{Ring}\to\mathsf{Grp}$ that sends a ring to its unit group
\[
\begin{tikzcd}[ampersand replacement=\&]
\mathsf{Ring}\ \&[-25pt]\&[+10pt]\&[-30pt]\mathsf{Grp}\&[-30pt]\&[-30pt] \\ [-15pt]
R \arrow[dd, "f"{name=L, left}]
\&[-25pt] \& [+10pt]
\& [-30pt]R^{\times}\arrow[dd, "f|_{R^\times}"{name=R}] \\ [-10pt]
\& \phantom{.}\arrow[r, "(-)^\times", squigarrow]\&\phantom{.} \& \\[-10pt]
S \& \& \& S^{\times}
\end{tikzcd}
\]
}
\prop{Adjunction $\mathbb{Z}\left[-\right]\dashv \left(-\right)^\times$}{
$(-)^\times:\mathsf{Ring}\to\mathsf{Grp}$ has a left adjoint which sends each group $G$ to the group ring $\mathbb{Z}\left[G\right]$.
}
'''
print(process_latex_content(test_input))
filename_list = [
'set_theory.tex',
'category_theory.tex',
'group.tex',
# 'topological_group.tex',
#'ring.tex',
'commutative_ring.tex',
'module.tex',
# 'associative_algebra.tex',
# 'field.tex',
# 'valuation_theory.tex',
# 'number_theory.tex',
]
for input_tex_path in filename_list:
expand_dfn_macro(input_tex_path)