Skip to content

Commit

Permalink
Add Tex2ASCIIMath transformer
Browse files Browse the repository at this point in the history
  • Loading branch information
belerico committed Apr 19, 2020
1 parent 259f77c commit ab9caee
Show file tree
Hide file tree
Showing 7 changed files with 126 additions and 79 deletions.
8 changes: 4 additions & 4 deletions py_asciimath/const.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ def get_symbols_for(
symbol_group, lang_to, lang_from="asciimath"
): # pragma: no cover
return {
'"' + i[lang_from] + '"': (i[lang_to] if lang_to is not None else None)
i[lang_from]: (i[lang_to] if lang_to is not None else None)
for i in getattr(sys.modules[__name__], symbol_group)
}

Expand Down Expand Up @@ -360,15 +360,15 @@ def get_symbols_for(
]

left_parenthesis = [
{"asciimath": "(:", "latex": "\\langle ", "mathml": "⟨"},
{"asciimath": "(:", "latex": "\\langle", "mathml": "⟨"},
{"asciimath": "(", "latex": "(", "mathml": "("},
{"asciimath": "[", "latex": "[", "mathml": "["},
{"asciimath": "{:", "latex": ".", "mathml": ""},
{"asciimath": "{", "latex": "\\{", "mathml": "{"},
{"asciimath": "|:", "latex": "\\vert", "mathml": "∣"},
{"asciimath": "||:", "latex": "\\lVert", "mathml": "∥"},
{"asciimath": "langle", "latex": "\\langle ", "mathml": "⟨"},
{"asciimath": "<<", "latex": "\\langle ", "mathml": "&langle;"},
{"asciimath": "langle", "latex": "\\langle", "mathml": "&langle;"},
{"asciimath": "<<", "latex": "\\langle", "mathml": "&langle;"},
]

right_parenthesis = [
Expand Down
52 changes: 23 additions & 29 deletions py_asciimath/grammar/latex_grammar.py
Original file line number Diff line number Diff line change
@@ -1,26 +1,14 @@
from itertools import islice

from ..const import get_symbols_for
from ..utils.utils import alias_string

unary_functions = get_symbols_for("unary_functions", "asciimath", "latex")
unary_functions.update(
get_symbols_for("function_symbols", "asciimath", "latex")
from ..translation.latex2asciimath import (
left_parenthesis,
right_parenthesis,
smb,
unary_functions,
binary_functions,
)
binary_functions = get_symbols_for("binary_functions", "asciimath", "latex")
left_parenthesis = get_symbols_for("left_parenthesis", "asciimath", "latex")
left_parenthesis.pop('"\\{"')
right_parenthesis = get_symbols_for("right_parenthesis", "asciimath", "latex")
right_parenthesis.pop('"\\}"')

smb = get_symbols_for("misc_symbols", "asciimath", "latex")
smb.update(get_symbols_for("colors", "asciimath", "latex"))
smb.update(get_symbols_for("relation_symbols", "asciimath", "latex"))
smb.update(get_symbols_for("logical_symbols", "asciimath", "latex"))
smb.update(get_symbols_for("operation_symbols", "asciimath", "latex"))
smb.update(get_symbols_for("greek_letters", "asciimath", "latex"))
smb.update(get_symbols_for("arrows", "asciimath", "latex"))
smb = dict(sorted(smb.items(), key=lambda x: (-len(x[0]), x[0])))

latex_grammar = r"""
%import common.WS
Expand All @@ -33,10 +21,11 @@
| s "^" s -> exp_super
| s "_" s "^" s -> exp_under_super
s: _l start? _r -> exp_par
| "\\" _u "{{" start "}}" -> exp_unary
| "\\" _b "{{" start "}}" "{{" start "}}" -> exp_binary
| "\\" _latex1 -> symbol
| "\\" _latex2 -> symbol
| "\\left" (_l | /\./ | /\\vert/) start? "\\right" (_r | /\./ | /\\vert/) -> exp_par
| _u "{{" start "}}" -> exp_unary
| _b "{{" start "}}" "{{" start "}}" -> exp_binary
| _latex1 -> symbol
| _latex2 -> symbol
| _c -> const
!_c: /d[A-Za-z]/
| NUMBER
Expand All @@ -49,13 +38,18 @@
!_latex2: {}
QS: "\"" /(?<=").+(?=")/ "\"" // Quoted String
""".format(
alias_string(left_parenthesis, alias=False),
alias_string(right_parenthesis, alias=False),
alias_string(binary_functions, alias=False),
alias_string(unary_functions, alias=False),
alias_string(dict(islice(smb.items(), len(smb) // 2)), alias=False),
alias_string(left_parenthesis, alias=False, lang_from="latex"),
alias_string(right_parenthesis, alias=False, lang_from="latex"),
alias_string(binary_functions, alias=False, lang_from="latex"),
alias_string(unary_functions, alias=False, lang_from="latex"),
alias_string(
dict(islice(smb.items(), len(smb) // 2)),
alias=False,
lang_from="latex",
),
alias_string(
dict(islice(smb.items(), len(smb) // 2, len(smb))), alias=False,
dict(islice(smb.items(), len(smb) // 2, len(smb))),
alias=False,
lang_from="latex",
),
)
# Dump the generated grammar only when this module is executed directly
# (e.g. ``python -m py_asciimath.grammar.latex_grammar``). Printing it
# unconditionally wrote the whole grammar to stdout on every import.
if __name__ == "__main__":
    print(latex_grammar)
103 changes: 59 additions & 44 deletions py_asciimath/transformer/transformer.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,22 @@

from lark import Transformer

from ..translation.latex import binary_functions as latex_bin
from ..translation.latex import left_parenthesis as latex_left
from ..translation.latex import right_parenthesis as latex_right
from ..translation.latex import smb as latex_smb
from ..translation.latex import unary_functions as latex_una
from ..translation.mathml import binary_functions as mathml_bin
from ..translation.mathml import colors
from ..translation.mathml import left_parenthesis as mathml_left
from ..translation.mathml import right_parenthesis as mathml_right
from ..translation.mathml import smb as mathml_smb
from ..translation.mathml import unary_functions as mathml_una
from ..translation.asciimath2latex import binary_functions as latex_bin
from ..translation.asciimath2latex import left_parenthesis as latex_left
from ..translation.asciimath2latex import right_parenthesis as latex_right
from ..translation.asciimath2latex import smb as latex_smb
from ..translation.asciimath2latex import unary_functions as latex_una
from ..translation.asciimath2mathml import binary_functions as mathml_bin
from ..translation.asciimath2mathml import colors
from ..translation.asciimath2mathml import left_parenthesis as mathml_left
from ..translation.asciimath2mathml import right_parenthesis as mathml_right
from ..translation.asciimath2mathml import smb as mathml_smb
from ..translation.asciimath2mathml import unary_functions as mathml_una
from ..translation.latex2asciimath import binary_functions as l2mml_bin
from ..translation.latex2asciimath import left_parenthesis as l2mml_left
from ..translation.latex2asciimath import right_parenthesis as l2mml_right
from ..translation.latex2asciimath import smb as l2mml_smb
from ..translation.latex2asciimath import unary_functions as l2mml_una
from ..utils.log import Log
from ..utils.utils import UtilsMat, concat, encapsulate_mrow

Expand Down Expand Up @@ -151,8 +156,12 @@ def exp_par(self, items):
yeah_mat, row_par = UtilsMat.check_mat(s)
if yeah_mat:
s = UtilsMat.get_latex_mat(s, row_par)
lpar = "\\left" + latex_left[concat(items[0])]
rpar = "\\right" + latex_right[concat(items[-1])]
lpar = (
"\\left"
+ latex_left[items[0]]
+ (" " if items[0] == "langle" else "")
)
rpar = "\\right" + latex_right[items[-1]]
return (
lpar
+ ("\\begin{matrix}" + s + "\\end{matrix}" if yeah_mat else s)
Expand All @@ -161,7 +170,7 @@ def exp_par(self, items):

@ASCIIMathTransformer.log
def exp_unary(self, items):
unary = latex_una[concat(items[0])]
unary = latex_una[items[0]]
items[1] = self.remove_parenthesis(items[1])
if unary == "norm":
return "\\left\\lVert " + items[1] + " \\right\\rVert"
Expand All @@ -176,7 +185,7 @@ def exp_unary(self, items):

@ASCIIMathTransformer.log
def exp_binary(self, items):
binary = latex_bin[concat(items[0])]
binary = latex_bin[items[0]]
items[1] = self.remove_parenthesis(items[1])
items[2] = self.remove_parenthesis(items[2])
if binary == "\\sqrt":
Expand All @@ -186,11 +195,11 @@ def exp_binary(self, items):

@ASCIIMathTransformer.log
def symbol(self, items):
    """Translate a single ASCIIMath symbol token into LaTeX.

    Args:
        items: Children of the ``symbol`` rule; only ``items[0]`` (the
            symbol text) is used.

    Returns:
        The LaTeX command for the symbol.

    Raises:
        KeyError: If the symbol is neither a special case nor a key of
            ``latex_smb``.
    """
    token = items[0]
    # These two symbols are answered directly, before the table lookup.
    if token == "\\":
        return "\\setminus"
    if token == "/_\\":
        return "\\triangle"
    # Table keys are the bare symbol text, so no ``concat`` quoting here.
    return latex_smb[token]

@ASCIIMathTransformer.log
def const(self, items):
Expand Down Expand Up @@ -287,8 +296,8 @@ def exp_par(self, items):
+ UtilsMat.get_mathml_mat(s, row_par)
+ "</mtable>"
)
lpar = mathml_left[concat(items[0])]
rpar = mathml_right[concat(items[-1])]
lpar = mathml_left[items[0]]
rpar = mathml_right[items[-1]]
return encapsulate_mrow(
"<mo>"
+ lpar
Expand All @@ -301,16 +310,16 @@ def exp_par(self, items):

@ASCIIMathTransformer.log
def exp_unary(self, items):
    """Render a unary ASCIIMath function application as MathML.

    Args:
        items: ``items[0]`` is the function name, ``items[1]`` the
            already-translated argument. ``items[1]`` is overwritten in
            place with the result of ``self.remove_parenthesis``.

    Returns:
        The MathML template for the function, filled with the argument
        and wrapped by ``encapsulate_mrow``.

    Raises:
        KeyError: If ``items[0]`` is not a key of ``mathml_una``.
    """
    # Look the name up by its bare text: the translation tables are no
    # longer keyed by the quoted form that ``concat`` produced.
    unary = mathml_una[items[0]]
    items[1] = self.remove_parenthesis(items[1])
    return encapsulate_mrow(unary.format(encapsulate_mrow(items[1])))

@ASCIIMathTransformer.log
def exp_binary(self, items):
binary = mathml_bin[concat(items[0])]
binary = mathml_bin[items[0]]
items[1] = self.remove_parenthesis(items[1])
items[2] = self.remove_parenthesis(items[2])
if concat(items[1][6:-7]) in colors:
if items[1][6:-7] in colors:
s = binary.format(items[1][6:-7], encapsulate_mrow(items[2]))
elif items[0] != "root":
s = binary.format(
Expand All @@ -324,12 +333,12 @@ def exp_binary(self, items):

@ASCIIMathTransformer.log
def symbol(self, items):
    """Translate a single ASCIIMath symbol token into MathML.

    Args:
        items: Children of the ``symbol`` rule; only ``items[0]`` (the
            symbol text) is used.

    Returns:
        The token itself when it is a colour name, otherwise the symbol
        wrapped in an ``<mo>`` element.

    Raises:
        KeyError: If the symbol is not a colour, not the backslash and
            not a key of ``mathml_smb``.
    """
    token = items[0]
    # Colour names are passed through unchanged (no <mo> wrapper).
    if token in colors:
        return token
    # The backslash is answered directly, before the table lookup.
    if token == "\\":
        return "<mo>&setminus;</mo>"
    return "<mo>" + mathml_smb[token] + "</mo>"

@ASCIIMathTransformer.log
def const(self, items):
Expand All @@ -348,12 +357,8 @@ def __init__(
self, log=True, start_end_par_pattern="{}{}", visit_tokens=False
):
Transformer.__init__(self, visit_tokens=visit_tokens)
formatted_left_parenthesis = "|".join(
["\\(", "\\(:", "\\[", "\\{:"]
)
formatted_right_parenthesis = "|".join(
["\\)", ":\\)", "\\]", ":\\}"]
)
formatted_left_parenthesis = "|".join(["\\(", "\\(:", "\\[", "\\{:"])
formatted_right_parenthesis = "|".join(["\\)", ":\\)", "\\]", ":\\}"])
self.start_end_par_pattern = re.compile(
start_end_par_pattern.format(
formatted_left_parenthesis, formatted_right_parenthesis,
Expand All @@ -377,47 +382,57 @@ def decorator(*args, **kwargs):

@log
def exp(self, items):
    """Join the translated sub-expressions of an ``exp`` node.

    Args:
        items: Already-translated ASCIIMath strings, in source order.

    Returns:
        The strings joined by single spaces.
    """
    return " ".join(items)

@log
def exp_interm(self, items):
    """Unwrap an intermediate expression node.

    Args:
        items: Children of the node; only the first one is used.

    Returns:
        ``items[0]`` unchanged.
    """
    return items[0]

@log
def exp_frac(self, items):
    """Render a fraction in ASCIIMath notation.

    Args:
        items: ``items[0]`` is the translated numerator, ``items[1]``
            the translated denominator.

    Returns:
        ``(numerator)/(denominator)``; both operands are parenthesised
        so that compound operands stay grouped.
    """
    numerator, denominator = items[0], items[1]
    return "(" + numerator + ")/(" + denominator + ")"

@log
def exp_under(self, items):
    """Render a subscripted expression in ASCIIMath notation.

    Args:
        items: ``items[0]`` is the translated base, ``items[1]`` the
            translated subscript.

    Returns:
        ``base_(subscript)``.
    """
    base, subscript = items[0], items[1]
    return base + "_(" + subscript + ")"

@log
def exp_super(self, items):
    """Render a superscripted expression in ASCIIMath notation.

    Args:
        items: ``items[0]`` is the translated base, ``items[1]`` the
            translated superscript.

    Returns:
        ``base^(superscript)``.
    """
    base, superscript = items[0], items[1]
    return base + "^(" + superscript + ")"

@log
def exp_under_super(self, items):
    """Render an expression carrying both a subscript and a superscript.

    The grammar rule is ``s "_" s "^" s``, so the children arrive as
    base, subscript, superscript (the sibling ``exp_under`` and
    ``exp_super`` methods likewise receive only the ``s`` children).

    Args:
        items: ``items[0]`` is the translated base, ``items[1]`` the
            translated subscript, ``items[2]`` the translated
            superscript.

    Returns:
        ``base_(subscript)^(superscript)``.
    """
    base, subscript, superscript = items[0], items[1], items[2]
    # Keep each script on its own operator: attaching ``items[1]`` to
    # "^" would silently swap the subscript and the superscript.
    return base + "_(" + subscript + ")^(" + superscript + ")"

@log
def exp_par(self, items):
    """Render a delimited LaTeX group with ASCIIMath delimiters.

    Args:
        items: ``items[0]`` is the opening delimiter and ``items[-1]``
            the closing delimiter as written in the LaTeX input;
            everything in between is the already-translated content
            (possibly empty).

    Returns:
        The content joined by single spaces and wrapped in the
        translated delimiters.
    """
    # "." and "\vert" are matched by the grammar's \left ... \right rule
    # and are popped from the LaTeX->ASCIIMath delimiter tables, so they
    # are translated here.
    special_left = {".": "{:", "\\vert": "|:"}
    special_right = {".": ":}", "\\vert": ":|"}

    left = items[0]
    right = items[-1]
    if left in special_left:
        left = special_left[left]
    else:
        # Every other delimiter (e.g. "\langle", "\{") goes through the
        # translation table; unknown delimiters are kept unchanged.
        left = l2mml_left.get(left, left)
    if right in special_right:
        right = special_right[right]
    else:
        right = l2mml_right.get(right, right)
    return left + " ".join(items[1:-1]) + right

@log
def exp_unary(self, items):
    """Render a unary LaTeX command as an ASCIIMath function call.

    Args:
        items: ``items[0]`` is the LaTeX command, ``items[1]`` the
            already-translated argument.

    Returns:
        ``name(argument)``, where ``name`` is the ASCIIMath spelling
        looked up in the LaTeX->ASCIIMath unary table.

    Raises:
        KeyError: If ``items[0]`` is not a key of that table.
    """
    name = l2mml_una[items[0]]
    return name + "(" + items[1] + ")"

@log
def exp_binary(self, items):
    """Render a binary LaTeX command as an ASCIIMath function call.

    Args:
        items: ``items[0]`` is the LaTeX command, ``items[1]`` and
            ``items[2]`` its two already-translated arguments.

    Returns:
        ``name(first)(second)``, where ``name`` is the ASCIIMath
        spelling looked up in the LaTeX->ASCIIMath binary table.

    Raises:
        KeyError: If ``items[0]`` is not a key of that table.
    """
    name = l2mml_bin[items[0]]
    return name + "(" + items[1] + ")(" + items[2] + ")"

@log
def symbol(self, items):
    """Translate a single LaTeX symbol into its ASCIIMath spelling.

    Args:
        items: Children of the ``symbol`` rule; only ``items[0]`` (the
            LaTeX symbol) is used.

    Returns:
        The ASCIIMath symbol from the LaTeX->ASCIIMath symbol table.

    Raises:
        KeyError: If ``items[0]`` is not a key of that table.
    """
    return l2mml_smb[items[0]]

@log
def const(self, items):
    """Pass a constant through unchanged.

    Args:
        items: Children of the ``const`` rule; only the first one is
            used.

    Returns:
        ``items[0]`` as written in the input.
    """
    return items[0]

@log
def q_str(self, items):
Expand Down
File renamed without changes.
File renamed without changes.
22 changes: 22 additions & 0 deletions py_asciimath/translation/latex2asciimath.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
from ..const import get_symbols_for

# LaTeX -> ASCIIMath lookup tables: every dict is keyed by the LaTeX
# spelling and maps to the ASCIIMath spelling.
unary_functions = get_symbols_for("unary_functions", "asciimath", "latex")

binary_functions = get_symbols_for("binary_functions", "asciimath", "latex")
# NOTE(review): "\sqrt" is dropped from the binary table; presumably it is
# already covered as a unary function -- confirm against const.py.
del binary_functions["\\sqrt"]

left_parenthesis = get_symbols_for("left_parenthesis", "asciimath", "latex")
right_parenthesis = get_symbols_for("right_parenthesis", "asciimath", "latex")
# "." and "\vert" are matched by explicit regex alternatives in the
# grammar's \left ... \right rule, so they are removed from both tables.
for _table in (left_parenthesis, right_parenthesis):
    for _delimiter in (".", "\\vert"):
        del _table[_delimiter]
del _table, _delimiter

# Merge all plain-symbol groups; on duplicate keys a later group wins.
smb = {}
for _group in (
    "misc_symbols",
    "function_symbols",
    "colors",
    "relation_symbols",
    "logical_symbols",
    "operation_symbols",
    "greek_letters",
    "arrows",
):
    smb.update(get_symbols_for(_group, "asciimath", "latex"))
del _group

# Order the keys longest first, ties broken alphabetically. The grammar
# module consumes this dict in iteration order (it slices it in halves).
smb = {
    key: smb[key] for key in sorted(smb, key=lambda key: (-len(key), key))
}
del smb["."]
20 changes: 18 additions & 2 deletions py_asciimath/utils/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,15 @@ def alias_string(mapping, init=False, alias=True, prefix="", lang_from=None):
"|"
if init
else ""
+ mapping[0][0]
+ (
'"' + mapping[0][0] + '"'
if lang_from != "latex"
else (
'/' + mapping[0][0].encode("unicode-escape").decode() + '/'
if mapping[0][0].startswith("\\")
else '"' + mapping[0][0] + '"'
)
)
+ (
" -> " + (prefix + "_" if prefix != "" else "") + mapping[0][1]
if alias
Expand All @@ -32,7 +40,15 @@ def alias_string(mapping, init=False, alias=True, prefix="", lang_from=None):
s = (
s
+ "\n\t| "
+ k
+ (
'"' + k + '"'
if lang_from != "latex"
else (
'/' + k.encode("unicode-escape").decode() + '/'
if k.startswith("\\")
else '"' + k + '"'
)
)
+ (
" -> " + (prefix + "_" if prefix != "" else "") + v
if alias
Expand Down

0 comments on commit ab9caee

Please sign in to comment.