Skip to content

Commit

Permalink
Add MathMLParser tests and fix minors
Browse files Browse the repository at this point in the history
  • Loading branch information
belerico committed Apr 16, 2020
1 parent fe175a3 commit a988b45
Show file tree
Hide file tree
Showing 6 changed files with 175 additions and 36 deletions.
1 change: 0 additions & 1 deletion .coveragerc
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,6 @@ omit =
py_asciimath/parser/parse_lists.py
py_asciimath/utils/log.py
py_asciimath/grammar/*
py_asciimath/parser/*
py_asciimath/translation/*
*/__init__.py
source =
Expand Down
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,9 @@
# Exclude docs
docs/*

# Exclude all generated .xml files
*.xml

# Exclude main.py
main.py

Expand Down
3 changes: 2 additions & 1 deletion MANIFEST.in
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
include LICENSE
include README.md
recursive-include examples *
graft examples
global-exclude *.py[cod]
recursive-include py_asciimath/dtd/ *
recursive-include py_asciimath/translation/mathml2tex/ *
46 changes: 27 additions & 19 deletions py_asciimath/parser/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,13 +11,13 @@
class MathMLParser(object):

xml_decl_pattern = re.compile(
r"(<\?xml.*?(encoding=(?:'|\")(.*?)(?:'|\"))?\?>)"
r"(\s*)(<\?xml.*?(encoding=(?:'|\")(.*?)(?:'|\"))?\?>)"
)
doctype_pattern = re.compile(
r"(<!DOCTYPE math ([A-Z]+).*?mathml(\d)?\.dtd\">)", re.MULTILINE
)

def __init__(self, *args, **kwargs):
def __init__(self, *args, **kwargs): # pragma: no cover
super(MathMLParser, self).__init__()

@classmethod
Expand All @@ -38,13 +38,16 @@ def get_encoding(cls, s):
Returns:
str: Encoding of the XML document
"""
xml_decl_match = re.match(cls.xml_decl_pattern, s)
if xml_decl_match is not None:
if xml_decl_match.span(1)[0] != 0:
xml_decl_match = list(re.finditer(cls.xml_decl_pattern, s))
if len(xml_decl_match) > 1:
raise Exception("Multiple XML declarations found")
elif len(xml_decl_match) == 1:
xml_decl_match = xml_decl_match[0]
if xml_decl_match.span()[0] != 0:
raise Exception(
"XML declaration must be at the beginning of the file"
)
encoding = xml_decl_match.group(3)
encoding = xml_decl_match.group(4)
logging.info("Encoding from XML declaration: " + encoding)
else:
encoding = None
Expand Down Expand Up @@ -82,7 +85,7 @@ def set_doctype(cls, s, network, dtd=None):
doctype_match = list(re.finditer(cls.doctype_pattern, s))
if len(doctype_match) > 1:
raise Exception("Multiple DOCTYPE declarations found")
if doctype_match != []:
elif doctype_match != []:
doctype_match = doctype_match[0]
if (
doctype_match.group(2) == "PUBLIC"
Expand Down Expand Up @@ -111,21 +114,26 @@ def set_doctype(cls, s, network, dtd=None):
"no need to bother your ISP"
)
else:
dtd = dtd if dtd is not None else "mathml3"
logging.warning(
"No DTD declaration found: "
"set to local {} DTD".format(
dtd if dtd is not None else "mathml3"
"set to {} {} DTD".format(
"remote" if network else "local", dtd
)
)
xml_decl_match = re.match(cls.xml_decl_pattern, s)
if xml_decl_match is None:
start = 0
else:
start = xml_decl_match.span(1)[1]
if dtd is not None:
doctype = cls.get_doctype(dtd, network)
xml_decl_match = list(re.finditer(cls.xml_decl_pattern, s))
if len(xml_decl_match) > 1:
raise Exception("Multiple XML declarations found")
elif len(xml_decl_match) == 1:
xml_decl_match = xml_decl_match[0]
if xml_decl_match.span()[0] != 0:
raise Exception(
"XML declaration must be at the beginning of the file"
)
start = xml_decl_match.span()[1]
else:
doctype = cls.get_doctype("mathml3", network)
start = 0
doctype = cls.get_doctype(dtd, network)
s = s[:start] + doctype + s[start:]
return s

Expand Down Expand Up @@ -211,7 +219,7 @@ def get_parser(
ns_clean=True,
resolve_entities=False,
**kwargs
):
): # pragma: no cover
"""Create a MathML XML parser
Args:
Expand All @@ -226,7 +234,7 @@ def get_parser(
**kwargs: Additional ~lxml.etree.XMLParser options
Returns:
str: Parsed and possibly validated MathML XML
lxml.etree.XMLParser: MathML parser following the specifications
"""
return lxml.etree.XMLParser(
dtd_validation=dtd_validation,
Expand Down
23 changes: 8 additions & 15 deletions py_asciimath/translator/translator.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,15 +243,9 @@ def _translate(
dstyle = '<mstyle displaystyle="true">{}</mstyle>'
else:
dstyle = "{}"
if network: # pragma: no cover
if check_connection():
doctype = MathMLParser.get_doctype(dtd, True)
else:
network = False
doctype = MathMLParser.get_doctype(dtd, False)
logging.warning("No connection available...")
else:
doctype = MathMLParser.get_doctype(dtd, False)
if network and not check_connection():
network = False
logging.warning("No connection available...")
parsed = (
(
'<math xmlns="http://www.w3.org/1998/Math/MathML">'
Expand All @@ -273,11 +267,11 @@ def _translate(
parsed, dtd, dtd_validation, network, **kwargs
)
if output == "string":
encoding = parsed.getroottree().docinfo.encoding
parsed = parsed.getroottree()
encoding = parsed.docinfo.encoding
parsed = lxml.etree.tostring(
parsed,
pretty_print=xml_pprint,
doctype=(doctype if dtd_validation else None),
xml_declaration=xml_declaration,
encoding=encoding,
).decode(encoding)
Expand Down Expand Up @@ -365,10 +359,9 @@ def __init__(self):
self.transformer = lxml.etree.XSLT(transformer)

def _translate(self, exp, network=False, **kwargs):
if network:
if not check_connection():
network = False
logging.warning("No connection available...")
if network and not check_connection():
network = False
logging.warning("No connection available...")
mml_version = MathMLParser.get_doctype_version(exp)
if mml_version == "1":
raise NotImplementedError(
Expand Down
135 changes: 135 additions & 0 deletions tests/test_MathMLParser.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,135 @@
import unittest

from py_asciimath import PROJECT_ROOT
from py_asciimath.parser.parser import MathMLParser


class TestMathMLParser(unittest.TestCase):
    """Unit tests for the class-level helpers of ``MathMLParser``.

    Covers ``get_doctype``, ``get_doctype_version``, ``get_encoding`` and
    ``set_doctype``, including the error paths for duplicated or misplaced
    XML/DOCTYPE declarations.
    """

    def setUp(self):
        # Show the complete diff when long DOCTYPE strings do not match.
        self.maxDiff = None

    def test_mathmlparser_get_doctype(self):
        # Every supported DTD name, both remote (network) and local.
        for network in [True, False]:
            for mml in ["mathml1", "mathml2", "mathml3"]:
                doctype = MathMLParser.get_doctype(mml, network)
                if network:
                    # MathML 1 is the only remote DTD expected as a SYSTEM
                    # identifier; versions 2 and 3 are expected as PUBLIC.
                    self.assertEqual(
                        doctype,
                        '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML {}.0//EN" '
                        '"http://www.w3.org/Math/DTD/{}/{}.dtd">'.format(
                            mml[-1], mml, mml
                        )
                        if mml != "mathml1"
                        else "<!DOCTYPE math SYSTEM "
                        '"http://www.w3.org/Math/DTD/mathml1/mathml.dtd">',
                    )
                else:
                    self.assertEqual(
                        doctype,
                        '<!DOCTYPE math SYSTEM "'
                        + PROJECT_ROOT
                        + '/dtd/{}/{}.dtd">'.format(mml, mml),
                    )
        # An unknown DTD name is rejected.
        self.assertRaises(
            NotImplementedError, MathMLParser.get_doctype, "a", False
        )

    def test_mathmlparser_get_doctype_version_ok(self):
        self.assertEqual(
            "3",
            MathMLParser.get_doctype_version(
                '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" '
                '"http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">'
            ),
        )
        # Same MathML 1 DOCTYPE that test_mathmlparser_get_doctype expects
        # (the fixture previously carried a stray doubled quote after SYSTEM).
        self.assertEqual(
            "1",
            MathMLParser.get_doctype_version(
                "<!DOCTYPE math SYSTEM "
                '"http://www.w3.org/Math/DTD/mathml1/mathml.dtd">'
            ),
        )
        # No DOCTYPE at all: no version can be reported.
        self.assertIsNone(MathMLParser.get_doctype_version(""))
        # Two DOCTYPE declarations in the same document are an error.
        self.assertRaises(
            Exception,
            MathMLParser.get_doctype_version,
            '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" '
            '"http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">\n'
            '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" '
            '"http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">',
        )

    def test_mathmlparser_get_encoding(self):
        # Two XML declarations in the same document.
        self.assertRaises(
            Exception,
            MathMLParser.get_encoding,
            "<?xml version='1.0' encoding='UTF-8'?>"
            "<mi>+</mi><?xml version='1.0' encoding='UTF-8'?>",
        )
        # A single XML declaration that is not at the start of the input.
        self.assertRaises(
            Exception,
            MathMLParser.get_encoding,
            "<mi>+</mi><?xml version='1.0' encoding='UTF-8'?>",
        )
        self.assertEqual(
            "UTF-8",
            MathMLParser.get_encoding(
                "<?xml version='1.0' encoding='UTF-8'?><mi>+</mi>"
            ),
        )
        # No XML declaration: no encoding is reported.
        self.assertIsNone(MathMLParser.get_encoding(""))

    def test_mathmlparser_set_doctype(self):
        # Two DOCTYPE declarations after the XML declaration.
        self.assertRaises(
            Exception,
            MathMLParser.set_doctype,
            "<?xml version='1.0' encoding='UTF-8'?>"
            + '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" '
            + '"http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">'
            + '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" '
            + '"http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">',
            False,
        )
        # A remote DOCTYPE with network=False is expected to come back as the
        # local DOCTYPE of the same version.
        s = (
            "<?xml version='1.0' encoding='UTF-8'?>"
            + '<!DOCTYPE math PUBLIC "-//W3C//DTD MathML 3.0//EN" '
            + '"http://www.w3.org/Math/DTD/mathml3/mathml3.dtd">'
        )
        self.assertEqual(
            "<?xml version='1.0' encoding='UTF-8'?>"
            + MathMLParser.get_doctype("mathml3", False),
            MathMLParser.set_doctype(s, False),
        )
        # No DOCTYPE present: the requested one is inserted right after the
        # XML declaration.
        s = "<?xml version='1.0' encoding='UTF-8'?>"
        self.assertEqual(
            "<?xml version='1.0' encoding='UTF-8'?>"
            + MathMLParser.get_doctype("mathml1", False),
            MathMLParser.set_doctype(s, False, dtd="mathml1"),
        )
        # A local SYSTEM DOCTYPE is expected back unchanged even when
        # network=True.
        s = (
            '<!DOCTYPE math SYSTEM "'
            + PROJECT_ROOT
            + '/dtd/mathml2/mathml2.dtd">'
        )
        self.assertEqual(s, MathMLParser.set_doctype(s, True))
        # Two XML declarations and no DOCTYPE.
        self.assertRaises(
            Exception,
            MathMLParser.set_doctype,
            "<?xml version='1.0' encoding='UTF-8'?>"
            "<?xml version='1.0' encoding='UTF-8'?>",
            False,
        )
        # XML declaration that is not at the start of the input.
        self.assertRaises(
            Exception,
            MathMLParser.set_doctype,
            "<mi>+</mi><?xml version='1.0' encoding='UTF-8'?>",
            False,
        )


# Run this module's tests when the file is executed directly.
if __name__ == "__main__":
    unittest.main()

0 comments on commit a988b45

Please sign in to comment.