From 43496ec3cd21113f5332bb2664d9f9d37c40270b Mon Sep 17 00:00:00 2001 From: Simon Cross Date: Sun, 25 Aug 2024 13:06:35 +0200 Subject: [PATCH] Fix HTMLParser error handling. (#87) * Fix HTMLParser error handling which referenced Python's html.HTMLParseError which was never raised and removed in Python 3.5. * Don't use assertRaisesRegex on Python 2. * Fix typo in HTMLParser error handling comment. --- genshi/input.py | 10 +++++++--- genshi/tests/test_input.py | 16 +++++++++++++++- 2 files changed, 22 insertions(+), 4 deletions(-) diff --git a/genshi/input.py b/genshi/input.py index fa18c38..c21990a 100644 --- a/genshi/input.py +++ b/genshi/input.py @@ -346,9 +346,13 @@ def _generate(): for tag in open_tags: yield END, QName(tag), pos break - except html.HTMLParseError as e: - msg = '%s: line %d, column %d' % (e.msg, e.lineno, e.offset) - raise ParseError(msg, self.filename, e.lineno, e.offset) + except Exception as e: + # Python's simple HTMLParser does not raise detailed + # errors except in strict mode which was deprecated + # in Python 3.3 and removed in Python 3.5 and which in + # any case is not used in this code. + msg = str(e) + raise ParseError(msg, self.filename) return Stream(_generate()).filter(_coalesce) def __iter__(self): diff --git a/genshi/tests/test_input.py b/genshi/tests/test_input.py index 44b7442..e68515d 100644 --- a/genshi/tests/test_input.py +++ b/genshi/tests/test_input.py @@ -15,7 +15,7 @@ from genshi.core import Attrs, QName, Stream from genshi.input import XMLParser, HTMLParser, ParseError, ET -from genshi.compat import StringIO, BytesIO +from genshi.compat import IS_PYTHON2, StringIO, BytesIO from genshi.tests.utils import doctest_suite from xml.etree import ElementTree @@ -294,6 +294,20 @@ def test_convert_ElementTree_to_markup_stream(self): self.assertEqual((Stream.END, QName("span")), events[4][:2]) self.assertEqual((Stream.END, QName("div")), events[5][:2]) + def test_parsing_error(self): + text = u'
'.encode('utf-8') + events = HTMLParser(BytesIO(text)) + if IS_PYTHON2: + self.assertRaises(ParseError, list, events) + else: + self.assertRaisesRegex( + ParseError, + r"source returned bytes, but no encoding specified", + list, + events, + ) + + def suite(): suite = unittest.TestSuite() suite.addTest(doctest_suite(XMLParser.__module__))