From 20652d297a26373383eaf3a20149682b8b47fc94 Mon Sep 17 00:00:00 2001 From: Maxim Vladimirsky Date: Wed, 20 Jul 2016 10:47:43 -0700 Subject: [PATCH] Make DecodingError message safe --- flanker/mime/message/charsets.py | 5 +---- flanker/utils.py | 23 +++++------------------ setup.py | 2 +- 3 files changed, 7 insertions(+), 23 deletions(-) diff --git a/flanker/mime/message/charsets.py b/flanker/mime/message/charsets.py index 9cd1333f..b51b14b2 100644 --- a/flanker/mime/message/charsets.py +++ b/flanker/mime/message/charsets.py @@ -1,6 +1,4 @@ -import regex as re -from flanker.mime.message import errors -from flanker.utils import to_utf8, to_unicode +from flanker.utils import to_unicode def convert_to_unicode(charset, value): @@ -9,7 +7,6 @@ def convert_to_unicode(charset, value): return value charset = _translate_charset(charset) - return to_unicode(value, charset=charset) diff --git a/flanker/utils.py b/flanker/utils.py index afe99928..05b62b5d 100644 --- a/flanker/utils.py +++ b/flanker/utils.py @@ -2,7 +2,6 @@ """ Utility functions and classes used by flanker. """ -import logging import re import cchardet @@ -12,9 +11,6 @@ from functools import wraps -log = logging.getLogger(__name__) - - def _guess_and_convert(value): """ Try to guess the encoding of the passed value and decode it. @@ -23,9 +19,8 @@ def _guess_and_convert(value): back to chardet which is much slower. """ try: - return _guess_and_convert_with(value) + return _guess_and_convert_with(value, detector=cchardet) except: - log.warn("Fallback to chardet") return _guess_and_convert_with(value, detector=chardet) @@ -39,7 +34,7 @@ def _guess_and_convert_with(value, detector=cchardet): charset = detector.detect(value) if not charset["encoding"]: - raise errors.DecodingError("Failed to guess encoding for %s" % (value,)) + raise errors.DecodingError("Failed to guess encoding") try: value = value.decode(charset["encoding"], "replace") @@ -53,16 +48,9 @@ def _make_unicode(value, charset=None): if isinstance(value, unicode): return value + charset = charset or "utf-8" try: - # if charset is provided, try decoding with it - if charset: - value = value.decode(charset, "strict") - - # if charset is not provided, assume UTF-8 - else: - value = value.decode("utf-8", "strict") - - # last resort: try to guess the encoding + value = value.decode(charset, "strict") except (UnicodeError, LookupError): value = _guess_and_convert(value) @@ -70,8 +58,7 @@ def _make_unicode(value, charset=None): def to_unicode(value, charset=None): - value = _make_unicode(value, charset) - return unicode(value.encode("utf-8", "strict"), "utf-8", "strict") + return _make_unicode(value, charset) def to_utf8(value, charset=None): diff --git a/setup.py b/setup.py index fd2c4e8b..8fca487b 100644 --- a/setup.py +++ b/setup.py @@ -4,7 +4,7 @@ setup(name='flanker', - version='0.4.40', + version='0.4.41', description='Mailgun Parsing Tools', long_description=open('README.rst').read(), classifiers=[],