Skip to content

Commit

Permalink
Merge pull request #118 from mailgun/maxim/develop
Browse files: browse the repository at this point in the history
Make DecodingError message safe
  • Loading branch information
horkhe authored Jul 20, 2016
2 parents 50ea4f7 + 20652d2 commit 8421b0e
Show file tree
Hide file tree
Showing 3 changed files with 7 additions and 23 deletions.
5 changes: 1 addition & 4 deletions flanker/mime/message/charsets.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,4 @@
import regex as re
from flanker.mime.message import errors
from flanker.utils import to_utf8, to_unicode
from flanker.utils import to_unicode


def convert_to_unicode(charset, value):
Expand All @@ -9,7 +7,6 @@ def convert_to_unicode(charset, value):
return value

charset = _translate_charset(charset)

return to_unicode(value, charset=charset)


Expand Down
23 changes: 5 additions & 18 deletions flanker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
"""
Utility functions and classes used by flanker.
"""
import logging
import re

import cchardet
Expand All @@ -12,9 +11,6 @@
from functools import wraps


log = logging.getLogger(__name__)


def _guess_and_convert(value):
"""
Try to guess the encoding of the passed value and decode it.
Expand All @@ -23,9 +19,8 @@ def _guess_and_convert(value):
back to chardet which is much slower.
"""
try:
return _guess_and_convert_with(value)
return _guess_and_convert_with(value, detector=cchardet)
except:
log.warn("Fallback to chardet")
return _guess_and_convert_with(value, detector=chardet)


Expand All @@ -39,7 +34,7 @@ def _guess_and_convert_with(value, detector=cchardet):
charset = detector.detect(value)

if not charset["encoding"]:
raise errors.DecodingError("Failed to guess encoding for %s" % (value,))
raise errors.DecodingError("Failed to guess encoding")

try:
value = value.decode(charset["encoding"], "replace")
Expand All @@ -53,25 +48,17 @@ def _make_unicode(value, charset=None):
if isinstance(value, unicode):
return value

charset = charset or "utf-8"
try:
# if charset is provided, try decoding with it
if charset:
value = value.decode(charset, "strict")

# if charset is not provided, assume UTF-8
else:
value = value.decode("utf-8", "strict")

# last resort: try to guess the encoding
value = value.decode(charset, "strict")
except (UnicodeError, LookupError):
value = _guess_and_convert(value)

return value


def to_unicode(value, charset=None):
value = _make_unicode(value, charset)
return unicode(value.encode("utf-8", "strict"), "utf-8", "strict")
return _make_unicode(value, charset)


def to_utf8(value, charset=None):
Expand Down
2 changes: 1 addition & 1 deletion setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


setup(name='flanker',
version='0.4.40',
version='0.4.41',
description='Mailgun Parsing Tools',
long_description=open('README.rst').read(),
classifiers=[],
Expand Down

0 comments on commit 8421b0e

Please sign in to comment.