Merge branch 'master' into same-param-encoded-unencoded

mailgun · Dec 12, 2016 · be39322 · be39322
2 parents 33873cf + 8421b0e
commit be39322
Show file tree

Hide file tree

Showing 116 changed files with 1,011 additions and 29 deletions.
diff --git a/docs/User Manual.md b/docs/User Manual.md
@@ -14,6 +14,9 @@
     * [Drawbacks](#drawbacks)
     * [Parsing MIME Messages](#parsing-mime-messages)
     * [Creating MIME Messages](#creating-mime-messages)
+* [DKIM](#dkim)
+    * [Signing](#signing)
+    * [Verification](#verification)
 
 ### Introduction
 
@@ -633,3 +636,66 @@ Content-Transfer-Encoding: 7bit
 
 --13f551bddf2e4759b125f70674288048--
 ```
+
+### DKIM
+
+DKIM (DomainKeys Identified Mail) is an IETF standard that lets a sending MTA
+attach a cryptographic signature to each email, so that the receiving MTA can
+verify that the sender was authorized to send mail for that domain. flanker
+provides support for generating DKIM signatures, as well as DomainKeys
+signatures (an earlier standard that serves the same purpose).
+
+#### Signing
+
+To sign a message, you need a few things:
+
+* `key`: An RSA private key. This should be an instance of
+  `cryptography.hazmat.primitives.interfaces.RSAPrivateKey`. Consult the
+  [PyCA Cryptography documentation](https://cryptography.io/en/latest/hazmat/primitives/asymmetric/rsa/)
+  for more information on how to generate and load keys.
+* `selector`: A string which identifies the particular public key that the
+  receiving MTA should use to verify senders. It corresponds to a specific DNS
+  record.
+* `domain`: A string which specifies the domain the message is from.
+* `header_canonicalization` and `body_canonicalization` (optional): These
+  specify which of the canonicalization rules from the RFC are applied to the
+  headers and to the body, respectively. Valid values are instances of
+  `flanker.dkim.SimpleCanonicalization` (the default) and
+  `flanker.dkim.RelaxedCanonicalization`.
+* `signed_headers` (optional): A list of strings which specify which headers
+  should be signed. If this argument is not supplied, all of the message's
+  headers will be signed.
+
+Finally, you need a `message`, which is a string containing an RFC 822
+formatted email.
+
+
+```pycon
+>>> from flanker.dkim import DKIMSigner
+>>> example_message = """
+... From: Joe SixPack <joe@football.example.com>
+... To: Suzie Q <suzie@shopping.example.net>
+... Subject: Is dinner ready?
+... Date: Fri, 11 Jul 2003 21:00:37 -0700 (PDT)
+... Message-ID: <20030712040037.46341.5F8J@football.example.com>
+...
+... Hi.
+...
+... We lost the game.  Are you hungry yet?
+...
+... Joe.
+... """.strip()
+>>> signer = DKIMSigner(rsa_key, selector="mx", domain="mailgun.net")
+>>> signer.sign(example_message)
+"DKIM-Signature: ..."
+```
+
+`sign()` will return the complete header line which can be added to the email
+before sending.
+
+`flanker.dkim.DomainKeySigner` works similarly, except it does not allow
+specifying the canonicalization rules.
+
+#### Verification
+
+flanker does not currently support verifying DKIM signatures, but it will soon.
diff --git a/flanker/dkim.py b/flanker/dkim.py
@@ -0,0 +1,197 @@
+import base64
+import regex as re
+import time
+
+from cryptography.hazmat.backends import default_backend
+from cryptography.hazmat.primitives import hashes
+from cryptography.hazmat.primitives.asymmetric import padding
+
+
+# One or more tabs/spaces sitting immediately before a CRLF line ending.
+_BODY_TRAILING_WSP = re.compile(r"[\t ]+\r\n")
+# Any run of one or more tabs/spaces, wherever it appears.
+_BODY_WSP_RE = re.compile(r"[\t ]+")
+
+
+class SimpleCanonicalization(object):
+    """The "simple" rules: headers pass through, only the body tail changes."""
+
+    name = "simple"
+
+    def canonicalize_header(self, header, value):
+        """Return the header name and value exactly as they were given."""
+        unchanged = (header, value)
+        return unchanged
+
+    def canonicalize_body(self, body):
+        """Return ``body`` with its trailing CR/LF characters replaced by one CRLF.
+
+        An empty body therefore canonicalizes to a single CRLF.
+        """
+        crlf = "\r\n"
+        trimmed = body.rstrip(crlf)
+        return trimmed + crlf
+
+
+class RelaxedCanonicalization(object):
+    """The "relaxed" rules: tolerant of case and whitespace changes."""
+
+    name = "relaxed"
+
+    def canonicalize_header(self, header, value):
+        """Lowercase the header name; unfold, collapse and trim the value.
+
+        Returns ``(header, value)`` where the value ends with one CRLF.
+        """
+        header = header.lower()
+        # Unfold by dropping every CRLF, then squeeze each tab/space run
+        # down to a single space.
+        value = _BODY_WSP_RE.sub(" ", value.replace("\r\n", ""))
+        return header, value.strip() + b"\r\n"
+
+    def canonicalize_body(self, body):
+        """Strip line-end whitespace, collapse inner runs, drop trailing lines.
+
+        Returns the canonical body terminated by exactly one CRLF, or an
+        empty string when nothing but whitespace/empty lines remains.
+        """
+        # Terminate an unterminated final line first; otherwise the
+        # trailing-whitespace pattern (which requires a CRLF) would leave
+        # that line's trailing tabs/spaces in place.
+        if body and not body.endswith("\r\n"):
+            body += "\r\n"
+        body = _BODY_TRAILING_WSP.sub("\r\n", body)
+        body = _BODY_WSP_RE.sub(" ", body)
+        body = body.rstrip("\r\n")
+        return body + b"\r\n" if body else b""
+
+
+class NoFWSCanonicalization(object):
+    """Canonicalization that deletes whitespace outright instead of folding."""
+
+    _header_fws_re = re.compile(r"[\t \r\n]+")
+    # A CR followed by any character other than LF; that character is kept.
+    _body_orphan_cr_re = re.compile(b"\r([^\n])")
+
+    def canonicalize_header(self, header, value):
+        """Return the name as-is and the value minus all tabs/spaces/CR/LF.
+
+        A single CRLF is appended to the stripped value.
+        """
+        squeezed = self._header_fws_re.sub("", value)
+        return header, squeezed + "\r\n"
+
+    def canonicalize_body(self, body):
+        """Remove tabs/spaces and orphan CRs, then trim trailing whitespace.
+
+        Returns the result plus one CRLF, or an empty string if nothing
+        is left.
+        """
+        without_wsp = _BODY_WSP_RE.sub("", body)
+        without_cr = self._body_orphan_cr_re.sub(r"\1", without_wsp)
+        trimmed = without_cr.rstrip()
+        if not trimmed:
+            return ""
+        return trimmed + "\r\n"
+
+
+def _fold(header):
+    """Wrap an over-long header with CRLF + space continuation breaks.
+
+    Text up to and including the last existing fold is kept untouched;
+    only the remainder is wrapped. Each break is placed at the last space
+    within the first 72 characters (that space is consumed), or after
+    exactly 72 characters when there is no space to break on.
+    """
+
+    fold = "\r\n "
+    last_fold = header.rfind(fold)
+    if last_fold == -1:
+        pieces, rest = [""], header
+    else:
+        cut = last_fold + len(fold)
+        pieces, rest = [header[:cut]], header[cut:]
+
+    while len(rest) > 72:
+        space = rest.rfind(" ", 0, 72)
+        if space == -1:
+            # No break opportunity: hard-split without dropping a character.
+            chunk, rest = rest[:72], rest[72:]
+        else:
+            chunk, rest = rest[:space], rest[space + 1:]
+        pieces.append(chunk)
+        pieces.append(fold)
+
+    pieces.append(rest)
+    return "".join(pieces)
+
+
+class DomainKeySigner(object):
+    """Build ``DomainKey-Signature`` headers (rsa-sha1, nofws) for messages."""
+
+    def __init__(self, key, selector, domain, signed_headers=None):
+        """Store the signing parameters.
+
+        :param key: private key object; ``sign()`` calls its ``signer()``.
+        :param selector: value emitted in the ``s=`` tag.
+        :param domain: value emitted in the ``d=`` tag.
+        :param signed_headers: names of the headers to sign, or ``None``
+            to sign every header found in the message.
+        """
+        self._key = key
+        self._selector = selector
+        self._domain = domain
+        # Honour the caller's selection; None means "sign every header".
+        self._signed_headers = signed_headers
+
+    def sign(self, message):
+        """Return the complete, folded ``DomainKey-Signature`` header line.
+
+        :param message: the raw message (headers, blank line, body).
+        :returns: the header line, terminated by CRLF.
+        """
+        canonicalization = NoFWSCanonicalization()
+        signer = self._key.signer(padding.PKCS1v15(), hashes.SHA1())
+
+        headers, body = _rfc822_parse(message)
+
+        h_field = []
+        for header, value in headers:
+            if self._signed_headers is None or header in self._signed_headers:
+                # Record the original name for the h= tag before
+                # canonicalizing.
+                h_field.append(header)
+
+                header, value = canonicalization.canonicalize_header(
+                    header, value)
+                signer.update(header)
+                signer.update(b":")
+                signer.update(value)
+        body = canonicalization.canonicalize_body(body)
+        if body:
+            # Separator line between headers and body, fed only when the
+            # canonical body is non-empty.
+            signer.update(b"\r\n")
+            signer.update(body)
+
+        return _fold(
+            b"DomainKey-Signature: a=rsa-sha1; c=nofws; d={domain}; "
+            b"s={selector}; q=dns; h={headers}; b={signature}".format(
+                domain=self._domain,
+                selector=self._selector,
+                headers=b": ".join(h_field),
+                signature=base64.b64encode(signer.finalize())
+            )) + b"\r\n"
+
+
+class DKIMSigner(object):
+    """Build ``DKIM-Signature`` headers (rsa-sha256) for messages."""
+
+    def __init__(self, key, selector, domain,
+                 header_canonicalization=SimpleCanonicalization(),
+                 body_canonicalization=SimpleCanonicalization(),
+                 signed_headers=None):
+        """Store the signing parameters.
+
+        :param key: private key object; ``sign()`` calls its ``signer()``.
+        :param selector: value emitted in the ``s=`` tag.
+        :param domain: value emitted in the ``d=`` tag.
+        :param header_canonicalization: object providing ``name`` and
+            ``canonicalize_header(header, value)``.
+        :param body_canonicalization: object providing ``name`` and
+            ``canonicalize_body(body)``.
+        :param signed_headers: names of the headers to sign, or ``None``
+            to sign every header found in the message.
+        """
+        self._key = key
+        self._selector = selector
+        self._domain = domain
+        self._header_canonicalization = header_canonicalization
+        self._body_canonicalization = body_canonicalization
+        self._signed_headers = signed_headers
+
+    def sign(self, message, current_time=None):
+        """Return the complete ``DKIM-Signature`` header line for ``message``.
+
+        :param message: the raw message (headers, blank line, body).
+        :param current_time: value for the ``t=`` tag; defaults to the
+            current Unix time.
+        :returns: the header line, terminated by CRLF.
+        """
+        if current_time is None:
+            current_time = int(time.time())
+
+        signer = self._key.signer(padding.PKCS1v15(), hashes.SHA256())
+
+        headers, body = _rfc822_parse(message)
+        h_field = []
+        for header, value in headers:
+            if self._signed_headers is None or header in self._signed_headers:
+                h_field.append(header)
+
+                name, canonical = (
+                    self._header_canonicalization.canonicalize_header(
+                        header, value))
+                signer.update(name)
+                signer.update(b":")
+                signer.update(canonical)
+
+        body_digest = hashes.Hash(hashes.SHA256(), backend=default_backend())
+        body_digest.update(
+            self._body_canonicalization.canonicalize_body(body))
+        dkim_header_value = _fold(
+            b" a=rsa-sha256; v=1; "
+            b"c={header_canonicalization.name}/{body_canonicalization.name}; "
+            b"d={domain}; q=dns/txt; s={selector}; t={time}; h={headers}; "
+            b"bh={body_hash}; b=".format(
+                header_canonicalization=self._header_canonicalization,
+                body_canonicalization=self._body_canonicalization,
+                domain=self._domain,
+                selector=self._selector,
+                time=current_time,
+                headers=": ".join(h_field),
+                body_hash=base64.b64encode(body_digest.finalize()),
+            )
+        )
+
+        name, canonical = self._header_canonicalization.canonicalize_header(
+            "DKIM-Signature", dkim_header_value)
+        # The signature header itself is hashed (with an empty b= value)
+        # without a trailing CRLF. A canonicalizer that appends one would
+        # otherwise corrupt the hash and split the emitted header right
+        # before the signature.
+        if canonical.endswith("\r\n"):
+            canonical = canonical[:-2]
+        signer.update(name)
+        signer.update(b":")
+        signer.update(canonical)
+        return b"DKIM-Signature:{dkim_header}{signature}\r\n".format(
+            dkim_header=canonical,
+            signature=_fold(base64.b64encode(signer.finalize()))
+        )
+
+# Line separator: LF, optionally preceded by CR.
+_RFC822_NEWLINE_RE = re.compile(r"\r?\n")
+# A single tab or space (marks a continuation line).
+_RFC822_WS_RE = re.compile(r"[\t ]")
+# Header name: shortest run of printable, non-space ASCII up to a colon.
+_RFC822_HEADER_RE = re.compile(r"([\x21-\x7e]+?):")
+
+
+def _rfc822_parse(message):
+    """Split a raw message into its header list and body.
+
+    :param message: message text; lines may end in LF or CRLF.
+    :returns: ``(headers, body)`` where ``headers`` is a list of
+        ``[name, value]`` pairs (the value keeps everything after the
+        colon, continuation lines included, each line CRLF-terminated)
+        and ``body`` is the remaining lines joined with CRLF.
+    :raises ValueError: on a header line that is neither a header, a
+        continuation of one, nor a line starting with ``"From "``.
+    """
+    headers = []
+    lines = _RFC822_NEWLINE_RE.split(message)
+    i = 0
+    while i < len(lines):
+        line = lines[i]
+        if not line:
+            # End of headers, return what we have plus the body, excluding
+            # the blank line.
+            i += 1
+            break
+        if _RFC822_WS_RE.match(line[0]):
+            if not headers:
+                # A continuation with nothing to continue is malformed;
+                # report it like any other bad header line instead of
+                # failing with IndexError on headers[-1].
+                raise ValueError(
+                    "Unexpected characters in RFC822 header: %s" % line
+                )
+            headers[-1][1] += line + "\r\n"
+        else:
+            m = _RFC822_HEADER_RE.match(line)
+            if m is not None:
+                headers.append([m.group(1), line[m.end(0):] + "\r\n"])
+            elif line.startswith("From "):
+                # Lines starting with "From " are skipped, not parsed.
+                pass
+            else:
+                raise ValueError(
+                    "Unexpected characters in RFC822 header: %s" % line
+                )
+        i += 1
+    return (headers, "\r\n".join(lines[i:]))
diff --git a/flanker/mime/message/charsets.py b/flanker/mime/message/charsets.py
@@ -1,6 +1,4 @@
-import regex as re
-from flanker.mime.message import errors
-from flanker.utils import to_utf8, to_unicode
+from flanker.utils import to_unicode
 
 
 def convert_to_unicode(charset, value):
@@ -9,7 +7,6 @@ def convert_to_unicode(charset, value):
         return value
 
     charset = _translate_charset(charset)
-
     return to_unicode(value, charset=charset)
 
 

diff --git a/flanker/mime/message/errors.py b/flanker/mime/message/errors.py
@@ -4,9 +4,13 @@ class MimeError(Exception):
 
 class DecodingError(MimeError):
     """Thrown when there is an encoding error."""
-    pass
+
+    def __str__(self):
+        return self.message[:256]
 
 
 class EncodingError(MimeError):
     """Thrown when there is an decoding error."""
-    pass
+
+    def __str__(self):
+        return self.message[:256]
diff --git a/flanker/mime/message/headers/encodedword.py b/flanker/mime/message/headers/encodedword.py
@@ -74,9 +74,14 @@ def mime_to_unicode(header):
         return u"".join(decoded)
     except Exception:
         try:
+            logged_header = header
+            if isinstance(logged_header, unicode):
+                logged_header = logged_header.encode('utf-8')
+                # encode header as utf-8 so all characters can be base64 encoded
+            logged_header = b64encode(logged_header)
             log.warning(
                 u"HEADER-DECODE-FAIL: ({0}) - b64encoded".format(
-                    b64encode(header)))
+                    logged_header))
         except Exception:
             log.exception("Failed to log exception")
         return header

diff --git a/flanker/mime/message/part.py b/flanker/mime/message/part.py
@@ -261,7 +261,7 @@ def detected_file_name(self):
         file_name = ctype.params.get('name', '') or ctype.params.get('filename', '')
 
         value, params = self.content_disposition
-        if value == 'attachment':
+        if value in ['attachment', 'inline']:
             file_name = params.get('filename', '') or file_name
 
         # filenames can be presented as tuples, like:

diff --git a/flanker/utils.py b/flanker/utils.py
@@ -2,7 +2,6 @@
 """
 Utility functions and classes used by flanker.
 """
-import logging
 import re
 
 import cchardet
@@ -12,9 +11,6 @@
 from functools import wraps
 
 
-log = logging.getLogger(__name__)
-
-
 def _guess_and_convert(value):
     """
     Try to guess the encoding of the passed value and decode it.
@@ -23,9 +19,8 @@ def _guess_and_convert(value):
     back to chardet which is much slower.
     """
     try:
-        return _guess_and_convert_with(value)
+        return _guess_and_convert_with(value, detector=cchardet)
     except:
-        log.warn("Fallback to chardet")
         return _guess_and_convert_with(value, detector=chardet)
 
 
@@ -39,7 +34,7 @@ def _guess_and_convert_with(value, detector=cchardet):
     charset = detector.detect(value)
 
     if not charset["encoding"]:
-        raise errors.DecodingError("Failed to guess encoding for %s" % (value,))
+        raise errors.DecodingError("Failed to guess encoding")
 
     try:
         value = value.decode(charset["encoding"], "replace")
@@ -53,25 +48,17 @@ def _make_unicode(value, charset=None):
     if isinstance(value, unicode):
         return value
 
+    charset = charset or "utf-8"
     try:
-        # if charset is provided, try decoding with it
-        if charset:
-            value = value.decode(charset, "strict")
-
-        # if charset is not provided, assume UTF-8
-        else:
-            value = value.decode("utf-8", "strict")
-
-    # last resort: try to guess the encoding
+        value = value.decode(charset, "strict")
     except (UnicodeError, LookupError):
         value = _guess_and_convert(value)
 
     return value
 
 
 def to_unicode(value, charset=None):
-    value = _make_unicode(value, charset)
-    return unicode(value.encode("utf-8", "strict"), "utf-8", "strict")
+    return _make_unicode(value, charset)
 
 
 def to_utf8(value, charset=None):