Skip to content

Commit

Permalink
♻️ Refactor header option parser to use the standard library tools in…
Browse files Browse the repository at this point in the history
…stead of a RegEx
  • Loading branch information
tiangolo committed Feb 3, 2024
1 parent d3d16da commit 0e4e4a1
Showing 1 changed file with 26 additions and 24 deletions.
50 changes: 26 additions & 24 deletions multipart/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,8 @@
import tempfile
from io import BytesIO
from numbers import Number
from email.message import Message
from typing import Dict, Union, Tuple

# Unique missing object.
_missing = object()
Expand Down Expand Up @@ -76,44 +78,44 @@
QUOTE = b'"'[0]


def parse_options_header(value):
def parse_options_header(value: Union[str, bytes]) -> Tuple[bytes, Dict[bytes, bytes]]:
"""
Parses a Content-Type header into a value in the following format:
(content_type, {parameters})
"""
# Uses email.message.Message to parse the header as described in PEP 594.
# Ref: https://peps.python.org/pep-0594/#cgi
if not value:
return (b'', {})

# If we are passed a string, we assume that it conforms to WSGI and does
# not contain any code point that's not in latin-1.
if isinstance(value, str): # pragma: no cover
value = value.encode('latin-1')
# If we are passed bytes, we assume that it conforms to WSGI, encoding in latin-1.
if isinstance(value, bytes): # pragma: no cover
value = value.decode('latin-1')

# For types
assert isinstance(value, str), 'Value should be a string by now'

# If we have no options, return the string as-is.
if b';' not in value:
return (value.lower().strip(), {})
if ";" not in value:
return (value.lower().strip().encode('latin-1'), {})

# Split at the first semicolon, to get our value and then options.
ctype, rest = value.split(b';', 1)
# ctype, rest = value.split(b';', 1)
message = Message()
message['content-type'] = value
params = message.get_params()
# If there were no parameters, this would have already returned above
assert params, 'At least the content type value should be present'
ctype = params.pop(0)[0].encode('latin-1')
options = {}

# Parse the options.
for match in OPTION_RE.finditer(rest):
key = match.group(1).lower()
value = match.group(2)
if value[0] == QUOTE and value[-1] == QUOTE:
# Unquote the value.
value = value[1:-1]
value = value.replace(b'\\\\', b'\\').replace(b'\\"', b'"')

for param in params:
key, value = param
# If the value is a filename, we need to fix a bug on IE6 that sends
# the full file path instead of the filename.
if key == b'filename':
if value[1:3] == b':\\' or value[:2] == b'\\\\':
value = value.split(b'\\')[-1]

options[key] = value

if key == 'filename':
if value[1:3] == ':\\' or value[:2] == '\\\\':
value = value.split('\\')[-1]
options[key.encode('latin-1')] = value.encode('latin-1')
return ctype, options


Expand Down

0 comments on commit 0e4e4a1

Please sign in to comment.