Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add MIME content type info to File #143

Open
wants to merge 8 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
57 changes: 41 additions & 16 deletions python_multipart/multipart.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,12 +68,14 @@ def finalize(self) -> None: ...
def close(self) -> None: ...

class FieldProtocol(_FormProtocol, Protocol):
def __init__(self, name: bytes | None) -> None: ...
def __init__(self, name: bytes, content_type: str | None = None) -> None: ...
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it needed on Field?

Copy link
Contributor Author

@jhnstrk jhnstrk Dec 11, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, but not frequently used. Examples:

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Specifying a charset for a field

This is enough to convince me. Thanks.


def set_none(self) -> None: ...

class FileProtocol(_FormProtocol, Protocol):
def __init__(self, file_name: bytes | None, field_name: bytes | None, config: FileConfig) -> None: ...
def __init__(
self, file_name: bytes | None, field_name: bytes | None, config: FileConfig, content_type: str | None = None
) -> None: ...

OnFieldCallback = Callable[[FieldProtocol], None]
OnFileCallback = Callable[[FileProtocol], None]
Expand Down Expand Up @@ -221,11 +223,13 @@ class Field:

Args:
name: The name of the form field.
content_type: The value of the Content-Type header for this field.
"""

def __init__(self, name: bytes | None) -> None:
def __init__(self, name: bytes, content_type: str | None = None) -> None:
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Why did the name type changed here? Now we have a mismatch between this and the File.field_name. 🤔

Can we revert this bit?

self._name = name
self._value: list[bytes] = []
self._content_type = content_type

# We cache the joined version of _value for speed.
self._cache = _missing
Expand Down Expand Up @@ -317,6 +321,11 @@ def value(self) -> bytes | None:
assert isinstance(self._cache, bytes) or self._cache is None
return self._cache

@property
def content_type(self) -> str | None:
"""This property returns the content_type value of the field."""
return self._content_type

def __eq__(self, other: object) -> bool:
if isinstance(other, Field):
return self.field_name == other.field_name and self.value == other.value
Expand Down Expand Up @@ -354,20 +363,28 @@ class File:
file_name: The name of the file that this [`File`][python_multipart.File] represents.
field_name: The name of the form field that this file was uploaded with. This can be None, if, for example,
the file was uploaded with Content-Type application/octet-stream.
content_type: The value of the Content-Type header.
config: The configuration for this File. See above for valid configuration keys and their corresponding values.
""" # noqa: E501

def __init__(self, file_name: bytes | None, field_name: bytes | None = None, config: FileConfig = {}) -> None:
def __init__(
self,
file_name: bytes | None,
field_name: bytes | None = None,
content_type: str | None = None,
config: FileConfig = {},
) -> None:
# Save configuration, set other variables default.
self.logger = logging.getLogger(__name__)
self._config = config
self._in_memory = True
self._bytes_written = 0
self._fileobj: BytesIO | BufferedRandom = BytesIO()

# Save the provided field/file name.
# Save the provided field/file name and content type.
self._field_name = field_name
self._file_name = file_name
self._content_type = content_type

# Our actual file name is None by default, since, depending on our
# config, we may not actually use the provided name.
Expand Down Expand Up @@ -420,6 +437,11 @@ def in_memory(self) -> bool:
"""
return self._in_memory

@property
def content_type(self) -> str | None:
"""The Content-Type value for this part, if it was set."""
return self._content_type

def flush_to_disk(self) -> None:
"""If the file is already on-disk, do nothing. Otherwise, copy from
the in-memory buffer to a disk file, and then reassign our internal
Expand Down Expand Up @@ -1540,7 +1562,7 @@ def __init__(

def on_start() -> None:
nonlocal file
file = FileClass(file_name, None, config=cast("FileConfig", self.config))
file = FileClass(file_name, None, content_type=None, config=cast("FileConfig", self.config))
jhnstrk marked this conversation as resolved.
Show resolved Hide resolved

def on_data(data: bytes, start: int, end: int) -> None:
nonlocal file
Expand Down Expand Up @@ -1579,7 +1601,7 @@ def on_field_name(data: bytes, start: int, end: int) -> None:
def on_field_data(data: bytes, start: int, end: int) -> None:
nonlocal f
if f is None:
f = FieldClass(b"".join(name_buffer))
f = FieldClass(b"".join(name_buffer), content_type=None)
jhnstrk marked this conversation as resolved.
Show resolved Hide resolved
del name_buffer[:]
f.write(data[start:end])

Expand All @@ -1589,7 +1611,7 @@ def on_field_end() -> None:
if f is None:
# If we get here, it's because there was no field data.
# We create a field, set it to None, and then continue.
f = FieldClass(b"".join(name_buffer))
f = FieldClass(b"".join(name_buffer), content_type=None)
jhnstrk marked this conversation as resolved.
Show resolved Hide resolved
del name_buffer[:]
f.set_none()

Expand Down Expand Up @@ -1621,7 +1643,7 @@ def _on_end() -> None:

header_name: list[bytes] = []
header_value: list[bytes] = []
headers: dict[bytes, bytes] = {}
headers: dict[str, bytes] = {}

f_multi: FileProtocol | FieldProtocol | None = None
writer = None
Expand Down Expand Up @@ -1656,7 +1678,7 @@ def on_header_value(data: bytes, start: int, end: int) -> None:
header_value.append(data[start:end])

def on_header_end() -> None:
headers[b"".join(header_name)] = b"".join(header_value)
headers[b"".join(header_name).decode().lower()] = b"".join(header_value)
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Do we need to decode?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think the headers dict is easier to use if the keys are strings rather than bytes, so I used decode to do the transformation. The string is guaranteed to be valid since it can only contain a subset of ASCII characters (it's filtered as it's built). Can be bytes if you prefer, in which case decode isn't needed.

Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Does it need to be in this PR?

del header_name[:]
del header_value[:]

Expand All @@ -1666,26 +1688,29 @@ def on_headers_finished() -> None:
is_file = False

# Parse the content-disposition header.
# TODO: handle mixed case
content_disp = headers.get(b"Content-Disposition")
content_disp = headers.get("content-disposition")
disp, options = parse_options_header(content_disp)

# Get the field and filename.
field_name = options.get(b"name")
field_name = options.get(b"name", b"")
Copy link
Owner

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this needed? Isn't the field_name supposed to always be present?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added this for typing. Without it, we'll need to raise or assert it's not None. Which makes sense actually.
Will probably need a test to cover it. too.

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Removed the default value and added a check for None.

file_name = options.get(b"filename")
# TODO: check for errors

# Create the proper class.
content_type_b = headers.get("content-type")
content_type = content_type_b.decode("latin-1") if content_type_b is not None else None
if file_name is None:
f_multi = FieldClass(field_name)
f_multi = FieldClass(field_name, content_type=content_type)
else:
f_multi = FileClass(file_name, field_name, config=cast("FileConfig", self.config))
f_multi = FileClass(
file_name, field_name, config=cast("FileConfig", self.config), content_type=content_type
)
is_file = True

# Parse the given Content-Transfer-Encoding to determine what
# we need to do with the incoming data.
# TODO: check that we properly handle 8bit / 7bit encoding.
transfer_encoding = headers.get(b"Content-Transfer-Encoding", b"7bit")
transfer_encoding = headers.get("content-transfer-encoding", b"7bit")

if transfer_encoding in (b"binary", b"8bit", b"7bit"):
writer = f_multi
Expand Down
1 change: 1 addition & 0 deletions tests/test_data/http/almost_match_boundary.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ expected:
- name: file
type: file
file_name: test.txt
content_type: text/plain
data: !!binary |
LS1ib3VuZGFyaQ0KLS1ib3VuZGFyeXEtLWJvdW5kYXJ5DXEtLWJvdW5kYXJxDQotLWJvdW5hcnlkLS0NCi0tbm90Ym91bmQtLQ0KLS1taXNtYXRjaA0KLS1taXNtYXRjaC0tDQotLWJvdW5kYXJ5LVENCi0tYm91bmRhcnkNUS0tYm91bmRhcnlR

1 change: 1 addition & 0 deletions tests/test_data/http/base64_encoding.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ expected:
- name: file
type: file
file_name: test.txt
content_type: text/plain
data: !!binary |
VGVzdCAxMjM=
21 changes: 21 additions & 0 deletions tests/test_data/http/case_insensitive_headers.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
------WebKitFormBoundarygbACTUR58IyeurVf
Content-Disposition: form-data; name="file1"; filename="test1.txt"
Content-Type: text/plain

Test file #1
------WebKitFormBoundarygbACTUR58IyeurVf
CONTENT-DISPOSITION: form-data; name="file2"; filename="test2.txt"
CONTENT-Type: text/plain

Test file #2
------WebKitFormBoundarygbACTUR58IyeurVf
content-disposition: form-data; name="file3"; filename="test3.txt"
content-type: text/plain

Test file #3
------WebKitFormBoundarygbACTUR58IyeurVf
cOnTenT-DiSpOsItiOn: form-data; name="file4"; filename="test4.txt"
Content-Type: text/plain

Test file #4
------WebKitFormBoundarygbACTUR58IyeurVf--
26 changes: 26 additions & 0 deletions tests/test_data/http/case_insensitive_headers.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,26 @@
boundary: ----WebKitFormBoundarygbACTUR58IyeurVf
expected:
- name: file1
type: file
file_name: test1.txt
content_type: text/plain
data: !!binary |
VGVzdCBmaWxlICMx
- name: file2
type: file
file_name: test2.txt
content_type: text/plain
data: !!binary |
VGVzdCBmaWxlICMy
- name: file3
type: file
file_name: test3.txt
content_type: text/plain
data: !!binary |
VGVzdCBmaWxlICMz
- name: file4
type: file
file_name: test4.txt
content_type: text/plain
data: !!binary |
VGVzdCBmaWxlICM0
1 change: 1 addition & 0 deletions tests/test_data/http/header_with_number.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ expected:
- name: files
type: file
file_name: secret.txt
content_type: "text/plain; charset=utf-8"
data: !!binary |
YWFhYWFh
2 changes: 2 additions & 0 deletions tests/test_data/http/multiple_files.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,11 +3,13 @@ expected:
- name: file1
type: file
file_name: test1.txt
content_type: 'text/plain'
data: !!binary |
VGVzdCBmaWxlICMx
- name: file2
type: file
file_name: test2.txt
content_type: 'text/plain'
data: !!binary |
VGVzdCBmaWxlICMy

1 change: 1 addition & 0 deletions tests/test_data/http/quoted_printable_encoding.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,5 +3,6 @@ expected:
- name: file
type: file
file_name: test.txt
content_type: 'text/plain'
data: !!binary |
Zm9vPWJhcg==
2 changes: 2 additions & 0 deletions tests/test_data/http/single_field_single_file.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,11 +2,13 @@ boundary: boundary
expected:
- name: field
type: field
content_type: 'text/plain'
data: !!binary |
dGVzdDE=
- name: file
type: file
file_name: file.txt
content_type: 'text/plain'
data: !!binary |
dGVzdDI=

Expand Down
7 changes: 7 additions & 0 deletions tests/test_data/http/single_field_with_trailer.http
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
------WebKitFormBoundaryTkr3kCBQlBe1nrhc
Content-Disposition: form-data; name="field"

This is a test.
------WebKitFormBoundaryTkr3kCBQlBe1nrhc--
this trailer causes a warning
but should be ignored
6 changes: 6 additions & 0 deletions tests/test_data/http/single_field_with_trailer.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
boundary: ----WebKitFormBoundaryTkr3kCBQlBe1nrhc
expected:
- name: field
type: field
data: !!binary |
VGhpcyBpcyBhIHRlc3Qu
1 change: 1 addition & 0 deletions tests/test_data/http/single_file.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ expected:
- name: file
type: file
file_name: test.txt
content_type: 'text/plain'
data: !!binary |
VGhpcyBpcyBhIHRlc3QgZmlsZS4=

1 change: 1 addition & 0 deletions tests/test_data/http/utf8_filename.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ expected:
- name: file
type: file
file_name: ???.txt
content_type: 'text/plain'
data: !!binary |
44GT44KM44Gv44OG44K544OI44Gn44GZ44CC

Loading
Loading