From 4304002d2be96a36decd2b11de8ee91b805d1c12 Mon Sep 17 00:00:00 2001 From: John Stark Date: Tue, 30 Apr 2024 22:45:08 +0200 Subject: [PATCH] Add content_type property to File Makes all headers lower case, fixing case sensitivity issues. Exposes headers property in Files and Fields. --- multipart/multipart.py | 25 ++++--- .../test_data/http/almost_match_boundary.yaml | 1 + tests/test_data/http/base64_encoding.yaml | 1 + .../http/case_insensitive_headers.http | 21 ++++++ .../http/case_insensitive_headers.yaml | 26 +++++++ tests/test_data/http/header_with_number.yaml | 1 + tests/test_data/http/multiple_files.yaml | 2 + .../http/quoted_printable_encoding.yaml | 1 + .../http/single_field_single_file.yaml | 2 + tests/test_data/http/single_file.yaml | 1 + tests/test_data/http/utf8_filename.yaml | 1 + tests/test_multipart.py | 68 +++++++++++++++++-- 12 files changed, 138 insertions(+), 12 deletions(-) create mode 100644 tests/test_data/http/case_insensitive_headers.http create mode 100644 tests/test_data/http/case_insensitive_headers.yaml diff --git a/multipart/multipart.py b/multipart/multipart.py index 1dab14b..e3480ce 100644 --- a/multipart/multipart.py +++ b/multipart/multipart.py @@ -74,7 +74,8 @@ def set_none(self) -> None: ... class FileProtocol(_FormProtocol, Protocol): - def __init__(self, file_name: bytes | None, field_name: bytes | None, headers: dict[str,bytes], config: FileConfig) -> None: + def __init__(self, file_name: bytes | None, field_name: bytes | None, config: FileConfig, + headers: dict[str,bytes]) -> None: ... OnFieldCallback = Callable[[FieldProtocol], None] @@ -353,7 +354,8 @@ class File: config: The configuration for this File. See above for valid configuration keys and their corresponding values. 
""" # noqa: E501 - def __init__(self, file_name: bytes | None, field_name: bytes | None = None, headers: dict[str,bytes] = {}, config: FileConfig = {}) -> None: + def __init__(self, file_name: bytes | None, field_name: bytes | None = None, + headers: dict[str,bytes] = {}, config: FileConfig = {}) -> None: # Save configuration, set other variables default. self.logger = logging.getLogger(__name__) self._config = config @@ -423,6 +425,14 @@ def headers(self) -> dict[str,bytes]: """ return self._headers + @property + def content_type(self) -> bytes: + """The Content-Type value for this part. + """ + if self._headers is None: + return None + return self._headers.get("content-type", None) + def flush_to_disk(self) -> None: """If the file is already on-disk, do nothing. Otherwise, copy from the in-memory buffer to a disk file, and then reassign our internal @@ -1651,7 +1661,7 @@ def on_header_value(data: bytes, start: int, end: int) -> None: header_value.append(data[start:end]) def on_header_end() -> None: - headers[b"".join(header_name)] = b"".join(header_value) + headers[b"".join(header_name).decode().lower()] = b"".join(header_value) del header_name[:] del header_value[:] @@ -1661,8 +1671,7 @@ def on_headers_finished() -> None: is_file = False # Parse the content-disposition header. - # TODO: handle mixed case - content_disp = headers.get(b"Content-Disposition") + content_disp = headers.get("content-disposition") disp, options = parse_options_header(content_disp) # Get the field and filename. @@ -1672,15 +1681,15 @@ def on_headers_finished() -> None: # Create the proper class. if file_name is None: - f = FieldClass(field_name) + f = FieldClass(field_name, headers=headers) else: - f = FileClass(file_name, field_name, config=self.config) + f = FileClass(file_name, field_name, config=self.config, headers=headers) is_file = True # Parse the given Content-Transfer-Encoding to determine what # we need to do with the incoming data. 
# TODO: check that we properly handle 8bit / 7bit encoding. - transfer_encoding = headers.get(b"Content-Transfer-Encoding", b"7bit") + transfer_encoding = headers.get("content-transfer-encoding", b"7bit") if transfer_encoding in (b"binary", b"8bit", b"7bit"): writer = f diff --git a/tests/test_data/http/almost_match_boundary.yaml b/tests/test_data/http/almost_match_boundary.yaml index 235493e..c114ffe 100644 --- a/tests/test_data/http/almost_match_boundary.yaml +++ b/tests/test_data/http/almost_match_boundary.yaml @@ -3,6 +3,7 @@ expected: - name: file type: file file_name: test.txt + content_type: text/plain data: !!binary | LS1ib3VuZGFyaQ0KLS1ib3VuZGFyeXEtLWJvdW5kYXJ5DXEtLWJvdW5kYXJxDQotLWJvdW5hcnlkLS0NCi0tbm90Ym91bmQtLQ0KLS1taXNtYXRjaA0KLS1taXNtYXRjaC0tDQotLWJvdW5kYXJ5LVENCi0tYm91bmRhcnkNUS0tYm91bmRhcnlR diff --git a/tests/test_data/http/base64_encoding.yaml b/tests/test_data/http/base64_encoding.yaml index 1033150..db227a1 100644 --- a/tests/test_data/http/base64_encoding.yaml +++ b/tests/test_data/http/base64_encoding.yaml @@ -3,5 +3,6 @@ expected: - name: file type: file file_name: test.txt + content_type: text/plain data: !!binary | VGVzdCAxMjM= diff --git a/tests/test_data/http/case_insensitive_headers.http b/tests/test_data/http/case_insensitive_headers.http new file mode 100644 index 0000000..a14cc11 --- /dev/null +++ b/tests/test_data/http/case_insensitive_headers.http @@ -0,0 +1,21 @@ +------WebKitFormBoundarygbACTUR58IyeurVf +Content-Disposition: form-data; name="file1"; filename="test1.txt" +Content-Type: text/plain + +Test file #1 +------WebKitFormBoundarygbACTUR58IyeurVf +CONTENT-DISPOSITION: form-data; name="file2"; filename="test2.txt" +CONTENT-Type: text/plain + +Test file #2 +------WebKitFormBoundarygbACTUR58IyeurVf +content-disposition: form-data; name="file3"; filename="test3.txt" +content-type: text/plain + +Test file #3 +------WebKitFormBoundarygbACTUR58IyeurVf +cOnTenT-DiSpOsItiOn: form-data; name="file4"; filename="test4.txt" 
+Content-Type: text/plain + +Test file #4 +------WebKitFormBoundarygbACTUR58IyeurVf-- diff --git a/tests/test_data/http/case_insensitive_headers.yaml b/tests/test_data/http/case_insensitive_headers.yaml new file mode 100644 index 0000000..4c9d365 --- /dev/null +++ b/tests/test_data/http/case_insensitive_headers.yaml @@ -0,0 +1,26 @@ +boundary: ----WebKitFormBoundarygbACTUR58IyeurVf +expected: + - name: file1 + type: file + file_name: test1.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICMx + - name: file2 + type: file + file_name: test2.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICMy + - name: file3 + type: file + file_name: test3.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICMz + - name: file4 + type: file + file_name: test4.txt + content_type: text/plain + data: !!binary | + VGVzdCBmaWxlICM0 diff --git a/tests/test_data/http/header_with_number.yaml b/tests/test_data/http/header_with_number.yaml index 493b783..86b4779 100644 --- a/tests/test_data/http/header_with_number.yaml +++ b/tests/test_data/http/header_with_number.yaml @@ -3,5 +3,6 @@ expected: - name: files type: file file_name: secret.txt + content_type: "text/plain; charset=utf-8" data: !!binary | YWFhYWFh diff --git a/tests/test_data/http/multiple_files.yaml b/tests/test_data/http/multiple_files.yaml index 3bf70e2..b372ab2 100644 --- a/tests/test_data/http/multiple_files.yaml +++ b/tests/test_data/http/multiple_files.yaml @@ -3,11 +3,13 @@ expected: - name: file1 type: file file_name: test1.txt + content_type: 'text/plain' data: !!binary | VGVzdCBmaWxlICMx - name: file2 type: file file_name: test2.txt + content_type: 'text/plain' data: !!binary | VGVzdCBmaWxlICMy diff --git a/tests/test_data/http/quoted_printable_encoding.yaml b/tests/test_data/http/quoted_printable_encoding.yaml index 2c6bbfb..6dcbde3 100644 --- a/tests/test_data/http/quoted_printable_encoding.yaml +++ b/tests/test_data/http/quoted_printable_encoding.yaml @@ -3,5 +3,6 @@ 
expected: - name: file type: file file_name: test.txt + content_type: 'text/plain' data: !!binary | Zm9vPWJhcg== diff --git a/tests/test_data/http/single_field_single_file.yaml b/tests/test_data/http/single_field_single_file.yaml index 47c8d6e..fa7002e 100644 --- a/tests/test_data/http/single_field_single_file.yaml +++ b/tests/test_data/http/single_field_single_file.yaml @@ -2,11 +2,13 @@ boundary: boundary expected: - name: field type: field + content_type: 'text/plain' data: !!binary | dGVzdDE= - name: file type: file file_name: file.txt + content_type: 'text/plain' data: !!binary | dGVzdDI= diff --git a/tests/test_data/http/single_file.yaml b/tests/test_data/http/single_file.yaml index 2a8e005..dbdff51 100644 --- a/tests/test_data/http/single_file.yaml +++ b/tests/test_data/http/single_file.yaml @@ -3,6 +3,7 @@ expected: - name: file type: file file_name: test.txt + content_type: 'text/plain' data: !!binary | VGhpcyBpcyBhIHRlc3QgZmlsZS4= diff --git a/tests/test_data/http/utf8_filename.yaml b/tests/test_data/http/utf8_filename.yaml index 507ba2c..25fab67 100644 --- a/tests/test_data/http/utf8_filename.yaml +++ b/tests/test_data/http/utf8_filename.yaml @@ -3,6 +3,7 @@ expected: - name: file type: file file_name: ???.txt + content_type: 'text/plain' data: !!binary | 44GT44KM44Gv44OG44K544OI44Gn44GZ44CC diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 3a814fb..3977c62 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -732,7 +732,7 @@ def assert_file_data(self, f, data): file_data = o.read() self.assertEqual(file_data, data) - def assert_file(self, field_name, file_name, data): + def assert_file(self, field_name, file_name, content_type: str, data): # Find this file. found = None for f in self.files: @@ -743,6 +743,8 @@ def assert_file(self, field_name, file_name, data): # Assert that we found it. self.assertIsNotNone(found) + self.assertEqual(found.content_type, content_type.encode()) + try: # Assert about this file. 
self.assert_file_data(found, data) @@ -810,7 +812,7 @@ def test_http(self, param): self.assert_field(name, e["data"]) elif type == "file": - self.assert_file(name, e["file_name"].encode("latin-1"), e["data"]) + self.assert_file(name, e["file_name"].encode("latin-1"), e["content_type"], e["data"]) else: assert False @@ -841,7 +843,7 @@ def test_random_splitting(self): # Assert that our file and field are here. self.assert_field(b"field", b"test1") - self.assert_file(b"file", b"file.txt", b"test2") + self.assert_file(b"file", b"file.txt", "text/plain", b"test2") def test_feed_single_bytes(self): """ @@ -870,7 +872,7 @@ def test_feed_single_bytes(self): # Assert that our file and field are here. self.assert_field(b"field", b"test1") - self.assert_file(b"file", b"file.txt", b"test2") + self.assert_file(b"file", b"file.txt", "text/plain", b"test2") def test_feed_blocks(self): """ @@ -905,6 +907,64 @@ def test_feed_blocks(self): # Assert that our field is here. self.assert_field(b"field", b"0123456789ABCDEFGHIJ0123456789ABCDEFGHIJ") + def test_file_headers(self): + """ + This test checks headers for a file part are read. + """ + # Load test data. + test_file = "header_with_number.http" + with open(os.path.join(http_tests_dir, test_file), "rb") as f: + test_data = f.read() + + expected_headers = { + "content-disposition": b'form-data; filename="secret.txt"; name="files"', + "content-type": b"text/plain; charset=utf-8", + "x-funky-header-1": b"bar", + "abcdefghijklmnopqrstuvwxyz01234": b"foo", + "abcdefghijklmnopqrstuvwxyz56789": b"bar", + "other!#$%&'*+-.^_`|~": b"baz", + "content-length": b"6", + } + + # Create form parser. + self.make(boundary="b8825ae386be4fdc9644d87e392caad3") + self.f.write(test_data) + self.f.finalize() + + # Assert that our field is here. 
+ self.assertEqual(1, len(self.files)) + actual_headers = self.files[0].headers + self.assertEqual(len(actual_headers), len(expected_headers)) + + for k,v in expected_headers.items(): + self.assertEqual(v, actual_headers[k]) + + def test_field_headers(self): + """ + This test checks headers for a field part are read. + """ + # Load test data. + test_file = "single_field.http" + with open(os.path.join(http_tests_dir, test_file), "rb") as f: + test_data = f.read() + + expected_headers = { + "content-disposition": b'form-data; name="field"', + } + + # Create form parser. + self.make(boundary="----WebKitFormBoundaryTkr3kCBQlBe1nrhc") + self.f.write(test_data) + self.f.finalize() + + # Assert that our field is here. + self.assertEqual(1, len(self.fields)) + actual_headers = self.fields[0].headers + self.assertEqual(len(actual_headers), len(expected_headers)) + + for k,v in expected_headers.items(): + self.assertEqual(v, actual_headers[k]) + def test_request_body_fuzz(self): """ This test randomly fuzzes the request body to ensure that no strange