Skip to content

Commit

Permalink
fix restore_payload for multipart requests (#13)
Browse files Browse the repository at this point in the history
  • Loading branch information
bentsku authored Jun 13, 2024
1 parent b1c24d7 commit ac4647e
Show file tree
Hide file tree
Showing 2 changed files with 135 additions and 7 deletions.
10 changes: 4 additions & 6 deletions rolo/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -295,12 +295,7 @@ def restore_payload(request: Request) -> bytes:
if request.method != "POST":
return data

# TODO: check how request that encode both files and form are really serialized (not sure this works this way)

if request.form:
data += urlencode(list(request.form.items(multi=True))).encode("utf-8")

if request.files:
if request.mimetype == "multipart/form-data":
boundary = request.content_type.split("=")[1]

fields = MultiDict()
Expand All @@ -310,4 +305,7 @@ def restore_payload(request: Request) -> bytes:
_, data_files = encode_multipart(fields, boundary)
data += data_files

elif request.mimetype == "application/x-www-form-urlencoded":
data += urlencode(list(request.form.items(multi=True))).encode("utf-8")

return data
132 changes: 131 additions & 1 deletion tests/test_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@
import pytest
from werkzeug.exceptions import BadRequest

from rolo.request import Request, dummy_wsgi_environment, get_raw_path
from rolo.request import Request, dummy_wsgi_environment, get_raw_path, restore_payload


def test_get_json():
Expand Down Expand Up @@ -206,3 +206,133 @@ def test_utf8_path():

assert r.path == "/foo/Ā0Ä"
assert r.environ["PATH_INFO"] == "/foo/Ä\x80\x84" # quoted and latin-1 encoded


def test_restore_payload_multipart_parsing():
body = (
b"\r\n"
b"--4efd159eae0c4f4e125a5a509e073d85"
b"\r\n"
b'Content-Disposition: form-data; name="formfield"'
b"\r\n\r\n"
b"not a file, just a field"
b"\r\n"
b"--4efd159eae0c4f4e125a5a509e073d85"
b"\r\n"
b'Content-Disposition: form-data; name="foo"; filename="foo"'
b"\r\n"
b"Content-Type: text/plain;"
b"\r\n\r\n"
b"bar"
b"\r\n"
b"--4efd159eae0c4f4e125a5a509e073d85"
b"\r\n"
b'Content-Disposition: form-data; name="baz"; filename="baz"'
b"\r\n"
b"Content-Type: text/plain;"
b"\r\n\r\n"
b"ed"
b"\r\n"
b"\r\n--4efd159eae0c4f4e125a5a509e073d85--"
b"\r\n"
)

request = Request(
"POST",
path="/",
body=body,
headers={"Content-Type": "multipart/form-data; boundary=4efd159eae0c4f4e125a5a509e073d85"},
)

form = {}
for k, field in request.form.items():
form[k] = field

assert form == {"formfield": "not a file, just a field"}

files = []
for k, file_storage in request.files.items():
assert file_storage.stream
# we do not want to consume the file storage stream, because we can't restore the payload then
files.append(k)

assert files == ["foo", "baz"]
restored_data = restore_payload(request)

assert restored_data == body


def test_request_mixed_multipart():
# this is almost how we previously restored a form that would have both `form` fields and `files`
# we would URL encode the form first then add multipart, which does not work, the first part should be ignored
# and make certain strict multipart parser fail (Starlette), because it finds data before the first boundary

# this test does something a bit different to prove it is ignored (add an URL encoded part in the beginning)
body = (
b"formfield=not+a+file%2C+just+a+field\r\n"
b"--4efd159eae0c4f4e125a5a509e073d85"
b"\r\n"
b'Content-Disposition: form-data; name="foo"; filename="foo"'
b"\r\n"
b"Content-Type: text/plain;"
b"\r\n\r\n"
b"bar"
b"\r\n"
b"--4efd159eae0c4f4e125a5a509e073d85"
b"\r\n"
b'Content-Disposition: form-data; name="baz"; filename="baz"'
b"\r\n"
b"Content-Type: text/plain;"
b"\r\n\r\n"
b"ed"
b"\r\n"
b"\r\n--4efd159eae0c4f4e125a5a509e073d85--"
b"\r\n"
)

request = Request(
"POST",
path="/",
body=body,
headers={"Content-Type": "multipart/form-data; boundary=4efd159eae0c4f4e125a5a509e073d85"},
)

form = {}
for k, field in request.form.items():
form[k] = field

assert form == {}

files = []
for k, file_storage in request.files.items():
assert file_storage.stream
# we do not want to consume the file storage stream, because we can't restore the payload then
files.append(k)

assert files == ["foo", "baz"]

restored_data = restore_payload(request)
assert b"formfield" not in restored_data


def test_restore_payload_form_urlencoded():
body = b"formfield=not+a+file%2C+just+a+field"

request = Request(
"POST",
path="/",
body=body,
headers={"Content-Type": "application/x-www-form-urlencoded"},
)

form = {}
for k, field in request.form.items():
form[k] = field

assert form == {"formfield": "not a file, just a field"}

assert not request.files

restored_data = restore_payload(request)

assert restored_data == body

0 comments on commit ac4647e

Please sign in to comment.