From 86146db609b82f88b70b74c1a01cedbb7d14564a Mon Sep 17 00:00:00 2001 From: tdruez Date: Mon, 8 Jan 2024 13:50:36 -0700 Subject: [PATCH] Unquote the response.url to obtain a better filename #26 Signed-off-by: tdruez --- component_catalog/tests/test_models.py | 8 ++++---- dejacode_toolkit/download.py | 9 ++++++--- 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/component_catalog/tests/test_models.py b/component_catalog/tests/test_models.py index 46417c89..801af492 100644 --- a/component_catalog/tests/test_models.py +++ b/component_catalog/tests/test_models.py @@ -2213,7 +2213,7 @@ def test_collect_package_data(self, mock_get): with self.assertRaisesMessage(DataCollectionException, expected_message): collect_package_data("ftp://ftp.denx.de/pub/u-boot/u-boot-2017.11.tar.bz2") - download_url = "http://domain.com/a.zip;?#" + download_url = "http://domain.com/a%20b.zip;?#" default_max_length = download.CONTENT_MAX_LENGTH download.CONTENT_MAX_LENGTH = 0 @@ -2233,8 +2233,8 @@ def test_collect_package_data(self, mock_get): url=download_url, ) expected_data = { - "download_url": "http://domain.com/a.zip;?#", - "filename": "a.zip", + "download_url": download_url, + "filename": "a b.zip", "size": 1, "sha1": "5ba93c9db0cff93f52b521d7420e43f6eda2784f", "md5": "93b885adfe0da089cdf634904fd59f71", @@ -2264,7 +2264,7 @@ def test_collect_package_data(self, mock_get): } mock_get.return_value = mock.Mock(content=b"\x00", headers=headers, status_code=200) expected_data = { - "download_url": "http://domain.com/a.zip;?#", + "download_url": download_url, "filename": "another_name.zip", "size": 1, "sha1": "5ba93c9db0cff93f52b521d7420e43f6eda2784f", diff --git a/dejacode_toolkit/download.py b/dejacode_toolkit/download.py index 85d80b80..35331db7 100644 --- a/dejacode_toolkit/download.py +++ b/dejacode_toolkit/download.py @@ -9,6 +9,7 @@ import cgi import socket from pathlib import Path +from urllib.parse import unquote from urllib.parse import urlparse from django.template.defaultfilters import filesizeformat @@ -56,9 +57,11 @@ def collect_package_data(url): content_disposition = response.headers.get("content-disposition", "") _, params = cgi.parse_header(content_disposition) - # Using ``response.url`` in place of provided ``url`` arg since the former - # will be more accurate in case of HTTP redirect. - filename = params.get("filename") or Path(urlparse(response.url).path).name + filename = params.get("filename") + if not filename: + # Using ``response.url`` in place of provided ``url`` arg since the former + # will be more accurate in case of HTTP redirect. + filename = unquote(Path(urlparse(response.url).path).name) package_data = { "download_url": url,