Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve the support for SourceForge download URLs #26 #29

Merged
merged 4 commits into from
Jan 9, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Release notes
- Improve the stability of the "Check for new Package versions" feature.
https://github.com/nexB/dejacode/issues/17

- Improve the support for SourceForge download URLs.
https://github.com/nexB/dejacode/issues/26

### Version 5.0.0

Initial release.
5 changes: 4 additions & 1 deletion component_catalog/tests/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2465,7 +2465,10 @@ def test_package_changeform_save_and_collect_data_on_addition(self, mock_get):
}

mock_get.return_value = mock.Mock(
content=b"\x00", headers={"content-length": 1}, status_code=200
content=b"\x00",
headers={"content-length": 1},
status_code=200,
url="http://domain.com/a.zip",
)

response = self.client.post(add_url, data, follow=True)
Expand Down
21 changes: 12 additions & 9 deletions component_catalog/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2213,7 +2213,7 @@ def test_collect_package_data(self, mock_get):
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data("ftp://ftp.denx.de/pub/u-boot/u-boot-2017.11.tar.bz2")

package_url = "http://domain.com/a.zip;<params>?<query>#<fragment>"
download_url = "http://domain.com/a%20b.zip;<params>?<query>#<fragment>"

default_max_length = download.CONTENT_MAX_LENGTH
download.CONTENT_MAX_LENGTH = 0
Expand All @@ -2223,15 +2223,18 @@ def test_collect_package_data(self, mock_get):
content=b"\x00", headers={"content-length": 300000000}, status_code=200
)
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data(package_url)
collect_package_data(download_url)

download.CONTENT_MAX_LENGTH = default_max_length
mock_get.return_value = mock.Mock(
content=b"\x00", headers={"content-length": 1}, status_code=200
content=b"\x00",
headers={"content-length": 1},
status_code=200,
url=download_url,
)
expected_data = {
"download_url": "http://domain.com/a.zip;<params>?<query>#<fragment>",
"filename": "a.zip",
"download_url": download_url,
"filename": "a b.zip",
"size": 1,
"sha1": "5ba93c9db0cff93f52b521d7420e43f6eda2784f",
"md5": "93b885adfe0da089cdf634904fd59f71",
Expand All @@ -2241,7 +2244,7 @@ def test_collect_package_data(self, mock_get):
"4a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee"
),
}
self.assertEqual(expected_data, collect_package_data(package_url))
self.assertEqual(expected_data, collect_package_data(download_url))

expected_message = (
"Exception Value: HTTPConnectionPool"
Expand All @@ -2253,15 +2256,15 @@ def test_collect_package_data(self, mock_get):
)
mock_get.return_value = response
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data(package_url)
collect_package_data(download_url)

headers = {
"content-length": 1,
"content-disposition": 'attachment; filename="another_name.zip"',
}
mock_get.return_value = mock.Mock(content=b"\x00", headers=headers, status_code=200)
expected_data = {
"download_url": "http://domain.com/a.zip;<params>?<query>#<fragment>",
"download_url": download_url,
"filename": "another_name.zip",
"size": 1,
"sha1": "5ba93c9db0cff93f52b521d7420e43f6eda2784f",
Expand All @@ -2272,7 +2275,7 @@ def test_collect_package_data(self, mock_get):
"4a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee"
),
}
self.assertEqual(expected_data, collect_package_data(package_url))
self.assertEqual(expected_data, collect_package_data(download_url))

def test_package_create_save_set_usage_policy_from_license(self):
from policy.models import AssociatedPolicy
Expand Down
14 changes: 10 additions & 4 deletions dejacode_toolkit/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#

import cgi
import os
import socket
from pathlib import Path
from urllib.parse import unquote
from urllib.parse import urlparse

from django.template.defaultfilters import filesizeformat
Expand All @@ -29,7 +30,7 @@ class DataCollectionException(Exception):

def collect_package_data(url):
try:
response = requests.get(url, timeout=10, stream=True)
response = requests.get(url, timeout=5, stream=True)
except (requests.RequestException, socket.timeout) as e:
raise DataCollectionException(e)

Expand All @@ -54,8 +55,13 @@ def collect_package_data(url):
)

content_disposition = response.headers.get("content-disposition", "")
value, params = cgi.parse_header(content_disposition)
filename = params.get("filename") or os.path.basename(urlparse(url).path)
_, params = cgi.parse_header(content_disposition)

filename = params.get("filename")
if not filename:
# Using ``response.url`` in place of provided ``url`` arg since the former
# will be more accurate in case of HTTP redirect.
filename = unquote(Path(urlparse(response.url).path).name)

package_data = {
"download_url": url,
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ install_requires =
openpyxl==3.1.2
et-xmlfile==1.1.0
# PackageURL
packageurl-python==0.12.0
packageurl-python==0.13.4
# Gunicorn
gunicorn==21.2.0
# SPDX validation
Expand Down
Binary file not shown.
14 changes: 0 additions & 14 deletions thirdparty/dist/packageurl_python-0.12.0-py3-none-any.whl.ABOUT

This file was deleted.

Binary file not shown.
12 changes: 12 additions & 0 deletions thirdparty/dist/packageurl_python-0.13.4-py3-none-any.whl.ABOUT
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
about_resource: packageurl_python-0.13.4-py3-none-any.whl
name: packageurl-python
version: 0.13.4
download_url: https://files.pythonhosted.org/packages/6f/d6/dc41590e65a95198ad7490ed0fb34a1148e8eb5032c35c8d157b55aa496d/packageurl_python-0.13.4-py3-none-any.whl
package_url: pkg:pypi/[email protected]
license_expression: mit
copyright: Copyright packageurl-python project contributors
attribute: yes
licenses:
- key: mit
name: MIT License
file: mit.LICENSE