Skip to content

Commit

Permalink
Merge pull request #29 from nexB/26-sourceforge-urls
Browse files Browse the repository at this point in the history
Improve the support for SourceForge download URLs #26
  • Loading branch information
tdruez authored Jan 9, 2024
2 parents 01cf3b0 + 5ccd523 commit 06fa739
Show file tree
Hide file tree
Showing 9 changed files with 42 additions and 29 deletions.
3 changes: 3 additions & 0 deletions CHANGELOG.rst
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,9 @@ Release notes
- Improve the stability of the "Check for new Package versions" feature.
https://github.com/nexB/dejacode/issues/17

- Improve the support for SourceForge download URLs.
https://github.com/nexB/dejacode/issues/26

### Version 5.0.0

Initial release.
5 changes: 4 additions & 1 deletion component_catalog/tests/test_admin.py
Original file line number Diff line number Diff line change
Expand Up @@ -2465,7 +2465,10 @@ def test_package_changeform_save_and_collect_data_on_addition(self, mock_get):
}

mock_get.return_value = mock.Mock(
content=b"\x00", headers={"content-length": 1}, status_code=200
content=b"\x00",
headers={"content-length": 1},
status_code=200,
url="http://domain.com/a.zip",
)

response = self.client.post(add_url, data, follow=True)
Expand Down
21 changes: 12 additions & 9 deletions component_catalog/tests/test_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -2213,7 +2213,7 @@ def test_collect_package_data(self, mock_get):
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data("ftp://ftp.denx.de/pub/u-boot/u-boot-2017.11.tar.bz2")

package_url = "http://domain.com/a.zip;<params>?<query>#<fragment>"
download_url = "http://domain.com/a%20b.zip;<params>?<query>#<fragment>"

default_max_length = download.CONTENT_MAX_LENGTH
download.CONTENT_MAX_LENGTH = 0
Expand All @@ -2223,15 +2223,18 @@ def test_collect_package_data(self, mock_get):
content=b"\x00", headers={"content-length": 300000000}, status_code=200
)
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data(package_url)
collect_package_data(download_url)

download.CONTENT_MAX_LENGTH = default_max_length
mock_get.return_value = mock.Mock(
content=b"\x00", headers={"content-length": 1}, status_code=200
content=b"\x00",
headers={"content-length": 1},
status_code=200,
url=download_url,
)
expected_data = {
"download_url": "http://domain.com/a.zip;<params>?<query>#<fragment>",
"filename": "a.zip",
"download_url": download_url,
"filename": "a b.zip",
"size": 1,
"sha1": "5ba93c9db0cff93f52b521d7420e43f6eda2784f",
"md5": "93b885adfe0da089cdf634904fd59f71",
Expand All @@ -2241,7 +2244,7 @@ def test_collect_package_data(self, mock_get):
"4a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee"
),
}
self.assertEqual(expected_data, collect_package_data(package_url))
self.assertEqual(expected_data, collect_package_data(download_url))

expected_message = (
"Exception Value: HTTPConnectionPool"
Expand All @@ -2253,15 +2256,15 @@ def test_collect_package_data(self, mock_get):
)
mock_get.return_value = response
with self.assertRaisesMessage(DataCollectionException, expected_message):
collect_package_data(package_url)
collect_package_data(download_url)

headers = {
"content-length": 1,
"content-disposition": 'attachment; filename="another_name.zip"',
}
mock_get.return_value = mock.Mock(content=b"\x00", headers=headers, status_code=200)
expected_data = {
"download_url": "http://domain.com/a.zip;<params>?<query>#<fragment>",
"download_url": download_url,
"filename": "another_name.zip",
"size": 1,
"sha1": "5ba93c9db0cff93f52b521d7420e43f6eda2784f",
Expand All @@ -2272,7 +2275,7 @@ def test_collect_package_data(self, mock_get):
"4a802a71c3580b6370de4ceb293c324a8423342557d4e5c38438f0e36910ee"
),
}
self.assertEqual(expected_data, collect_package_data(package_url))
self.assertEqual(expected_data, collect_package_data(download_url))

def test_package_create_save_set_usage_policy_from_license(self):
from policy.models import AssociatedPolicy
Expand Down
14 changes: 10 additions & 4 deletions dejacode_toolkit/download.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,9 @@
#

import cgi
import os
import socket
from pathlib import Path
from urllib.parse import unquote
from urllib.parse import urlparse

from django.template.defaultfilters import filesizeformat
Expand All @@ -29,7 +30,7 @@ class DataCollectionException(Exception):

def collect_package_data(url):
try:
response = requests.get(url, timeout=10, stream=True)
response = requests.get(url, timeout=5, stream=True)
except (requests.RequestException, socket.timeout) as e:
raise DataCollectionException(e)

Expand All @@ -54,8 +55,13 @@ def collect_package_data(url):
)

content_disposition = response.headers.get("content-disposition", "")
value, params = cgi.parse_header(content_disposition)
filename = params.get("filename") or os.path.basename(urlparse(url).path)
_, params = cgi.parse_header(content_disposition)

filename = params.get("filename")
if not filename:
# Using ``response.url`` in place of provided ``url`` arg since the former
# will be more accurate in case of HTTP redirect.
filename = unquote(Path(urlparse(response.url).path).name)

package_data = {
"download_url": url,
Expand Down
2 changes: 1 addition & 1 deletion setup.cfg
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ install_requires =
openpyxl==3.1.2
et-xmlfile==1.1.0
# PackageURL
packageurl-python==0.12.0
packageurl-python==0.13.4
# Gunicorn
gunicorn==21.2.0
# SPDX validation
Expand Down
Binary file not shown.
14 changes: 0 additions & 14 deletions thirdparty/dist/packageurl_python-0.12.0-py3-none-any.whl.ABOUT

This file was deleted.

Binary file not shown.
12 changes: 12 additions & 0 deletions thirdparty/dist/packageurl_python-0.13.4-py3-none-any.whl.ABOUT
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
about_resource: packageurl_python-0.13.4-py3-none-any.whl
name: packageurl-python
version: 0.13.4
download_url: https://files.pythonhosted.org/packages/6f/d6/dc41590e65a95198ad7490ed0fb34a1148e8eb5032c35c8d157b55aa496d/packageurl_python-0.13.4-py3-none-any.whl
package_url: pkg:pypi/packageurl-python@0.13.4
license_expression: mit
copyright: Copyright packageurl-python project contributors
attribute: yes
licenses:
- key: mit
name: MIT License
file: mit.LICENSE

0 comments on commit 06fa739

Please sign in to comment.