Skip to content

Commit

Permalink
Fix URL encoding issue in HttpDownloader to handle special characters
Browse files Browse the repository at this point in the history
closes pulp#5686
  • Loading branch information
hstct committed Sep 19, 2024
1 parent 0cd8dc1 commit 1ac61f8
Show file tree
Hide file tree
Showing 4 changed files with 35 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGES/5686.bugfix
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implemented a more robust URL encoding mechanism that only encodes the path portion of the URL, ensuring that special characters are processed correctly without double encoding.
10 changes: 10 additions & 0 deletions pulp_file/pytest_plugin.py
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,16 @@ def basic_manifest_path(write_3_iso_file_fixture_data_factory):
return write_3_iso_file_fixture_data_factory("basic")


@pytest.fixture
def encoded_manifest_path(file_fixtures_root):
    """Create an ``encoded`` fixture directory of ISOs with percent signs in their names.

    Three ISO files are generated under ``file_fixtures_root`` and listed in a
    ``PULP_MANIFEST`` written next to them.

    Returns:
        The manifest location as the string ``"/encoded/PULP_MANIFEST"``.
    """
    file_fixtures_root.joinpath("encoded").mkdir()
    # File names deliberately contain literal ``%`` sequences.
    iso_names = (
        "encoded/long-name-%253a-encoded.iso",
        "encoded/another-%25-encoded.iso",
        "encoded/more-%3C-encoded.iso",
    )
    generated = [generate_iso(file_fixtures_root.joinpath(name)) for name in iso_names]
    generate_manifest(file_fixtures_root.joinpath("encoded/PULP_MANIFEST"), generated)
    return "/encoded/PULP_MANIFEST"


@pytest.fixture
def copy_manifest_only_factory(file_fixtures_root):
def _copy_manifest_only(name):
Expand Down
13 changes: 13 additions & 0 deletions pulp_file/tests/functional/api/test_sync.py
Original file line number Diff line number Diff line change
Expand Up @@ -123,6 +123,19 @@ def test_duplicate_file_sync(
assert file_repo.latest_version_href.endswith("/2/")


@pytest.mark.parallel
def test_encoded_file_name(
    file_repo, file_bindings, encoded_manifest_path, file_remote_factory, monitor_task
):
    """Sync the manifest from ``encoded_manifest_path`` and expect three files in version 1."""
    remote = file_remote_factory(manifest_path=encoded_manifest_path, policy="immediate")
    sync_body = RepositorySyncURL(remote=remote.pulp_href)
    sync_response = file_bindings.RepositoriesFileApi.sync(file_repo.pulp_href, sync_body)
    monitor_task(sync_response.task)

    # Re-read the repository so that latest_version_href reflects the finished sync.
    synced_repo = file_bindings.RepositoriesFileApi.read(file_repo.pulp_href)
    latest_version = file_bindings.RepositoriesFileVersionsApi.read(
        synced_repo.latest_version_href
    )
    summary = latest_version.content_summary

    assert summary.present["file.file"]["count"] == 3
    assert summary.added["file.file"]["count"] == 3
    assert synced_repo.latest_version_href.endswith("/1/")


@pytest.mark.parallel
def test_filepath_includes_commas(
file_bindings,
Expand Down
12 changes: 11 additions & 1 deletion pulpcore/download/http.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
import aiohttp
import asyncio
import backoff
import urllib.parse

from .base import BaseDownloader, DownloadResult
from pulpcore.exceptions import (
Expand Down Expand Up @@ -49,6 +50,14 @@ def http_giveup_handler(exc):
return False


def encode_url(url):
    """Percent-encode only the path component of ``url``.

    The scheme, network location, query string and fragment are passed through
    untouched. The path is treated as raw (not yet encoded) text: every
    character other than letters, digits, ``_.-~`` and ``/`` is replaced by its
    ``%xx`` escape, which means a literal ``%`` already present in the path
    becomes ``%25``.

    Args:
        url (str): The URL whose path should be encoded.

    Returns:
        str: The URL with its path percent-encoded.
    """
    # urlsplit (unlike urlparse) does not carve ";params" out of the last path
    # segment, so a file name containing ";" is encoded as a whole instead of
    # having everything after the ";" sent unencoded.
    parts = urllib.parse.urlsplit(url)
    quoted_path = urllib.parse.quote(parts.path)
    return parts._replace(path=quoted_path).geturl()


class HttpDownloader(BaseDownloader):
"""
An HTTP/HTTPS Downloader built on `aiohttp`.
Expand Down Expand Up @@ -284,8 +293,9 @@ async def _run(self, extra_data=None):
"""
if self.download_throttler:
await self.download_throttler.acquire()
encoded_url = encode_url(self.url)
async with self.session.get(
self.url, proxy=self.proxy, proxy_auth=self.proxy_auth, auth=self.auth
encoded_url, proxy=self.proxy, proxy_auth=self.proxy_auth, auth=self.auth
) as response:
self.raise_for_status(response)
to_return = await self._handle_response(response)
Expand Down

0 comments on commit 1ac61f8

Please sign in to comment.