Skip to content

Commit

Permalink
Fix URL encoding issue in pulp_file
Browse files Browse the repository at this point in the history
closes pulp#5686
  • Loading branch information
sbernhard authored and hstct committed Oct 1, 2024
1 parent 79b87ef commit fe48e54
Show file tree
Hide file tree
Showing 3 changed files with 80 additions and 2 deletions.
1 change: 1 addition & 0 deletions CHANGES/pulp_file/5686.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
During sync, quote the URL path for file downloads using HTTP.
11 changes: 9 additions & 2 deletions pulp_file/app/tasks/synchronizing.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import os

from gettext import gettext as _
from urllib.parse import urlparse, urlunparse
from urllib.parse import quote, urlparse, urlunparse

from django.core.files import File

Expand Down Expand Up @@ -113,7 +113,8 @@ async def run(self):
await pb.asave()

for entry in entries:
path = os.path.join(root_dir, entry.relative_path)
path = _get_safe_path(root_dir, entry, parsed_url.scheme)

url = urlunparse(parsed_url._replace(path=path))
file = FileContent(relative_path=entry.relative_path, digest=entry.digest)
artifact = Artifact(size=entry.size, sha256=entry.digest)
Expand All @@ -127,3 +128,9 @@ async def run(self):
dc = DeclarativeContent(content=file, d_artifacts=[da])
await pb.aincrement()
await self.put(dc)


def _get_safe_path(root_dir, entry, scheme):
    """
    Build the path component of the URL an entry is downloaded from.

    Args:
        root_dir (str): Path prefix the entry's relative path is joined onto. It is
            returned unmodified. NOTE(review): presumably this is the directory part of
            the remote's own URL path and is therefore already in the form the remote
            was configured with -- confirm against the caller.
        entry: Object whose ``relative_path`` attribute names the file relative to
            ``root_dir``.
        scheme (str): Scheme of the URL being built.

    Returns:
        str: ``root_dir`` joined with the entry's relative path. For the ``"file"``
        scheme the relative path is used verbatim; for every other scheme it is
        percent-encoded, keeping ``":"`` and ``"/"`` literal.
    """
    # Leading slashes are dropped because os.path.join() would otherwise treat the
    # relative path as absolute and discard root_dir.
    relative_path = entry.relative_path.lstrip("/")
    if scheme != "file":
        # Encode only the part that comes from the entry. Quoting the joined path would
        # also re-encode root_dir, turning an escape already present there (e.g. "%20")
        # into "%2520".
        relative_path = quote(relative_path, safe=":/")
    return os.path.join(root_dir, relative_path)
70 changes: 70 additions & 0 deletions pulp_file/tests/unit/test_safe_paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,70 @@
import pytest
from unittest import mock
from pulp_file.app.tasks.synchronizing import _get_safe_path


@pytest.mark.parametrize(
    "relative_path, scheme, expected_path",
    [
        # Empty path
        ("", "file", "/root/directory/"),
        ("", "http", "/root/directory/"),
        # Leading/trailing slashes
        ("/leading/slash.txt", "file", "/root/directory/leading/slash.txt"),
        ("/leading/slash.txt", "http", "/root/directory/leading/slash.txt"),
        ("trailing/slash.txt/", "file", "/root/directory/trailing/slash.txt/"),
        ("trailing/slash.txt/", "http", "/root/directory/trailing/slash.txt/"),
        # Special ASCII characters
        ("file#name.txt", "file", "/root/directory/file#name.txt"),
        ("file#name.txt", "http", "/root/directory/file%23name.txt"),
        ("file?name.txt", "file", "/root/directory/file?name.txt"),
        ("file?name.txt", "http", "/root/directory/file%3Fname.txt"),
        ("file@name.txt", "file", "/root/directory/file@name.txt"),
        ("file@name.txt", "http", "/root/directory/file%40name.txt"),
        ("file$name.txt", "file", "/root/directory/file$name.txt"),
        ("file$name.txt", "http", "/root/directory/file%24name.txt"),
        ("file%name.txt", "file", "/root/directory/file%name.txt"),
        ("file%name.txt", "http", "/root/directory/file%25name.txt"),
        # Spaces (runs of two, so that every single space must be encoded on its own)
        ("file  with  spaces.txt", "file", "/root/directory/file  with  spaces.txt"),
        ("file  with  spaces.txt", "http", "/root/directory/file%20%20with%20%20spaces.txt"),
        ("file.txt  ", "file", "/root/directory/file.txt  "),
        ("file.txt  ", "http", "/root/directory/file.txt%20%20"),
        # Unusual ASCII characters
        ("file!name.txt", "file", "/root/directory/file!name.txt"),
        ("file!name.txt", "http", "/root/directory/file%21name.txt"),
        ("file'name.txt", "file", "/root/directory/file'name.txt"),
        ("file'name.txt", "http", "/root/directory/file%27name.txt"),
        ("file(name).txt", "file", "/root/directory/file(name).txt"),
        ("file(name).txt", "http", "/root/directory/file%28name%29.txt"),
        ("file[name].txt", "file", "/root/directory/file[name].txt"),
        ("file[name].txt", "http", "/root/directory/file%5Bname%5D.txt"),
        ("file;name.txt", "file", "/root/directory/file;name.txt"),
        ("file;name.txt", "http", "/root/directory/file%3Bname.txt"),
        ("file&name.txt", "file", "/root/directory/file&name.txt"),
        ("file&name.txt", "http", "/root/directory/file%26name.txt"),
        # Dots
        (".", "file", "/root/directory/."),
        (".", "http", "/root/directory/."),
        ("..", "file", "/root/directory/.."),
        ("..", "http", "/root/directory/.."),
        # Mixed slashes
        ("dir\\file.txt", "file", "/root/directory/dir\\file.txt"),
        ("dir\\file.txt", "http", "/root/directory/dir%5Cfile.txt"),
        ("///path//to///file.txt", "file", "/root/directory/path//to///file.txt"),
        ("///path//to///file.txt", "http", "/root/directory/path//to///file.txt"),
        # Only special characters
        ("!@#$%^&*()", "file", "/root/directory/!@#$%^&*()"),
        ("!@#$%^&*()", "http", "/root/directory/%21%40%23%24%25%5E%26%2A%28%29"),
        # Already percent-encoded input is treated as literal text and encoded again
        ("file%3a.txt", "file", "/root/directory/file%3a.txt"),
        ("file%3a.txt", "http", "/root/directory/file%253a.txt"),
        ("file%3A.txt", "file", "/root/directory/file%3A.txt"),
        ("file%3A.txt", "http", "/root/directory/file%253A.txt"),
    ],
)
def test_get_safe_path(relative_path, scheme, expected_path):
    """Check the path built for one (relative_path, scheme) pair under a fixed root."""
    # Only the ``relative_path`` attribute of the entry is read by the function under test.
    entry = mock.Mock(relative_path=relative_path)
    root_dir = "/root/directory"
    result = _get_safe_path(root_dir, entry, scheme)
    assert result == expected_path

0 comments on commit fe48e54

Please sign in to comment.