From 65fa0186541c9c4a1c2abd5ab0d35e30381dcaf2 Mon Sep 17 00:00:00 2001 From: Bernhard Suttner Date: Tue, 24 Sep 2024 19:07:22 +0200 Subject: [PATCH] Fix URL encoding issue in pulp_file closes #5686 --- CHANGES/pulp_file/5686.bugfix | 1 + pulp_file/app/tasks/synchronizing.py | 11 +++- pulp_file/tests/unit/test_safe_paths.py | 70 +++++++++++++++++++++++++ 3 files changed, 80 insertions(+), 2 deletions(-) create mode 100644 CHANGES/pulp_file/5686.bugfix create mode 100644 pulp_file/tests/unit/test_safe_paths.py diff --git a/CHANGES/pulp_file/5686.bugfix b/CHANGES/pulp_file/5686.bugfix new file mode 100644 index 0000000000..9b3605eb58 --- /dev/null +++ b/CHANGES/pulp_file/5686.bugfix @@ -0,0 +1 @@ +During sync, quote the URL path for file downloads using HTTP. diff --git a/pulp_file/app/tasks/synchronizing.py b/pulp_file/app/tasks/synchronizing.py index e7eaca0a23..705af7e282 100644 --- a/pulp_file/app/tasks/synchronizing.py +++ b/pulp_file/app/tasks/synchronizing.py @@ -2,7 +2,7 @@ import os from gettext import gettext as _ -from urllib.parse import urlparse, urlunparse +from urllib.parse import quote, urlparse, urlunparse from django.core.files import File @@ -113,7 +113,8 @@ async def run(self): await pb.asave() for entry in entries: - path = os.path.join(root_dir, entry.relative_path) + path = _get_safe_path(root_dir, entry, parsed_url.scheme) + url = urlunparse(parsed_url._replace(path=path)) file = FileContent(relative_path=entry.relative_path, digest=entry.digest) artifact = Artifact(size=entry.size, sha256=entry.digest) @@ -127,3 +128,9 @@ async def run(self): dc = DeclarativeContent(content=file, d_artifacts=[da]) await pb.aincrement() await self.put(dc) + + +def _get_safe_path(root_dir, entry, scheme): + relative_path = entry.relative_path.lstrip("/") + path = os.path.join(root_dir, relative_path) + return path if scheme == "file" else quote(path, safe=":/") diff --git a/pulp_file/tests/unit/test_safe_paths.py b/pulp_file/tests/unit/test_safe_paths.py new file mode 100644 index 0000000000..36b05a2073 --- /dev/null +++ b/pulp_file/tests/unit/test_safe_paths.py @@ -0,0 +1,70 @@ +import pytest +from unittest import mock +from pulp_file.app.tasks.synchronizing import _get_safe_path + + +@pytest.mark.parametrize( + "relative_path, scheme, expected_path", + [ + # 1. Empty path + ("", "file", "/root/directory/"), + ("", "http", "/root/directory/"), + # 2. Leading/trailing slashes + ("/leading/slash.txt", "file", "/root/directory/leading/slash.txt"), + ("/leading/slash.txt", "http", "/root/directory/leading/slash.txt"), + ("trailing/slash.txt/", "file", "/root/directory/trailing/slash.txt/"), + ("trailing/slash.txt/", "http", "/root/directory/trailing/slash.txt/"), + # Special ASCII characters + ("file#name.txt", "file", "/root/directory/file#name.txt"), + ("file#name.txt", "http", "/root/directory/file%23name.txt"), + ("file?name.txt", "file", "/root/directory/file?name.txt"), + ("file?name.txt", "http", "/root/directory/file%3Fname.txt"), + ("file@name.txt", "file", "/root/directory/file@name.txt"), + ("file@name.txt", "http", "/root/directory/file%40name.txt"), + ("file$name.txt", "file", "/root/directory/file$name.txt"), + ("file$name.txt", "http", "/root/directory/file%24name.txt"), + ("file%name.txt", "file", "/root/directory/file%name.txt"), + ("file%name.txt", "http", "/root/directory/file%25name.txt"), + # Spaces + ("file with spaces.txt", "file", "/root/directory/file with spaces.txt"), + ("file with spaces.txt", "http", "/root/directory/file%20%20with%20%20spaces.txt"), + ("file.txt ", "file", "/root/directory/file.txt "), + ("file.txt ", "http", "/root/directory/file.txt%20%20"), + # Unusual ASCII characters + ("file!name.txt", "file", "/root/directory/file!name.txt"), + ("file!name.txt", "http", "/root/directory/file%21name.txt"), + ("file'name.txt", "file", "/root/directory/file'name.txt"), + ("file'name.txt", "http", "/root/directory/file%27name.txt"), + ("file(name).txt", "file", "/root/directory/file(name).txt"), + ("file(name).txt", "http", "/root/directory/file%28name%29.txt"), + ("file[name].txt", "file", "/root/directory/file[name].txt"), + ("file[name].txt", "http", "/root/directory/file%5Bname%5D.txt"), + ("file;name.txt", "file", "/root/directory/file;name.txt"), + ("file;name.txt", "http", "/root/directory/file%3Bname.txt"), + ("file&name.txt", "file", "/root/directory/file&name.txt"), + ("file&name.txt", "http", "/root/directory/file%26name.txt"), + # Dots + (".", "file", "/root/directory/."), + (".", "http", "/root/directory/."), + ("..", "file", "/root/directory/.."), + ("..", "http", "/root/directory/.."), + # Mixed slashes + ("dir\\file.txt", "file", "/root/directory/dir\\file.txt"), + ("dir\\file.txt", "http", "/root/directory/dir%5Cfile.txt"), + ("///path//to///file.txt", "file", "/root/directory/path//to///file.txt"), + ("///path//to///file.txt", "http", "/root/directory/path//to///file.txt"), + # Only special characters + ("!@#$%^&*()", "file", "/root/directory/!@#$%^&*()"), + ("!@#$%^&*()", "http", "/root/directory/%21%40%23%24%25%5E%26%2A%28%29"), + # Encoded characters + ("file%3a.txt", "file", "/root/directory/file%3a.txt"), + ("file%3a.txt", "http", "/root/directory/file%253a.txt"), + ("file%3A.txt", "file", "/root/directory/file%3A.txt"), + ("file%3A.txt", "http", "/root/directory/file%253A.txt"), + ], +) +def test_get_safe_path(relative_path, scheme, expected_path): + entry = mock.Mock(relative_path=relative_path) + root_dir = "/root/directory" + result = _get_safe_path(root_dir, entry, scheme) + assert result == expected_path