diff --git a/CHANGES/1340.feature.rst b/CHANGES/1340.feature.rst
new file mode 100644
index 000000000..c879569a0
--- /dev/null
+++ b/CHANGES/1340.feature.rst
@@ -0,0 +1,22 @@
+Added support for using the :meth:`subtraction operator <yarl.URL.__sub__>`
+to get the relative path between URLs.
+
+Note that both URLs must have the same scheme, user, password, host and port:
+
+.. code-block:: pycon
+
+   >>> target = URL("http://example.com/path/index.html")
+   >>> base = URL("http://example.com/")
+   >>> target - base
+   URL('path/index.html')
+
+URLs can also be relative:
+
+.. code-block:: pycon
+
+   >>> target = URL("/")
+   >>> base = URL("/path/index.html")
+   >>> target - base
+   URL('..')
+
+-- by :user:`oleksbabieiev`.
diff --git a/docs/api.rst b/docs/api.rst
index 3c123739a..c6b5b5891 100644
--- a/docs/api.rst
+++ b/docs/api.rst
@@ -974,6 +974,21 @@ The path is encoded if needed.
       >>> base.join(URL('//python.org/page.html'))
       URL('http://python.org/page.html')
 
+The subtraction (``-``) operator creates a new URL with
+a relative *path* to the target URL from the given base URL.
+*scheme*, *user*, *password*, *host*, *port*, *query* and *fragment* are removed.
+
+.. method:: URL.__sub__(url)
+
+   Returns a new URL with a relative *path* between two other URL objects.
+
+   .. doctest::
+
+      >>> target = URL('http://example.com/path/index.html')
+      >>> base = URL('http://example.com/')
+      >>> target - base
+      URL('path/index.html')
+
 Human readable representation
 -----------------------------
 
diff --git a/tests/test_url.py b/tests/test_url.py
index 5546ca084..8973b1fd8 100644
--- a/tests/test_url.py
+++ b/tests/test_url.py
@@ -61,6 +61,53 @@ def test_str():
     assert str(url) == "http://example.com:8888/path/to?a=1&b=2"
 
 
+@pytest.mark.parametrize(
+    ("target", "base", "expected"),
+    [
+        ("http://example.com/path/to", "http://example.com/", "path/to"),
+        ("http://example.com/path/to", "http://example.com/spam", "path/to"),
+        ("http://example.com/path/to", "http://example.com/spam/", "../path/to"),
+        ("http://example.com/path", "http://example.com/path/to/", ".."),
+        ("http://example.com/", "http://example.com/", "."),
+        ("http://example.com", "http://example.com", "."),
+        ("http://example.com/", "http://example.com", "."),
+        ("http://example.com", "http://example.com/", "."),
+        ("//example.com", "//example.com", "."),
+        ("/path/to", "/spam/", "../path/to"),
+        ("path/to", "spam/", "../path/to"),
+        ("path/to", "spam", "path/to"),
+        ("..", ".", ".."),
+        (".", "..", "."),
+    ],
+)
+def test_sub(target: str, base: str, expected: str):
+    assert URL(target) - URL(base) == URL(expected)
+
+
+def test_sub_with_different_schemes():
+    expected_error_msg = "Both URLs should have the same scheme"
+    with pytest.raises(ValueError, match=expected_error_msg):
+        URL("http://example.com/") - URL("https://example.com/")
+
+
+def test_sub_with_different_netlocs():
+    expected_error_msg = "Both URLs should have the same netloc"
+    with pytest.raises(ValueError, match=expected_error_msg):
+        URL("https://spam.com/") - URL("https://ham.com/")
+
+
+def test_sub_with_different_anchors():
+    expected_error_msg = "'path/to' and '/path' have different anchors"
+    with pytest.raises(ValueError, match=expected_error_msg):
+        URL("path/to") - URL("/path/from")
+
+
+def test_sub_with_two_dots_in_base():
+    expected_error_msg = "'..' segment in '/path/..' cannot be walked"
+    with pytest.raises(ValueError, match=expected_error_msg):
+        URL("path/to") - URL("/path/../from")
+
+
 def test_repr():
     url = URL("http://example.com")
     assert "URL('http://example.com')" == repr(url)
diff --git a/yarl/_path.py b/yarl/_path.py
index c22f0b4b8..704f87242 100644
--- a/yarl/_path.py
+++ b/yarl/_path.py
@@ -2,6 +2,7 @@
 
 from collections.abc import Sequence
 from contextlib import suppress
+from pathlib import PurePosixPath
 
 
 def normalize_path_segments(segments: Sequence[str]) -> list[str]:
@@ -39,3 +40,36 @@ def normalize_path(path: str) -> str:
 
     segments = path.split("/")
     return prefix + "/".join(normalize_path_segments(segments))
+
+
+def calculate_relative_path(target: str, base: str) -> str:
+    """Return the relative path between two other paths.
+
+    *base* is treated as a directory when it ends with ``/``; otherwise
+    its last segment is dropped before the paths are compared.
+
+    If the operation is not possible, raise ValueError.
+    """
+
+    # An empty path is handled as the root path.
+    target = target or "/"
+    base = base or "/"
+
+    target_path = PurePosixPath(target)
+    base_path = PurePosixPath(base)
+
+    if not base.endswith("/"):
+        base_path = base_path.parent
+
+    # Walk from base towards its anchor; ``step`` counts the ".." hops needed.
+    for step, path in enumerate((base_path, *base_path.parents)):
+        if target_path.is_relative_to(path):
+            break
+        elif path.name == "..":
+            raise ValueError(f"'..' segment in {str(base_path)!r} cannot be walked")
+    else:
+        raise ValueError(
+            f"{str(target_path)!r} and {str(base_path)!r} have different anchors"
+        )
+    offset = len(path.parts)
+    return str(PurePosixPath(*("..",) * step, *target_path.parts[offset:]))
diff --git a/yarl/_url.py b/yarl/_url.py
index ff6ebb00b..5574a01eb 100644
--- a/yarl/_url.py
+++ b/yarl/_url.py
@@ -12,7 +12,7 @@
 from propcache.api import under_cached_property as cached_property
 
 from ._parse import USES_AUTHORITY, make_netloc, split_netloc, split_url, unsplit_result
-from ._path import normalize_path, normalize_path_segments
+from ._path import calculate_relative_path, normalize_path, normalize_path_segments
 from ._query import (
     Query,
     QueryVariable,
@@ -476,6 +476,37 @@ def __truediv__(self, name: str) -> "URL":
             return NotImplemented
         return self._make_child((str(name),))
 
+    def __sub__(self, other: object) -> "URL":
+        """Return a new URL with a relative path between two other URL objects.
+
+        Note that both URLs must have the same scheme and netloc.
+        The new relative URL has only path:
+        scheme, user, password, host, port, query and fragment are removed.
+
+        Raise ValueError if the schemes or netlocs differ,
+        or if the relative path cannot be calculated.
+
+        Example:
+            >>> target = URL("http://example.com/path/index.html")
+            >>> base = URL("http://example.com/")
+            >>> target - base
+            URL('path/index.html')
+        """
+
+        if type(other) is not URL:
+            return NotImplemented
+
+        target = self._val
+        base = other._val
+
+        if target.scheme != base.scheme:
+            raise ValueError("Both URLs should have the same scheme")
+        if target.netloc != base.netloc:
+            raise ValueError("Both URLs should have the same netloc")
+
+        path = calculate_relative_path(target.path, base.path)
+        return self._from_tup(("", "", path, "", ""))
+
     def __mod__(self, query: Query) -> "URL":
         return self.update_query(query)
 