From c27d7d45e91d25ac2d408f6c5e6eedb842c92700 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Sun, 20 Oct 2024 16:47:09 +0200 Subject: [PATCH 01/22] Support using the subtraction operator to get the relative path between URLs --- docs/api.rst | 15 +++++++++++++++ tests/test_url.py | 27 +++++++++++++++++++++++++++ yarl/_url.py | 29 +++++++++++++++++++++++++++++ 3 files changed, 71 insertions(+) diff --git a/docs/api.rst b/docs/api.rst index b7b80cf61..5c41308fd 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -974,6 +974,21 @@ The path is encoded if needed. >>> base.join(URL('//python.org/page.html')) URL('http://python.org/page.html') +The subtraction (``-``) operator creates a new URL with +a relative *path* to the target URL from the given base URL. +*scheme*, *user*, *password*, *host* and *port* are removed. + +.. method:: URL.__sub__(url) + + Returns a new URL with a relative *path* between two other URL objects. + + .. doctest:: + + >>> target = URL('http://example.com/path/index.html') + >>> base = URL('http://example.com/') + >>> target - base + URL('path/index.html') + Human readable representation ----------------------------- diff --git a/tests/test_url.py b/tests/test_url.py index af6432d12..b390f9063 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -53,6 +53,33 @@ def test_str(): assert str(url) == "http://example.com:8888/path/to?a=1&b=2" +@pytest.mark.parametrize( + "target,base,expected", + [ + ("http://example.com/path/to", "http://example.com/", "path/to"), + ("http://example.com/path/to", "http://example.com/spam", "path/to"), + ("http://example.com/path/to", "http://example.com/spam/", "../path/to"), + ("http://example.com/path", "http://example.com/path/to/", ".."), + ("http://example.com/", "http://example.com/", "."), + ("http://example.com", "http://example.com", "."), + ], +) +def test_sub(target: str, base: str, expected: str): + assert URL(target) - URL(base) == URL(expected) + + +def test_sub_with_different_schemes(): + 
with pytest.raises(ValueError) as ctx: + URL("http://example.com/") - URL("https://example.com/") + assert "Both URLs should have the same scheme" == str(ctx.value) + + +def test_sub_with_different_netlocs(): + with pytest.raises(ValueError) as ctx: + URL("https://spam.com/") - URL("https://ham.com/") + assert "Both URLs should have the same netloc" == str(ctx.value) + + def test_repr(): url = URL("http://example.com") assert "URL('http://example.com')" == repr(url) diff --git a/yarl/_url.py b/yarl/_url.py index 5a0be4514..10a4d2729 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -6,6 +6,7 @@ from contextlib import suppress from functools import _CacheInfo, lru_cache from ipaddress import ip_address +from os.path import dirname, relpath from typing import ( TYPE_CHECKING, Any, @@ -531,6 +532,34 @@ def __truediv__(self, name: str) -> "URL": return NotImplemented return self._make_child((str(name),)) + def __sub__(self, other: object) -> "URL": + if type(other) is not URL: + return NotImplemented + + target = self._val + base = other._val + + if target.scheme != base.scheme: + raise ValueError("Both URLs should have the same scheme") + if target.netloc != base.netloc: + raise ValueError("Both URLs should have the same netloc") + + path = self._relpath(target.path, base.path) + return self._from_tup(("", "", path, "", "")) + + @staticmethod + def _relpath(path: str, start: str) -> str: + """A wrapper over os.path.relpath()""" + + if not path: + path = "/" + if not start: + start = "/" + if not start.endswith("/"): + start = dirname(start) + + return relpath(path, start) + def __mod__(self, query: Query) -> "URL": return self.update_query(query) From fe047cc97813333c62215ab83581f954f39c1a19 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Sun, 20 Oct 2024 17:24:58 +0200 Subject: [PATCH 02/22] Add CHANGES/1340.feature.rst --- CHANGES/1340.feature.rst | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 CHANGES/1340.feature.rst diff --git 
a/CHANGES/1340.feature.rst b/CHANGES/1340.feature.rst new file mode 100644 index 000000000..c0f4908ca --- /dev/null +++ b/CHANGES/1340.feature.rst @@ -0,0 +1,2 @@ +Added support for using the :meth:`subtraction operator <yarl.URL.__sub__>` +to get the relative path between URLs. From f18ed6bccbebfc93132c63b11e4ea6949a53559d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 05:14:43 +0000 Subject: [PATCH 03/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- yarl/_url.py | 1 - 1 file changed, 1 deletion(-) diff --git a/yarl/_url.py b/yarl/_url.py index 729a3f11c..c61818f22 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -17,7 +17,6 @@ Union, overload, ) -from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload from urllib.parse import ( SplitResult, parse_qsl, From d13bf1d759096f73f634653413a9d13056d37d3d Mon Sep 17 00:00:00 2001 From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com> Date: Mon, 21 Oct 2024 05:34:19 +0000 Subject: [PATCH 04/22] [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --- yarl/_url.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/yarl/_url.py b/yarl/_url.py index 17a930230..f09700df2 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -24,8 +24,6 @@ uses_netloc, uses_relative, ) -from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload -from urllib.parse import SplitResult, parse_qsl, quote, uses_relative import idna from multidict import MultiDict, MultiDictProxy From 5037e6b30229a2da85e73d3a91f93bc7afe0358d Mon Sep 17 00:00:00 2001 From: "J.
Nick Koston" Date: Sun, 20 Oct 2024 19:35:14 -1000 Subject: [PATCH 05/22] fix conflicting imports --- yarl/_url.py | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) diff --git a/yarl/_url.py b/yarl/_url.py index f09700df2..c09e485c2 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -6,24 +6,8 @@ from functools import _CacheInfo, lru_cache from ipaddress import ip_address from os.path import dirname, relpath -from typing import ( - TYPE_CHECKING, - Any, - SupportsInt, - Tuple, - TypedDict, - TypeVar, - Union, - overload, -) -from urllib.parse import ( - SplitResult, - parse_qsl, - quote, - scheme_chars, - uses_netloc, - uses_relative, -) +from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload +from urllib.parse import SplitResult, parse_qsl, quote, uses_relative import idna from multidict import MultiDict, MultiDictProxy From 3add068de402fe49be4cbc223b83053f9daf81c4 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 16:13:21 +0200 Subject: [PATCH 06/22] Sign CHANGES/1340.feature.rst Co-authored-by: J. Nick Koston --- CHANGES/1340.feature.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/CHANGES/1340.feature.rst b/CHANGES/1340.feature.rst index c0f4908ca..19927d990 100644 --- a/CHANGES/1340.feature.rst +++ b/CHANGES/1340.feature.rst @@ -1,2 +1,2 @@ Added support for using the :meth:`subtraction operator <yarl.URL.__sub__>` -to get the relative path between URLs. +to get the relative path between URLs -- by :user:`oleksbabieiev`.
From 38c7f3c95b9a7b4b319fa268727ac2a80e16dd01 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 16:31:16 +0200 Subject: [PATCH 07/22] Move `relative_path()` to `_path.py` --- yarl/_path.py | 14 ++++++++++++++ yarl/_url.py | 18 ++---------------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/yarl/_path.py b/yarl/_path.py index c22f0b4b8..48491659e 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -2,6 +2,7 @@ from collections.abc import Sequence from contextlib import suppress +from os.path import dirname, relpath def normalize_path_segments(segments: Sequence[str]) -> list[str]: @@ -39,3 +40,16 @@ def normalize_path(path: str) -> str: segments = path.split("/") return prefix + "/".join(normalize_path_segments(segments)) + + +def relative_path(path: str, start: str) -> str: + """A wrapper over os.path.relpath()""" + + if not path: + path = "/" + if not start: + start = "/" + if not start.endswith("/"): + start = dirname(start) + + return relpath(path, start) diff --git a/yarl/_url.py b/yarl/_url.py index ed5a9b671..272ca502f 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -4,7 +4,6 @@ from collections.abc import Mapping, Sequence from functools import _CacheInfo, lru_cache from ipaddress import ip_address -from os.path import dirname, relpath from typing import TYPE_CHECKING, Any, TypedDict, TypeVar, Union, overload from urllib.parse import SplitResult, parse_qsl, uses_relative @@ -13,7 +12,7 @@ from propcache.api import under_cached_property as cached_property from ._parse import USES_AUTHORITY, make_netloc, split_netloc, split_url, unsplit_result -from ._path import normalize_path, normalize_path_segments +from ._path import normalize_path, normalize_path_segments, relative_path from ._query import ( Query, QueryVariable, @@ -488,22 +487,9 @@ def __sub__(self, other: object) -> "URL": if target.netloc != base.netloc: raise ValueError("Both URLs should have the same netloc") - path = self._relpath(target.path, base.path) + 
path = relative_path(target.path, base.path) return self._from_tup(("", "", path, "", "")) - @staticmethod - def _relpath(path: str, start: str) -> str: - """A wrapper over os.path.relpath()""" - - if not path: - path = "/" - if not start: - start = "/" - if not start.endswith("/"): - start = dirname(start) - - return relpath(path, start) - def __mod__(self, query: Query) -> "URL": return self.update_query(query) From 4dd4a693ce71f6e4f52b9535636b6d54666e9d5f Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 17:05:21 +0200 Subject: [PATCH 08/22] Add more parameters to test `URL.__sub__()` --- tests/test_url.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/tests/test_url.py b/tests/test_url.py index b65387311..e7b60b9eb 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -70,6 +70,14 @@ def test_str(): ("http://example.com/path", "http://example.com/path/to/", ".."), ("http://example.com/", "http://example.com/", "."), ("http://example.com", "http://example.com", "."), + ("http://example.com/", "http://example.com", "."), + ("http://example.com", "http://example.com/", "."), + ("//example.com", "//example.com", "."), + ("/path/to", "/spam/", "../path/to"), + ("path/to", "spam/", "../path/to"), + ("path/to", "spam", "path/to"), + ("..", ".", ".."), + (".", "..", "."), ], ) def test_sub(target: str, base: str, expected: str): From f5f242bbeeffb77deb2bde7f2aaf9c118c5fcdcf Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 17:12:20 +0200 Subject: [PATCH 09/22] Use `SEPARATOR` constant for `/` --- yarl/_path.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/yarl/_path.py b/yarl/_path.py index 48491659e..28ff66fc8 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -4,6 +4,8 @@ from contextlib import suppress from os.path import dirname, relpath +SEPARATOR = "/" + def normalize_path_segments(segments: Sequence[str]) -> list[str]: """Drop '.' and '..' 
from a sequence of str segments""" @@ -32,24 +34,24 @@ def normalize_path_segments(segments: Sequence[str]) -> list[str]: def normalize_path(path: str) -> str: # Drop '.' and '..' from str path prefix = "" - if path and path[0] == "/": + if path and path[0] == SEPARATOR: # preserve the "/" root element of absolute paths, copying it to the # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986. - prefix = "/" + prefix = SEPARATOR path = path[1:] - segments = path.split("/") - return prefix + "/".join(normalize_path_segments(segments)) + segments = path.split(SEPARATOR) + return prefix + SEPARATOR.join(normalize_path_segments(segments)) def relative_path(path: str, start: str) -> str: """A wrapper over os.path.relpath()""" if not path: - path = "/" + path = SEPARATOR if not start: - start = "/" - if not start.endswith("/"): + start = SEPARATOR + if not start.endswith(SEPARATOR): start = dirname(start) return relpath(path, start) From 20060668076cf662fc416030d2d1f35a8bbeea48 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 17:38:55 +0200 Subject: [PATCH 10/22] Disallow `relative_path()` between abs and rel paths --- tests/test_url.py | 11 +++++++++++ yarl/_path.py | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/tests/test_url.py b/tests/test_url.py index e7b60b9eb..11b31a564 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -96,6 +96,17 @@ def test_sub_with_different_netlocs(): assert "Both URLs should have the same netloc" == str(ctx.value) +def test_sub_with_abs_and_rel_paths(): + with pytest.raises(ValueError) as ctx: + URL("path/to") - URL("/path/from") + assert ( + "It is forbidden to get the path " + "between the absolute and relative paths " + "because it is impossible " + "to get the current working directory." 
== str(ctx.value) + ) + + def test_repr(): url = URL("http://example.com") assert "URL('http://example.com')" == repr(url) diff --git a/yarl/_path.py b/yarl/_path.py index 28ff66fc8..e1f0e222d 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -54,4 +54,12 @@ def relative_path(path: str, start: str) -> str: if not start.endswith(SEPARATOR): start = dirname(start) + if (path.startswith(SEPARATOR) and not start.startswith(SEPARATOR)) or ( + not path.startswith(SEPARATOR) and start.startswith(SEPARATOR) + ): + raise ValueError( + "It is forbidden to get the path between the absolute and relative paths " + "because it is impossible to get the current working directory." + ) + return relpath(path, start) From c7178455786a721888b50915bf21e3e996057826 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 19:36:13 +0200 Subject: [PATCH 11/22] Remove the `SEPARATOR` constant --- yarl/_path.py | 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/yarl/_path.py b/yarl/_path.py index e1f0e222d..f30537f64 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -4,8 +4,6 @@ from contextlib import suppress from os.path import dirname, relpath -SEPARATOR = "/" - def normalize_path_segments(segments: Sequence[str]) -> list[str]: """Drop '.' and '..' from a sequence of str segments""" @@ -34,28 +32,28 @@ def normalize_path_segments(segments: Sequence[str]) -> list[str]: def normalize_path(path: str) -> str: # Drop '.' and '..' from str path prefix = "" - if path and path[0] == SEPARATOR: + if path and path[0] == "/": # preserve the "/" root element of absolute paths, copying it to the # normalised output as per sections 5.2.4 and 6.2.2.3 of rfc3986. 
- prefix = SEPARATOR + prefix = "/" path = path[1:] - segments = path.split(SEPARATOR) - return prefix + SEPARATOR.join(normalize_path_segments(segments)) + segments = path.split("/") + return prefix + "/".join(normalize_path_segments(segments)) def relative_path(path: str, start: str) -> str: """A wrapper over os.path.relpath()""" if not path: - path = SEPARATOR + path = "/" if not start: - start = SEPARATOR - if not start.endswith(SEPARATOR): + start = "/" + if not start.endswith("/"): start = dirname(start) - if (path.startswith(SEPARATOR) and not start.startswith(SEPARATOR)) or ( - not path.startswith(SEPARATOR) and start.startswith(SEPARATOR) + if (path.startswith("/") and not start.startswith("/")) or ( + not path.startswith("/") and start.startswith("/") ): raise ValueError( "It is forbidden to get the path between the absolute and relative paths " From 827171f799b41b065ed28399e4444dd991cc8a8a Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 20:10:40 +0200 Subject: [PATCH 12/22] Rename `relative_path()` --- yarl/_path.py | 2 +- yarl/_url.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/yarl/_path.py b/yarl/_path.py index f30537f64..d4a7dd63e 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -42,7 +42,7 @@ def normalize_path(path: str) -> str: return prefix + "/".join(normalize_path_segments(segments)) -def relative_path(path: str, start: str) -> str: +def calculate_relative_path(path: str, start: str) -> str: """A wrapper over os.path.relpath()""" if not path: diff --git a/yarl/_url.py b/yarl/_url.py index 272ca502f..a290d6931 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -12,7 +12,7 @@ from propcache.api import under_cached_property as cached_property from ._parse import USES_AUTHORITY, make_netloc, split_netloc, split_url, unsplit_result -from ._path import normalize_path, normalize_path_segments, relative_path +from ._path import calculate_relative_path, normalize_path, normalize_path_segments from 
._query import ( Query, QueryVariable, @@ -487,7 +487,7 @@ def __sub__(self, other: object) -> "URL": if target.netloc != base.netloc: raise ValueError("Both URLs should have the same netloc") - path = relative_path(target.path, base.path) + path = calculate_relative_path(target.path, base.path) return self._from_tup(("", "", path, "", "")) def __mod__(self, query: Query) -> "URL": From b436b1440b383f0dcecd9215e1cbf949796c67ed Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 20:46:05 +0200 Subject: [PATCH 13/22] Refactor `test_url.py` --- tests/test_url.py | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/tests/test_url.py b/tests/test_url.py index 11b31a564..f8540d770 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -62,7 +62,7 @@ def test_str(): @pytest.mark.parametrize( - "target,base,expected", + ("target", "base", "expected"), [ ("http://example.com/path/to", "http://example.com/", "path/to"), ("http://example.com/path/to", "http://example.com/spam", "path/to"), @@ -85,26 +85,24 @@ def test_sub(target: str, base: str, expected: str): def test_sub_with_different_schemes(): - with pytest.raises(ValueError) as ctx: + expected_error_msg = "Both URLs should have the same scheme" + with pytest.raises(ValueError, match=expected_error_msg): URL("http://example.com/") - URL("https://example.com/") - assert "Both URLs should have the same scheme" == str(ctx.value) def test_sub_with_different_netlocs(): - with pytest.raises(ValueError) as ctx: + expected_error_msg = "Both URLs should have the same netloc" + with pytest.raises(ValueError, match=expected_error_msg): URL("https://spam.com/") - URL("https://ham.com/") - assert "Both URLs should have the same netloc" == str(ctx.value) def test_sub_with_abs_and_rel_paths(): - with pytest.raises(ValueError) as ctx: - URL("path/to") - URL("/path/from") - assert ( - "It is forbidden to get the path " - "between the absolute and relative paths " - "because 
it is impossible " - "to get the current working directory." == str(ctx.value) + expected_error_msg = ( + "It is forbidden to get the path between the absolute and relative paths " + "because it is impossible to get the current working directory." ) + with pytest.raises(ValueError, match=expected_error_msg): + URL("path/to") - URL("/path/from") def test_repr(): From 1b620df2505276c0040b9fad97fd4c2269cb662d Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 20:49:15 +0200 Subject: [PATCH 14/22] Refactor `_path.py` --- yarl/_path.py | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/yarl/_path.py b/yarl/_path.py index d4a7dd63e..7ecc4b948 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -52,9 +52,13 @@ def calculate_relative_path(path: str, start: str) -> str: if not start.endswith("/"): start = dirname(start) - if (path.startswith("/") and not start.startswith("/")) or ( - not path.startswith("/") and start.startswith("/") - ): + path_has_leading_slash = path.startswith("/") + start_has_leading_slash = start.startswith("/") + both_have_leading_slash = all((path_has_leading_slash, start_has_leading_slash)) + none_have_leading_slash = all( + (not path_has_leading_slash, not start_has_leading_slash) + ) + if not both_have_leading_slash and not none_have_leading_slash: raise ValueError( "It is forbidden to get the path between the absolute and relative paths " "because it is impossible to get the current working directory." 
From bfb2c4d6f40565ad1074b45cc5b913c6687a733e Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 21:11:11 +0200 Subject: [PATCH 15/22] Add a small demo to `1340.feature.rst` --- CHANGES/1340.feature.rst | 22 +++++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/CHANGES/1340.feature.rst b/CHANGES/1340.feature.rst index 19927d990..777f72ea8 100644 --- a/CHANGES/1340.feature.rst +++ b/CHANGES/1340.feature.rst @@ -1,2 +1,22 @@ Added support for using the :meth:`subtraction operator <yarl.URL.__sub__>` -to get the relative path between URLs -- by :user:`oleksbabieiev`. +to get the relative path between URLs. + +Note that both URLs must have the same scheme and netloc: + +.. code-block:: pycon + + >>> target = URL("http://example.com/path/index.html") + >>> base = URL("http://example.com/") + >>> target - base + URL('path/index.html') + +URLs can also be relative and have no scheme or netloc at all: + +.. code-block:: pycon + + >>> target = URL("/") + >>> base = URL("/path/index.html") + >>> target - base + URL('..') + +-- by :user:`oleksbabieiev`. From 2b874784b761136ceb3f163c2bc1e8381d9ac1cb Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 21:24:29 +0200 Subject: [PATCH 16/22] Update docs for `URL.__sub__()` --- docs/api.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/api.rst b/docs/api.rst index ccc1edce1..c6b5b5891 100644 --- a/docs/api.rst +++ b/docs/api.rst @@ -976,7 +976,7 @@ The path is encoded if needed. The subtraction (``-``) operator creates a new URL with a relative *path* to the target URL from the given base URL. -*scheme*, *user*, *password*, *host* and *port* are removed. +*scheme*, *user*, *password*, *host*, *port*, *query* and *fragment* are removed. ..
method:: URL.__sub__(url) From 05c214797125badab07a6b26d6e560c74f2dc957 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 21:36:21 +0200 Subject: [PATCH 17/22] Add a PEP 257-compliant docstring for `URL.__sub__()` --- yarl/_url.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/yarl/_url.py b/yarl/_url.py index a290d6931..812ae89a7 100644 --- a/yarl/_url.py +++ b/yarl/_url.py @@ -476,6 +476,19 @@ def __truediv__(self, name: str) -> "URL": return self._make_child((str(name),)) def __sub__(self, other: object) -> "URL": + """Return a new URL with a relative path between two other URL objects. + + Note that both URLs must have the same scheme and netloc. + The new relative URL has only path: + scheme, user, password, host, port, query and fragment are removed. + + Example: + >>> target = URL("http://example.com/path/index.html") + >>> base = URL("http://example.com/") + >>> target - base + URL('path/index.html') + """ + if type(other) is not URL: return NotImplemented From 42c75aa37682ac195d73a760c029b5d51f6df701 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Mon, 21 Oct 2024 22:36:38 +0200 Subject: [PATCH 18/22] Update CHANGES/1340.feature.rst --- CHANGES/1340.feature.rst | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/CHANGES/1340.feature.rst b/CHANGES/1340.feature.rst index 777f72ea8..c879569a0 100644 --- a/CHANGES/1340.feature.rst +++ b/CHANGES/1340.feature.rst @@ -1,7 +1,7 @@ Added support for using the :meth:`subtraction operator <yarl.URL.__sub__>` to get the relative path between URLs. -Note that both URLs must have the same scheme and netloc: +Note that both URLs must have the same scheme, user, password, host and port: .. code-block:: pycon @@ -10,7 +10,7 @@ Note that both URLs must have the same scheme and netloc: >>> target - base URL('path/index.html') -URLs can also be relative and have no scheme or netloc at all: +URLs can also be relative: ..
code-block:: pycon From 7da15996e6b64b020dc7b9d70945a26c839c63a5 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Tue, 22 Oct 2024 08:00:47 +0200 Subject: [PATCH 19/22] Avoid the `os` namespace --- tests/test_url.py | 13 ++++++++----- yarl/_path.py | 43 +++++++++++++++++++++++-------------------- 2 files changed, 31 insertions(+), 25 deletions(-) diff --git a/tests/test_url.py b/tests/test_url.py index f8540d770..9297fbf93 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -96,15 +96,18 @@ def test_sub_with_different_netlocs(): URL("https://spam.com/") - URL("https://ham.com/") -def test_sub_with_abs_and_rel_paths(): - expected_error_msg = ( - "It is forbidden to get the path between the absolute and relative paths " - "because it is impossible to get the current working directory." - ) +def test_sub_with_different_anchors(): + expected_error_msg = "'path/to' and '/path' have different anchors" with pytest.raises(ValueError, match=expected_error_msg): URL("path/to") - URL("/path/from") +def test_sub_with_two_dots_in_base(): + expected_error_msg = "'..' segment in '/path/..' cannot be walked" + with pytest.raises(ValueError, match=expected_error_msg): + URL("path/to") - URL("/path/../from") + + def test_repr(): url = URL("http://example.com") assert "URL('http://example.com')" == repr(url) diff --git a/yarl/_path.py b/yarl/_path.py index 7ecc4b948..ae01cf1b2 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -2,7 +2,7 @@ from collections.abc import Sequence from contextlib import suppress -from os.path import dirname, relpath +from pathlib import PurePath def normalize_path_segments(segments: Sequence[str]) -> list[str]: @@ -42,26 +42,29 @@ def normalize_path(path: str) -> str: return prefix + "/".join(normalize_path_segments(segments)) -def calculate_relative_path(path: str, start: str) -> str: - """A wrapper over os.path.relpath()""" +def calculate_relative_path(target: str, base: str) -> str: + """Return the relative path between two other paths. 
- if not path: - path = "/" - if not start: - start = "/" - if not start.endswith("/"): - start = dirname(start) + If the operation is not possible, raise ValueError. + """ - path_has_leading_slash = path.startswith("/") - start_has_leading_slash = start.startswith("/") - both_have_leading_slash = all((path_has_leading_slash, start_has_leading_slash)) - none_have_leading_slash = all( - (not path_has_leading_slash, not start_has_leading_slash) - ) - if not both_have_leading_slash and not none_have_leading_slash: + target = target or "/" + base = base or "/" + + target_path = PurePath(target) + base_path = PurePath(base) + + if not base.endswith("/"): + base_path = base_path.parent + + for step, path in enumerate([base_path] + list(base_path.parents)): + if target_path.is_relative_to(path): + break + elif path.name == "..": + raise ValueError(f"'..' segment in {str(base_path)!r} cannot be walked") + else: raise ValueError( - "It is forbidden to get the path between the absolute and relative paths " - "because it is impossible to get the current working directory." 
+ f"{str(target_path)!r} and {str(base_path)!r} have different anchors" ) - - return relpath(path, start) + parts = [".."] * step + list(target_path.parts)[len(path.parts) :] + return str(PurePath(*parts)) From e252ff748eb6f6fd0222cfc0a313196d27b28764 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Tue, 22 Oct 2024 08:18:54 +0200 Subject: [PATCH 20/22] Introduce the `offset` variable --- yarl/_path.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/yarl/_path.py b/yarl/_path.py index ae01cf1b2..4bbf58ab6 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -66,5 +66,6 @@ def calculate_relative_path(target: str, base: str) -> str: raise ValueError( f"{str(target_path)!r} and {str(base_path)!r} have different anchors" ) - parts = [".."] * step + list(target_path.parts)[len(path.parts) :] + offset = len(path.parts) + parts = [".."] * step + list(target_path.parts)[offset:] return str(PurePath(*parts)) From 39060d18aee3054e1ab2daeaa39b931f875a63f2 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Tue, 22 Oct 2024 21:10:37 +0200 Subject: [PATCH 21/22] Replace `PurePath` with `PurePosixPath` --- yarl/_path.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/yarl/_path.py b/yarl/_path.py index 4bbf58ab6..b667dd09d 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -2,7 +2,7 @@ from collections.abc import Sequence from contextlib import suppress -from pathlib import PurePath +from pathlib import PurePosixPath def normalize_path_segments(segments: Sequence[str]) -> list[str]: @@ -51,8 +51,8 @@ def calculate_relative_path(target: str, base: str) -> str: target = target or "/" base = base or "/" - target_path = PurePath(target) - base_path = PurePath(base) + target_path = PurePosixPath(target) + base_path = PurePosixPath(base) if not base.endswith("/"): base_path = base_path.parent @@ -68,4 +68,4 @@ def calculate_relative_path(target: str, base: str) -> str: ) offset = len(path.parts) parts = [".."] * step + 
list(target_path.parts)[offset:] - return str(PurePath(*parts)) + return str(PurePosixPath(*parts)) From d683e16fbe31b60624e103d6348e79710a289235 Mon Sep 17 00:00:00 2001 From: Oleksandr Babieiev Date: Tue, 22 Oct 2024 21:48:05 +0200 Subject: [PATCH 22/22] Refactor `_path.py` Co-authored-by: J. Nick Koston --- yarl/_path.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/yarl/_path.py b/yarl/_path.py index b667dd09d..704f87242 100644 --- a/yarl/_path.py +++ b/yarl/_path.py @@ -67,5 +67,4 @@ def calculate_relative_path(target: str, base: str) -> str: f"{str(target_path)!r} and {str(base_path)!r} have different anchors" ) offset = len(path.parts) - parts = [".."] * step + list(target_path.parts)[offset:] - return str(PurePosixPath(*parts)) + return str(PurePosixPath(*("..",) * step, *target_path.parts[offset:]))