diff --git a/.bumpversion.toml b/.bumpversion.toml new file mode 100644 index 00000000..a86045a6 --- /dev/null +++ b/.bumpversion.toml @@ -0,0 +1,17 @@ +[tool.bumpversion] +current_version = "0.26.0" +commit = true +tag = true +tag_name = "{new_version}" + +[[tool.bumpversion.files]] +filename = 'CHANGES.rst' +search = "\\(unreleased\\)$" +replace = "({now:%Y-%m-%d})" +regex = true + +[[tool.bumpversion.files]] +filename = "docs/conf.py" + +[[tool.bumpversion.files]] +filename = "scrapy_zyte_api/__version__.py" diff --git a/CHANGES.rst b/CHANGES.rst index a2576ee4..e018c74e 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -1,6 +1,26 @@ Changes ======= +0.29.0 (unreleased) +------------------- + +* Cookie support is no longer experimental: + + * If the ``COOKIES_ENABLED`` setting is ``True`` (default), automatic request + parameter mapping now sets ``responseCookies`` to ``True`` and maps request + cookies to ``requestCookies``. + + * The ``ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED`` setting is now deprecated. + When enabled, however, the ``experimental`` name space is still used for + automatic request parameter mapping. + + * If you use ``requestCookies``, ``responseCookies``, or ``cookieManagement`` + within the ``experimental`` name space in request parameters, a deprecation + warning is now logged. + + * The ``responseCookies`` response parameter is now handled the same as + ``experimental.responseCookies``; the latter still works but is deprecated. + 0.28.0 (2025-02-18) ------------------- @@ -192,7 +212,6 @@ Changes * Fixed some documentation examples where the parameters of the ``check`` method of :setting:`ZYTE_API_SESSION_CHECKER` were in reverse order. - 0.20.0 (2024-06-26) ------------------- @@ -383,6 +402,7 @@ Changes work as expected with :class:`~scrapy.pqueues.DownloaderAwarePriorityQueue`. + 0.14.0 (2024-01-15) ------------------- @@ -394,6 +414,7 @@ Changes * Added support for ``zyte_common_items.JobPosting`` to the scrapy-poet provider. 
+ 0.13.0 (2023-12-13) ------------------- @@ -436,6 +457,7 @@ Changes * Test and CI improvements. + 0.12.2 (2023-10-19) ------------------- @@ -444,6 +466,7 @@ Changes * When logging Zyte API requests, truncation now uses "..." instead of Unicode ellipsis. + 0.12.1 (2023-09-29) ------------------- @@ -471,7 +494,7 @@ Changes Experimental is treated as a namespace, and its parameters are the ones counted, i.e. there is no ``scrapy-zyte-api/request_args/experimental`` stat, but there are stats like - ``scrapy-zyte-api/request_args/experimental.responseCookies``. + ``scrapy-zyte-api/request_args/experimental.foo``. 0.11.1 (2023-08-25) diff --git a/docs/reference/fingerprint-params.rst b/docs/reference/fingerprint-params.rst index 86418503..273461c2 100644 --- a/docs/reference/fingerprint-params.rst +++ b/docs/reference/fingerprint-params.rst @@ -54,10 +54,9 @@ The following Zyte API parameters are *not* taken into account for request fingerprinting by default: - Request header parameters (:http:`request:customHttpRequestHeaders`, - :http:`request:requestHeaders`). + :http:`request:requestHeaders`, :http:`request:requestCookies`). -- Request cookie parameters (:http:`request:cookieManagement`, - :http:`request:requestCookies`). +- :http:`request:cookieManagement`. - :http:`request:sessionContextParameters`. diff --git a/docs/reference/request.rst b/docs/reference/request.rst index 1ae4f165..74c4418a 100644 --- a/docs/reference/request.rst +++ b/docs/reference/request.rst @@ -33,15 +33,14 @@ Automatic mapping If :http:`request:serp` is enabled, request header mapping is disabled. -- If :setting:`ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED` is ``True``, - :setting:`COOKIES_ENABLED ` is ``True`` (default), - and :attr:`Request.meta ` does not set +- If the :setting:`COOKIES_ENABLED ` is ``True`` + (default), and :attr:`Request.meta ` does not set :reqmeta:`dont_merge_cookies ` to ``True``: - - :http:`request:experimental.responseCookies` becomes ``True``. 
+ - :http:`request:responseCookies` becomes ``True``. - Cookies from the :reqmeta:`cookiejar ` become - :http:`request:experimental.requestCookies`. + :http:`request:requestCookies`. All cookies from the cookie jar are set, regardless of their cookie domain. This is because Zyte API requests may involve requests to @@ -121,20 +120,18 @@ following parameters: "value": "application/json" } ], - "experimental": { - "requestCookies": [ - { - "name": "a", - "value": "b", - "domain": "" - } - ], - "responseCookies": true - }, "httpResponseBody": true, "httpResponseHeaders": true, "httpRequestBody": "eyJmb28iOiAiYmFyIn0=", "httpRequestMethod": "POST", + "requestCookies": [ + { + "name": "a", + "value": "b", + "domain": "" + } + ], + "responseCookies": true, "url": "https://httpbin.org/anything" } diff --git a/docs/reference/response.rst b/docs/reference/response.rst index 02608de6..63336d39 100644 --- a/docs/reference/response.rst +++ b/docs/reference/response.rst @@ -19,12 +19,11 @@ Zyte API response parameters are mapped into :ref:`response class `. - :http:`response:httpResponseHeaders` and - :http:`response:experimental.responseCookies` become - :class:`response.headers + :http:`response:responseCookies` become :class:`response.headers `. -- :http:`response:experimental.responseCookies` is also mapped into the - request :reqmeta:`cookiejar `. +- :http:`response:responseCookies` is also mapped into the request + :reqmeta:`cookiejar `. - :http:`response:browserHtml` and :http:`response:httpResponseBody` are mapped into both diff --git a/docs/reference/settings.rst b/docs/reference/settings.rst index c8edff34..9fff0cf8 100644 --- a/docs/reference/settings.rst +++ b/docs/reference/settings.rst @@ -136,16 +136,6 @@ Default: ``True`` Can be set to ``False`` to disable scrapy-zyte-api. -.. setting:: ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED - -ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED -===================================== - -Default: ``False`` - -See :ref:`request-automatic`. 
- - .. setting:: ZYTE_API_FALLBACK_REQUEST_FINGERPRINTER_CLASS ZYTE_API_FALLBACK_REQUEST_FINGERPRINTER_CLASS @@ -217,11 +207,11 @@ If the cookies to be set during :ref:`request mapping ` exceed this limit, a warning is logged, and only as many cookies as the limit allows are set for the target request. -To silence this warning, set :http:`request:experimental.requestCookies` -manually, e.g. to an empty :class:`dict`. +To silence this warning, set :http:`request:requestCookies` manually, e.g. to +an empty :class:`dict`. -Alternatively, if :http:`request:experimental.requestCookies` starts supporting -more than 100 cookies, update this setting accordingly. +Alternatively, if :http:`request:requestCookies` starts supporting more than +100 cookies, update this setting accordingly. .. setting:: ZYTE_API_MAX_REQUESTS diff --git a/docs/usage/automap.rst b/docs/usage/automap.rst index 600eb3a7..963d7560 100644 --- a/docs/usage/automap.rst +++ b/docs/usage/automap.rst @@ -70,9 +70,7 @@ following parameters: { "browserHtml": true, - "experimental": { - "responseCookies": true - }, + "responseCookies": true, "requestHeaders": {"referer": "https://example.com/"}, "url": "https://quotes.toscrape.com" } diff --git a/scrapy_zyte_api/_cookies.py b/scrapy_zyte_api/_cookies.py index 60ae268b..f20518ca 100644 --- a/scrapy_zyte_api/_cookies.py +++ b/scrapy_zyte_api/_cookies.py @@ -30,7 +30,8 @@ def _process_cookies( ): if not cookie_jars: return - response_cookies = api_response.get("experimental", {}).get("responseCookies") + old_response_cookies = api_response.get("experimental", {}).get("responseCookies") + response_cookies = api_response.get("responseCookies", old_response_cookies) if not response_cookies: return cookie_jar = _get_cookie_jar(request, cookie_jars) diff --git a/scrapy_zyte_api/_params.py b/scrapy_zyte_api/_params.py index 701d977b..d7e26208 100644 --- a/scrapy_zyte_api/_params.py +++ b/scrapy_zyte_api/_params.py @@ -619,64 +619,89 @@ def 
_set_http_response_headers_from_request( api_params.pop("httpResponseHeaders") -def _set_http_response_cookies_from_request( - *, - api_params: Dict[str, Any], -): - api_params.setdefault("experimental", {}) - api_params["experimental"].setdefault("responseCookies", True) - if api_params["experimental"]["responseCookies"] is False: - del api_params["experimental"]["responseCookies"] +def _handle_experimental_unnamespacing(api_params, request, experimental, field): + experimental_params = api_params.setdefault("experimental", {}) + if not experimental and field in experimental_params: + if field in api_params: + logger.warning( + f"Request {request!r} defines both {field} " + f"({api_params[field]}) and " + f"experimental.{field} " + f"({experimental_params[field]}). " + f"experimental.{field} will be ignored." + ) + del experimental_params[field] + else: + logger.warning( + f"Request {request!r} defines experimental.{field}. " + f"experimental.{field} will be removed, and its value " + f"will be set as {field}." + ) + api_params[field] = experimental_params.pop(field) + elif experimental and field in api_params: + if field in experimental_params: + logger.warning( + f"Request {request!r} defines both {field} " + f"({api_params[field]}) and " + f"experimental.{field} " + f"({experimental_params[field]}). Since the " + f"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting is enabled, " + f"{field} will be removed, and its value will be set " + f"as experimental.{field}, overriding its current " + f"value." + ) + else: + logger.warning( + f"Request {request!r} defines {field}. Since the " + f"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting is enabled, " + f"{field} will be removed, and its value will be set " + f"as experimental.{field}." 
+ ) + experimental_params[field] = api_params.pop(field) + if not experimental_params: + del api_params["experimental"] -def _set_http_request_cookies_from_request( +def _set_http_response_cookies_from_request( *, api_params: Dict[str, Any], + experimental: bool, request: Request, - cookie_jars: Dict[Any, CookieJar], - max_cookies: int, ): - api_params.setdefault("experimental", {}) - if "requestCookies" in api_params["experimental"]: - request_cookies = api_params["experimental"]["requestCookies"] - if request_cookies is False: - del api_params["experimental"]["requestCookies"] - elif not request_cookies and isinstance(request_cookies, list): - logger.warning( - ( - "Request %(request)r is overriding automatic request " - "cookie mapping by explicitly setting " - "experimental.requestCookies to []. If this was your " - "intention, please use False instead of []. Otherwise, " - "stop defining experimental.requestCookies in your " - "request to let automatic mapping work." - ), - { - "request": request, - }, - ) + if "responseCookies" in api_params and api_params["responseCookies"] is False: + del api_params["responseCookies"] return + experimental_params = api_params.get("experimental", {}) + if ( + "responseCookies" in experimental_params + and experimental_params["responseCookies"] is False + ): + del experimental_params["responseCookies"] + if not experimental_params: + del api_params["experimental"] + return + if not experimental: + api_params.setdefault("responseCookies", True) + else: + api_params.setdefault("experimental", {}) + api_params["experimental"].setdefault("responseCookies", True) + + +def _get_output_cookies(request, cookie_jars, max_cookies, field): output_cookies = [] input_cookies = _get_all_cookies(request, cookie_jars) input_cookie_count = len(input_cookies) if input_cookie_count > max_cookies: logger.warning( - ( - "Request %(request)r would get %(count)r cookies, but request " - "cookie automatic mapping is limited to %(max)r cookies " - 
"(see the ZYTE_API_MAX_COOKIES setting), so only %(max)r " - "cookies have been added to this request. To silence this " - "warning, set the request cookies manually through the " - "experimental.requestCookies Zyte API parameter instead. " - "Alternatively, if Zyte API starts supporting more than " - "%(max)r request cookies, update the ZYTE_API_MAX_COOKIES " - "setting accordingly." - ), - { - "request": request, - "count": input_cookie_count, - "max": max_cookies, - }, + f"Request {request!r} would get {input_cookie_count!r} cookies, " + f"but request cookie automatic mapping is limited to " + f"{max_cookies!r} cookies (see the ZYTE_API_MAX_COOKIES setting), " + f"so only {max_cookies!r} cookies have been added to this " + f"request. To silence this warning, set the request cookies " + f"manually through the {field} Zyte API parameter instead. " + f"Alternatively, if Zyte API starts supporting more than " + f"{max_cookies!r} request cookies, update the ZYTE_API_MAX_COOKIES " + f"setting accordingly." ) input_cookies = input_cookies[:max_cookies] for input_cookie in input_cookies: @@ -688,8 +713,71 @@ def _set_http_request_cookies_from_request( if input_cookie.path_specified: output_cookie["path"] = input_cookie.path output_cookies.append(output_cookie) - if output_cookies: - api_params["experimental"]["requestCookies"] = output_cookies + return output_cookies + + +def _set_http_request_cookies_from_request( + *, + api_params: Dict[str, Any], + request: Request, + cookie_jars: Dict[Any, CookieJar], + max_cookies: int, + experimental: bool, +): + if "requestCookies" in api_params: + request_cookies = api_params["requestCookies"] + if not request_cookies: + del api_params["requestCookies"] + # Note: We do not warn about setting requestCookies to False + # when there is no need (i.e. no input_cookies below) because + # input cookies can change at run time due to cookiejars, so + # False may make sense for some iterations of the code. 
+ if isinstance(request_cookies, list): + logger.warning( + f"Request {request!r} is overriding automatic request " + f"cookie mapping by explicitly setting " + f"requestCookies to []. If this was your intention, " + f"please use False instead of []. Otherwise, stop " + f"defining requestCookies in your request to let " + f"automatic mapping work." + ) + return + + experimental_params = api_params.get("experimental", {}) + if "requestCookies" in experimental_params: + request_cookies = experimental_params["requestCookies"] + if not request_cookies: + del experimental_params["requestCookies"] + if not experimental_params: + del api_params["experimental"] + # Note: We do not warn about setting requestCookies to False + # when there is no need (i.e. no input_cookies below) because + # input cookies can change at run time due to cookiejars, so + # False may make sense for some iterations of the code. + if isinstance(request_cookies, list): + logger.warning( + f"Request {request!r} is overriding automatic request " + f"cookie mapping by explicitly setting " + f"experimental.requestCookies to []. If this was your " + f"intention, please use False instead of []. " + f"Otherwise, stop defining " + f"experimental.requestCookies in your request to let " + f"automatic mapping work." 
+ ) + return + + if not experimental: + output_cookies = _get_output_cookies( + request, cookie_jars, max_cookies, "requestCookies" + ) + if output_cookies: + api_params["requestCookies"] = output_cookies + else: + output_cookies = _get_output_cookies( + request, cookie_jars, max_cookies, "experimental.requestCookies" + ) + if output_cookies: + api_params.setdefault("experimental", {})["requestCookies"] = output_cookies def _set_http_request_method_from_request( @@ -746,13 +834,14 @@ def _unset_unneeded_api_params( default_params: Dict[str, Any], request: Request, ): + experimental_had_content = bool(api_params.get("experimental", None)) for param, default_value in _DEFAULT_API_PARAMS.items(): value = api_params.get(param, _Undefined) - if value is _Undefined: + if value is _Undefined or value != default_value: continue - if value != default_value: - continue - if param not in default_params or default_params.get(param) == default_value: + if ( + param not in default_params or default_params.get(param) == default_value + ) and (param != "experimental" or not experimental_had_content): logger.warning( f"Request {request} unnecessarily defines the Zyte API {param!r} " f"parameter with its default value, {default_value!r}. It will " @@ -773,7 +862,25 @@ def _update_api_params_from_request( cookies_enabled: bool, cookie_jars: Optional[Dict[Any, CookieJar]], max_cookies: int, + experimental_cookies: bool, + unreported_deprecated_experimental_fields: Set[str], ): + for field in ("responseCookies", "requestCookies", "cookieManagement"): + if ( + field in unreported_deprecated_experimental_fields + and field in api_params.get("experimental", {}) + ): + unreported_deprecated_experimental_fields.remove(field) + logger.warning( + f"Zyte API parameters for request {request} include " + f"experimental.{field}, which is deprecated. 
Please, " + f"replace it with {field}, both in request parameters " + f"and in any response parsing logic that might rely " + f"on the old parameter." + ) + _handle_experimental_unnamespacing( + api_params, request, experimental_cookies, field + ) _set_http_response_body_from_request(api_params=api_params, request=request) _set_http_response_headers_from_request( api_params=api_params, @@ -790,16 +897,21 @@ def _update_api_params_from_request( ) _set_http_request_body_from_request(api_params=api_params, request=request) if cookies_enabled: - assert cookie_jars is not None # typing - _set_http_response_cookies_from_request(api_params=api_params) - _set_http_request_cookies_from_request( + _set_http_response_cookies_from_request( api_params=api_params, + experimental=experimental_cookies, request=request, - cookie_jars=cookie_jars, - max_cookies=max_cookies, ) - if not api_params["experimental"]: - del api_params["experimental"] + # cookie_jars can be None when the param parser is used for request + # fingerprinting, in which case request cookies are not relevant. 
+ if cookie_jars is not None: + _set_http_request_cookies_from_request( + api_params=api_params, + request=request, + cookie_jars=cookie_jars, + max_cookies=max_cookies, + experimental=experimental_cookies, + ) _unset_unneeded_api_params( api_params=api_params, request=request, default_params=default_params ) @@ -905,6 +1017,8 @@ def _get_automap_params( cookies_enabled: bool, cookie_jars: Optional[Dict[Any, CookieJar]], max_cookies: int, + experimental_cookies: bool, + unreported_deprecated_experimental_fields: Set[str], ): meta_params = request.meta.get("zyte_api_automap", default_enabled) if meta_params is False: @@ -935,6 +1049,8 @@ def _get_automap_params( cookies_enabled=cookies_enabled, cookie_jars=cookie_jars, max_cookies=max_cookies, + experimental_cookies=experimental_cookies, + unreported_deprecated_experimental_fields=unreported_deprecated_experimental_fields, ) return params @@ -953,6 +1069,8 @@ def _get_api_params( cookies_enabled: bool, cookie_jars: Optional[Dict[Any, CookieJar]], max_cookies: int, + experimental_cookies: bool, + unreported_deprecated_experimental_fields: Set[str], ) -> Optional[dict]: """Returns a dictionary of API parameters that must be sent to Zyte API for the specified request, or None if the request should not be sent through @@ -969,6 +1087,8 @@ def _get_api_params( cookies_enabled=cookies_enabled, cookie_jars=cookie_jars, max_cookies=max_cookies, + experimental_cookies=experimental_cookies, + unreported_deprecated_experimental_fields=unreported_deprecated_experimental_fields, ) if api_params is None: return None @@ -977,6 +1097,22 @@ def _get_api_params( f"Request {request} combines manually-defined parameters and " f"automatically-mapped parameters." 
) + else: + if ( + api_params + and unreported_deprecated_experimental_fields + and "experimental" in api_params + ): + for field in list(unreported_deprecated_experimental_fields): + if field in api_params["experimental"]: + unreported_deprecated_experimental_fields.remove(field) + logger.warning( + f"Zyte API parameters for request {request} include " + f"experimental.{field}, which is deprecated. Please, " + f"replace it with {field}, both in request parameters " + f"and in any response parsing logic that might rely " + f"on the old parameter." + ) if job_id is not None: api_params["jobId"] = job_id @@ -1062,22 +1198,48 @@ def __init__(self, crawler, cookies_enabled=None): self._transparent_mode = settings.getbool("ZYTE_API_TRANSPARENT_MODE", False) self._http_skip_headers = _load_http_skip_headers(settings) self._mw_skip_headers = _load_mw_skip_headers(crawler) - self._warn_on_cookies = False + self._experimental_cookies = settings.getbool( + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED" + ) + if self._experimental_cookies: + logger.warning( + "The deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting " + "is set to True. Please, remove the deprecated " + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting, and remove " + "the experimental name space from the responseCookies and " + "requestCookies parameters in your code (if any), both when " + "building requests and when parsing responses.", + ) if cookies_enabled is not None: self._cookies_enabled = cookies_enabled - elif settings.getbool("ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED") is True: + else: self._cookies_enabled = settings.getbool("COOKIES_ENABLED") - if not self._cookies_enabled: + if not self._cookies_enabled and self._experimental_cookies: logger.warning( - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, but it " - "will have no effect because COOKIES_ENABLED is False." 
+ "The deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED " + "setting is True, but it will have no effect because the " + "COOKIES_ENABLED setting is False. To silence this " + "warning, remove the deprecated " + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting. To enable " + "automatic cookie mapping, set COOKIES_ENABLED to True. " + "Please, consider removing the deprecated " + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED setting, and " + "removing the experimental name space from the " + "responseCookies and requestCookies parameters in your " + "code (if any), both when building requests and when " + "parsing responses.", ) - else: - self._cookies_enabled = False - self._warn_on_cookies = settings.getbool("COOKIES_ENABLED") self._max_cookies = settings.getint("ZYTE_API_MAX_COOKIES", 100) self._crawler = crawler self._cookie_jars = None + if not self._experimental_cookies: + self._unreported_deprecated_experimental_fields = { + "requestCookies", + "responseCookies", + "cookieManagement", + } + else: + self._unreported_deprecated_experimental_fields = set() def _request_skip_headers(self, request): result = dict(self._mw_skip_headers) @@ -1103,32 +1265,7 @@ def parse(self, request): cookies_enabled=cookies_enabled, cookie_jars=self._cookie_jars, max_cookies=self._max_cookies, + experimental_cookies=self._experimental_cookies, + unreported_deprecated_experimental_fields=self._unreported_deprecated_experimental_fields, ) - if not dont_merge_cookies and self._warn_on_cookies: - self._handle_warn_on_cookies(request, params) return params - - def _handle_warn_on_cookies(self, request, params): - if params and params.get("experimental", {}).get("requestCookies") is not None: - return - if self._cookie_jars is None: - return - input_cookies = _get_all_cookies(request, self._cookie_jars) - if len(input_cookies) <= 0: - return - logger.warning( - ( - "Cookies are enabled for request %(request)r, and there are " - "cookies in the cookiejar, but " - 
"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is False, so automatic " - "mapping will not map cookies for this or any other request. " - "To silence this warning, disable cookies for all requests " - "that use automatic mapping, either with the " - "COOKIES_ENABLED setting or with the dont_merge_cookies " - "request metadata key." - ), - { - "request": request, - }, - ) - self._warn_on_cookies = False diff --git a/scrapy_zyte_api/_request_fingerprinter.py b/scrapy_zyte_api/_request_fingerprinter.py index 1750f351..0be90413 100644 --- a/scrapy_zyte_api/_request_fingerprinter.py +++ b/scrapy_zyte_api/_request_fingerprinter.py @@ -27,6 +27,8 @@ from ._params import _REQUEST_PARAMS, _may_use_browser, _ParamParser from .utils import _build_from_crawler + _Undefined = object() + class ScrapyZyteAPIRequestFingerprinter: @classmethod def from_crawler(cls, crawler): @@ -71,7 +73,7 @@ def __init__(self, crawler): ) self._has_poet = False self._cache: "WeakKeyDictionary[Request, bytes]" = WeakKeyDictionary() - self._param_parser = _ParamParser(crawler, cookies_enabled=False) + self._param_parser = _ParamParser(crawler) self._crawler = crawler def _normalize_params(self, api_params): @@ -85,9 +87,19 @@ def _normalize_params(self, api_params): api_params.pop("httpRequestText").encode() ).decode() + if ( + "responseCookies" not in api_params + and "responseCookies" in api_params.get("experimental", {}) + ): + api_params["responseCookies"] = api_params["experimental"].pop( + "responseCookies" + ) + for key, value in _REQUEST_PARAMS.items(): if not value.get("changes_fingerprint", True): api_params.pop(key, None) + elif value["default"] == api_params.get(key, _Undefined): + api_params.pop(key) @cached_property def _session_mw(self): diff --git a/scrapy_zyte_api/responses.py b/scrapy_zyte_api/responses.py index 941754f7..5a18b06f 100644 --- a/scrapy_zyte_api/responses.py +++ b/scrapy_zyte_api/responses.py @@ -90,9 +90,14 @@ def _prepare_headers(cls, api_response: Dict[str, Any]): 
input_headers: Optional[List[Dict[str, str]]] = api_response.get( "httpResponseHeaders" ) - response_cookies: Optional[List[Dict[str, str]]] = api_response.get( + deprecated_response_cookies: Optional[List[Dict[str, str]]] = api_response.get( "experimental", {} ).get("responseCookies") + response_cookies: Optional[List[Dict[str, str]]] = api_response.get( + "responseCookies", deprecated_response_cookies + ) + # Note: We do not warn about deprecated experimental cookie use because + # _process_cookies is called earlier and already takes care of that. if input_headers: headers_to_remove = copy(cls.REMOVE_HEADERS) if response_cookies: @@ -180,11 +185,10 @@ def _process_response( on which if it can properly decode the HTTP Body or have access to browserHtml. """ - # NOTES: Currently, Zyte API does NOT only allow both 'browserHtml' and + # NOTES: Currently, Zyte API does NOT allow both 'browserHtml' and # 'httpResponseBody' to be present at the same time. The support for both # will be addressed in the future. Reference: # - https://github.com/scrapy-plugins/scrapy-zyte-api/pull/10#issuecomment-1131406460 - # For now, at least one of them should be present. 
_process_cookies(api_response, request, cookie_jars) diff --git a/tests/__init__.py b/tests/__init__.py index 3a216f8e..0543993c 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -17,6 +17,12 @@ _API_KEY = "a" +DEFAULT_AUTOMAP_PARAMS: Dict[str, Any] = { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, +} + DEFAULT_CLIENT_CONCURRENCY = AsyncClient(api_key=_API_KEY).n_conn SETTINGS_T = Dict[str, Any] SETTINGS: SETTINGS_T = { diff --git a/tests/test_api_requests.py b/tests/test_api_requests.py index 5f1eeb88..456fd197 100644 --- a/tests/test_api_requests.py +++ b/tests/test_api_requests.py @@ -6,7 +6,7 @@ from functools import partial from http.cookiejar import Cookie from inspect import isclass -from typing import Any, Dict, Type, cast +from typing import Any, Dict, List, Type, cast from unittest import mock import pytest @@ -29,6 +29,7 @@ from scrapy_zyte_api.responses import _process_response from . import ( + DEFAULT_AUTOMAP_PARAMS, DEFAULT_CLIENT_CONCURRENCY, SETTINGS, SETTINGS_T, @@ -102,7 +103,7 @@ async def test_response_binary(meta: Dict[str, Dict[str, Any]], mockserver): [ {"browserHtml": True, "httpResponseHeaders": True}, {"browserHtml": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, pytest.param( {"httpResponseBody": True}, marks=pytest.mark.xfail( @@ -203,18 +204,20 @@ async def test_coro_handling(zyte_api: bool, mockserver): ], ) async def test_exceptions( - caplog: LogCaptureFixture, meta: Dict[str, Dict[str, Any]], exception_type: Type[Exception], exception_text: str, mockserver, + caplog: LogCaptureFixture, ): caplog.set_level("DEBUG") async with mockserver.make_handler() as handler: req = Request("http://example.com", method="POST", meta=meta) with pytest.raises(exception_type): await handler.download_request(req, None) - assert exception_text in caplog.text + _assert_log_messages( + caplog, [exception_text], levelname="DEBUG", allow_other_messages=True + ) 
@ensureDeferred @@ -280,8 +283,9 @@ async def parse(self, response): b"cookie": ANY_VALUE, } JOB_ID = None -COOKIES_ENABLED = False +COOKIES_ENABLED = True MAX_COOKIES = 100 +EXPERIMENTAL_COOKIES = False GET_API_PARAMS_KWARGS = { "default_params": DEFAULT_PARAMS, "transparent_mode": TRANSPARENT_MODE, @@ -291,6 +295,7 @@ async def parse(self, response): "job_id": JOB_ID, "cookies_enabled": COOKIES_ENABLED, "max_cookies": MAX_COOKIES, + "experimental_cookies": EXPERIMENTAL_COOKIES, } @@ -300,7 +305,7 @@ async def test_params_parser_input_default(mockserver): for key in GET_API_PARAMS_KWARGS: actual = getattr(handler._param_parser, f"_{key}") expected = GET_API_PARAMS_KWARGS[key] - assert actual == expected, key + assert expected == actual, key @ensureDeferred @@ -325,6 +330,7 @@ async def test_param_parser_input_custom(mockserver): b"a": ANY_VALUE, } assert parser._transparent_mode is True + assert parser._experimental_cookies is True @ensureDeferred @@ -356,12 +362,6 @@ async def test_param_parser_output_side_effects(output, uses_zyte_api, mockserve handler._fallback_handler.download_request.assert_called() -DEFAULT_AUTOMAP_PARAMS: Dict[str, Any] = { - "httpResponseBody": True, - "httpResponseHeaders": True, -} - - @pytest.mark.parametrize( "setting,meta,expected", [ @@ -454,7 +454,7 @@ async def test_transparent_mode_toggling(setting, meta, expected): api_params = func() if api_params is not None: api_params.pop("url") - assert api_params == expected + assert expected == api_params @pytest.mark.parametrize("meta", [None, 0, "", b"", [], ()]) @@ -528,8 +528,13 @@ async def test_default_params_none(mockserver, caplog): async with mockserver.make_handler(settings) as handler: assert handler._param_parser._automap_params == {"e": "f"} assert handler._param_parser._default_params == {"b": "c"} - assert "Parameter 'a' in the ZYTE_API_DEFAULT_PARAMS setting is None" in caplog.text - assert "Parameter 'd' in the ZYTE_API_AUTOMAP_PARAMS setting is None" in caplog.text + 
_assert_log_messages( + caplog, + [ + "Parameter 'a' in the ZYTE_API_DEFAULT_PARAMS setting is None", + "Parameter 'd' in the ZYTE_API_AUTOMAP_PARAMS setting is None", + ], + ) @pytest.mark.parametrize( @@ -594,12 +599,8 @@ async def test_default_params_merging( for key in ignore_keys: api_params.pop(key) api_params.pop("url") - assert api_params == expected - if warnings: - for warning in warnings: - assert warning in caplog.text - else: - assert not caplog.records + assert expected == api_params + _assert_log_messages(caplog, warnings) @pytest.mark.parametrize( @@ -647,6 +648,47 @@ async def test_default_params_immutability(setting_key, meta_key, setting, meta) assert default_params == setting +def _assert_log_messages( + caplog, messages, *, levelname="WARNING", allow_other_messages=False +): + seen_messages = { + record.getMessage(): False + for record in caplog.records + if record.levelname == levelname + } + if messages: + for message in messages: + matched = False + for seen_message in list(seen_messages): + if message in seen_message: + if seen_messages[seen_message] is True: + raise AssertionError( + f"Expected {levelname} message {message!r} matches more than " + f"1 seen {levelname} messages (all seen {levelname} messages: " + f"{list(seen_messages)!r})" + ) + seen_messages[seen_message] = True + matched = True + break + if not matched: + raise AssertionError( + f"Expected {levelname} message {message!r} not found in {list(seen_messages)!r}" + ) + if not allow_other_messages: + unexpected_messages = [ + message + for message, is_expected in seen_messages.items() + if not is_expected + ] + if unexpected_messages: + raise AssertionError( + f"Got unexpected {levelname} messages: {unexpected_messages}" + ) + else: + assert not seen_messages + caplog.clear() + + async def _test_automap( settings, request_kwargs, meta, expected, warnings, caplog, cookie_jar=None ): @@ -689,12 +731,8 @@ async def _test_automap( with caplog.at_level("WARNING"): api_params = 
param_parser.parse(request) api_params.pop("url") - assert api_params == expected - if warnings: - for warning in warnings: - assert warning in caplog.text - else: - assert not caplog.records + assert expected == api_params + _assert_log_messages(caplog, warnings) @pytest.mark.parametrize( @@ -702,12 +740,11 @@ async def _test_automap( [ # If no other known main output is specified in meta, httpResponseBody # is requested. - ({}, {"httpResponseBody": True, "httpResponseHeaders": True}, []), + ({}, DEFAULT_AUTOMAP_PARAMS, []), ( {"unknownMainOutput": True}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "unknownMainOutput": True, }, [], @@ -717,29 +754,29 @@ async def _test_automap( # may stop working for binary responses in the future. ( {"httpResponseBody": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, [], ), - # If other main outputs are specified in meta, httpRequestBody is not - # set. + # If other main outputs are specified in meta, httpResponseBody and + # httpResponseHeaders are not set. ( {"browserHtml": True}, - {"browserHtml": True}, + {"browserHtml": True, "responseCookies": True}, [], ), ( {"screenshot": True}, - {"screenshot": True}, + {"screenshot": True, "responseCookies": True}, [], ), ( {EXTRACT_KEY: True}, - {EXTRACT_KEY: True}, + {EXTRACT_KEY: True, "responseCookies": True}, [], ), ( {"browserHtml": True, "screenshot": True}, - {"browserHtml": True, "screenshot": True}, + {"browserHtml": True, "screenshot": True, "responseCookies": True}, [], ), # If no known main output is specified, and httpResponseBody is @@ -747,12 +784,12 @@ async def _test_automap( # is added. 
( {"httpResponseBody": False}, - {}, + {"responseCookies": True}, [], ), ( {"httpResponseBody": False, "unknownMainOutput": True}, - {"unknownMainOutput": True}, + {"unknownMainOutput": True, "responseCookies": True}, [], ), # We allow httpResponseBody and browserHtml to be both set to True, in @@ -761,8 +798,7 @@ async def _test_automap( {"httpResponseBody": True, "browserHtml": True}, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -770,12 +806,16 @@ async def _test_automap( # httpResponseBody. ( {"httpResponseHeaders": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, [], ), ( {"httpResponseBody": False, "httpResponseHeaders": True}, - {"httpResponseHeaders": True}, + { + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k != "httpResponseBody" + }, [], ), ], @@ -796,22 +836,22 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): # not be implicitly set to True, it is passed as such. 
( {"httpResponseBody": False, "httpResponseHeaders": True}, - {"httpResponseHeaders": True}, + {"httpResponseHeaders": True, "responseCookies": True}, [], ), ( {"browserHtml": True, "httpResponseHeaders": True}, - {"browserHtml": True, "httpResponseHeaders": True}, + {"browserHtml": True, "httpResponseHeaders": True, "responseCookies": True}, [], ), ( {"screenshot": True, "httpResponseHeaders": True}, - {"screenshot": True, "httpResponseHeaders": True}, + {"screenshot": True, "httpResponseHeaders": True, "responseCookies": True}, [], ), ( {EXTRACT_KEY: True, "httpResponseHeaders": True}, - {EXTRACT_KEY: True, "httpResponseHeaders": True}, + {EXTRACT_KEY: True, "httpResponseHeaders": True, "responseCookies": True}, [], ), ( @@ -820,7 +860,11 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): "httpResponseBody": False, "httpResponseHeaders": True, }, - {"unknownMainOutput": True, "httpResponseHeaders": True}, + { + "unknownMainOutput": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, [], ), # Setting httpResponseHeaders to True where it would be already True @@ -830,12 +874,12 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): # stops being set to True by default in those scenarios. 
( {"httpResponseHeaders": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, [], ), ( - {"httpResponseBody": True, "httpResponseHeaders": True}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, + DEFAULT_AUTOMAP_PARAMS, [], ), ( @@ -846,8 +890,7 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): }, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -855,18 +898,29 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): {"unknownMainOutput": True, "httpResponseHeaders": True}, { "unknownMainOutput": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), # If httpResponseHeaders is set to False, httpResponseHeaders is not # defined, even if httpResponseBody is set to True, implicitly or # explicitly. - ({"httpResponseHeaders": False}, {"httpResponseBody": True}, []), + ( + {"httpResponseHeaders": False}, + { + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k != "httpResponseHeaders" + }, + [], + ), ( {"httpResponseBody": True, "httpResponseHeaders": False}, - {"httpResponseBody": True}, + { + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k != "httpResponseHeaders" + }, [], ), ( @@ -875,12 +929,26 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): "browserHtml": True, "httpResponseHeaders": False, }, - {"browserHtml": True, "httpResponseBody": True}, + { + "browserHtml": True, + **{ + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k != "httpResponseHeaders" + }, + }, [], ), ( {"unknownMainOutput": True, "httpResponseHeaders": False}, - {"unknownMainOutput": True, "httpResponseBody": True}, + { + "unknownMainOutput": True, + **{ + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k != "httpResponseHeaders" + }, + }, [], ), # If httpResponseHeaders is unnecessarily set to False where 
@@ -889,22 +957,47 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): # logged. ( {"httpResponseBody": False, "httpResponseHeaders": False}, - {}, + { + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k not in {"httpResponseBody", "httpResponseHeaders"} + }, ["do not need to set httpResponseHeaders to False"], ), ( {"browserHtml": True, "httpResponseHeaders": False}, - {"browserHtml": True}, + { + "browserHtml": True, + **{ + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k not in {"httpResponseBody", "httpResponseHeaders"} + }, + }, ["do not need to set httpResponseHeaders to False"], ), ( {"screenshot": True, "httpResponseHeaders": False}, - {"screenshot": True}, + { + "screenshot": True, + **{ + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k not in {"httpResponseBody", "httpResponseHeaders"} + }, + }, ["do not need to set httpResponseHeaders to False"], ), ( {EXTRACT_KEY: True, "httpResponseHeaders": False}, - {EXTRACT_KEY: True}, + { + EXTRACT_KEY: True, + **{ + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k not in {"httpResponseBody", "httpResponseHeaders"} + }, + }, ["do not need to set httpResponseHeaders to False"], ), ( @@ -913,7 +1006,14 @@ async def test_automap_main_outputs(meta, expected, warnings, caplog): "httpResponseBody": False, "httpResponseHeaders": False, }, - {"unknownMainOutput": True}, + { + "unknownMainOutput": True, + **{ + k: v + for k, v in DEFAULT_AUTOMAP_PARAMS.items() + if k not in {"httpResponseBody", "httpResponseHeaders"} + }, + }, ["do not need to set httpResponseHeaders to False"], ), ], @@ -930,10 +1030,7 @@ async def test_automap_header_output(meta, expected, warnings, caplog): ( "GET", {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), # Other HTTP methods, regardless of whether they are supported, @@ -944,8 +1041,7 @@ async def test_automap_header_output(meta, expected, warnings, caplog): method, {}, { - 
"httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestMethod": method, }, [], @@ -968,18 +1064,17 @@ async def test_automap_header_output(meta, expected, warnings, caplog): ( None, {"httpRequestMethod": "GET"}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - ["Use Request.method"], + DEFAULT_AUTOMAP_PARAMS, + [ + "Use Request.method", + "unnecessarily defines the Zyte API 'httpRequestMethod' parameter with its default value", + ], ), ( "POST", {"httpRequestMethod": "POST"}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestMethod": "POST", }, ["Use Request.method"], @@ -990,21 +1085,18 @@ async def test_automap_header_output(meta, expected, warnings, caplog): ( "POST", {"httpRequestMethod": "GET"}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [ "Use Request.method", "does not match the Zyte API httpRequestMethod", + "unnecessarily defines the Zyte API 'httpRequestMethod' parameter with its default value", ], ), ( "POST", {"httpRequestMethod": "PUT"}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestMethod": "PUT", }, [ @@ -1020,6 +1112,7 @@ async def test_automap_header_output(meta, expected, warnings, caplog): { "browserHtml": True, "httpRequestMethod": "POST", + "responseCookies": True, }, [], ), @@ -1029,6 +1122,7 @@ async def test_automap_header_output(meta, expected, warnings, caplog): { "screenshot": True, "httpRequestMethod": "POST", + "responseCookies": True, }, [], ), @@ -1038,6 +1132,7 @@ async def test_automap_header_output(meta, expected, warnings, caplog): { EXTRACT_KEY: True, "httpRequestMethod": "POST", + "responseCookies": True, }, [], ), @@ -1045,7 +1140,10 @@ async def test_automap_header_output(meta, expected, warnings, caplog): ) @ensureDeferred async def test_automap_method(method, meta, expected, warnings, caplog): - await 
_test_automap({}, {"method": method}, meta, expected, warnings, caplog) + request_kwargs = {} + if method is not None: + request_kwargs["method"] = method + await _test_automap({}, request_kwargs, meta, expected, warnings, caplog) @pytest.mark.parametrize( @@ -1060,8 +1158,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1073,6 +1170,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -1082,6 +1180,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "requestHeaders": {"referer": "a"}, "screenshot": True, + "responseCookies": True, }, [], ), @@ -1091,6 +1190,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "requestHeaders": {"referer": "a"}, EXTRACT_KEY: True, + "responseCookies": True, }, [], ), @@ -1105,8 +1205,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1118,8 +1217,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, "screenshot": True, }, @@ -1133,8 +1231,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, "screenshot": True, }, 
@@ -1150,8 +1247,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, EXTRACT_KEY: True, }, [], @@ -1167,8 +1263,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, EXTRACT_KEY: True, f"{EXTRACT_KEY}Options": {"extractFrom": "httpResponseBody"}, }, @@ -1186,6 +1281,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): ], EXTRACT_KEY: True, f"{EXTRACT_KEY}Options": {"extractFrom": "httpResponseBody"}, + "responseCookies": True, }, [], ), @@ -1200,8 +1296,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, f"{EXTRACT_KEY}Options": {"extractFrom": "browserHtml"}, "requestHeaders": {"referer": "a"}, EXTRACT_KEY: True, @@ -1224,8 +1319,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "unknownMainOutput": True, }, [], @@ -1239,6 +1333,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"httpResponseBody": False}, { "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -1248,6 +1343,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "requestHeaders": {"referer": "a"}, "unknownMainOutput": True, + "responseCookies": True, }, [], ), @@ -1255,10 +1351,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): ( {"Referer": "a"}, 
{"customHttpRequestHeaders": False}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), ( @@ -1266,6 +1359,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True, "requestHeaders": False}, { "browserHtml": True, + "responseCookies": True, }, [], ), @@ -1278,8 +1372,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1292,8 +1385,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1307,8 +1399,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1320,8 +1411,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1335,6 +1425,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"name": "Referer", "value": "a"}, ], "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -1342,10 +1433,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): ( {"Referer": None}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), ( @@ -1353,6 +1441,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, [], ), @@ -1361,8 +1450,7 @@ 
async def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True, "httpResponseBody": True}, { "browserHtml": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1371,6 +1459,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"screenshot": True}, { "screenshot": True, + "responseCookies": True, }, [], ), @@ -1379,6 +1468,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {EXTRACT_KEY: True}, { EXTRACT_KEY: True, + "responseCookies": True, }, [], ), @@ -1387,8 +1477,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"screenshot": True, "httpResponseBody": True}, { "screenshot": True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1397,8 +1486,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {EXTRACT_KEY: True, "httpResponseBody": True}, { EXTRACT_KEY: True, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1406,8 +1494,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"Referer": None}, {"unknownMainOutput": True}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "unknownMainOutput": True, }, [], @@ -1417,13 +1504,14 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"unknownMainOutput": True, "httpResponseBody": False}, { "unknownMainOutput": True, + "responseCookies": True, }, [], ), ( {"Referer": None}, {"httpResponseBody": False}, - {}, + {"responseCookies": True}, [], ), # Warn if header parameters are used in meta, even if the values match @@ -1440,8 +1528,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + 
**DEFAULT_AUTOMAP_PARAMS, }, ["Use Request.headers instead"], ), @@ -1454,6 +1541,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, ["Use Request.headers instead"], ), @@ -1468,8 +1556,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "b"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["Use Request.headers instead"], ), @@ -1482,6 +1569,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "b"}, + "responseCookies": True, }, ["Use Request.headers instead"], ), @@ -1496,8 +1584,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["Use Request.headers instead"], ), @@ -1510,6 +1597,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, ["Use Request.headers instead"], ), @@ -1527,8 +1615,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "requestHeaders": {"referer": "a"}, }, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "requestHeaders": {"referer": "a"}, }, [], @@ -1546,6 +1633,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], + "responseCookies": True, }, [], ), @@ -1557,6 +1645,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped"], ), @@ -1566,6 +1655,7 @@ async def 
test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped"], ), @@ -1579,6 +1669,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"browserHtml": True}, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped"], ) @@ -1619,8 +1710,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "User-Agent", "value": DEFAULT_USER_AGENT} ], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1629,8 +1719,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {}, { "customHttpRequestHeaders": [{"name": "User-Agent", "value": ""}], - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, [], ), @@ -1638,37 +1727,25 @@ async def test_automap_method(method, meta, expected, warnings, caplog): ( {"X-Crawlera-Foo": "Bar"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( {"X-Crawlera-Client": "Custom client string"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( {"X-Crawlera-Cookies": "enable"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["To achieve the same behavior with Zyte API, do not set request cookies"], ), ( {"X-Crawlera-Cookies": "disable"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["it is the default behavior of Zyte API"], ), ( @@ -1676,8 +1753,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {}, { "cookieManagement": "discard", - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["has been assigned to the 
matching Zyte API request parameter"], ), @@ -1688,27 +1764,22 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "cookieManagement": "bar", - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["has already been defined on the request"], ), ( {"X-Crawlera-Cookies": "foo"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["cannot be mapped to a Zyte API request parameter"], ), ( {"X-Crawlera-JobId": "foo"}, {}, { - "httpResponseBody": True, - "httpResponseHeaders": True, "jobId": "foo", + **DEFAULT_AUTOMAP_PARAMS, }, ["has been assigned to the matching Zyte API request parameter"], ), @@ -1718,65 +1789,51 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "jobId": "bar", }, { - "httpResponseBody": True, - "httpResponseHeaders": True, "jobId": "bar", + **DEFAULT_AUTOMAP_PARAMS, }, ["has already been defined on the request"], ), ( {"X-Crawlera-Max-Retries": "1"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( {"X-Crawlera-No-Bancheck": "1"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( {"X-Crawlera-Profile": "pass"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["cannot be mapped to the matching Zyte API request parameter"], ), ( {"X-Crawlera-Profile": "desktop"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - ["has been assigned to the matching Zyte API request parameter"], + DEFAULT_AUTOMAP_PARAMS, + [ + "has been assigned to the matching Zyte API request parameter", + "unnecessarily defines the Zyte API 'device' parameter with its default value", + ], ), ( {"X-Crawlera-Profile": "mobile"}, {}, { "device": "mobile", - "httpResponseBody": True, - "httpResponseHeaders": 
True, + **DEFAULT_AUTOMAP_PARAMS, }, ["has been assigned to the matching Zyte API request parameter"], ), ( {"X-Crawlera-Profile": "foo"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["cannot be mapped to the matching Zyte API request parameter"], ), ( @@ -1786,18 +1843,14 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "device": "bar", - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["has already been defined on the request"], ), ( {"X-Crawlera-Profile-Pass": "foo"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( @@ -1805,8 +1858,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {}, { "geolocation": "foo", - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["has been assigned to the matching Zyte API request parameter"], ), @@ -1817,36 +1869,26 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "geolocation": "bar", - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, }, ["has already been defined on the request"], ), ( {"X-Crawlera-Session": "foo"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( {"X-Crawlera-Timeout": "40000"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( {"X-Crawlera-Use-Https": "1"}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["This header has been dropped"], ), ( @@ -1856,6 +1898,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -1866,6 +1909,7 
@@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -1876,6 +1920,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["To achieve the same behavior with Zyte API, do not set request cookies"], ), @@ -1886,6 +1931,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["it is the default behavior of Zyte API"], ), @@ -1897,6 +1943,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "cookieManagement": "discard", + "responseCookies": True, }, ["has been assigned to the matching Zyte API request parameter"], ), @@ -1905,10 +1952,12 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "cookieManagement": "bar", + "responseCookies": True, }, { "browserHtml": True, "cookieManagement": "bar", + "responseCookies": True, }, ["has already been defined on the request"], ), @@ -1919,6 +1968,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["cannot be mapped to a Zyte API request parameter"], ), @@ -1930,6 +1980,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "jobId": "foo", + "responseCookies": True, }, ["has been assigned to the matching Zyte API request parameter"], ), @@ -1942,6 +1993,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "jobId": "bar", + "responseCookies": True, }, ["has already been defined on the request"], ), @@ -1952,6 +2004,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ 
-1962,6 +2015,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -1972,6 +2026,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -1982,6 +2037,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -1992,6 +2048,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -2002,6 +2059,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -2017,6 +2075,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "device": "bar", "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -2027,6 +2086,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -2038,6 +2098,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "geolocation": "foo", + "responseCookies": True, }, ["has been assigned to the matching Zyte API request parameter"], ), @@ -2050,6 +2111,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): { "browserHtml": True, "geolocation": "bar", + "responseCookies": True, }, ["has already been defined on the request"], ), @@ -2060,6 +2122,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -2070,6 +2133,7 @@ async 
def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -2080,6 +2144,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["This header has been dropped"], ), @@ -2096,6 +2161,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], + "responseCookies": True, }, [], ), @@ -2109,6 +2175,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): EXTRACT_KEY: True, f"{EXTRACT_KEY}Options": {"extractFrom": "browserHtml"}, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -2124,6 +2191,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): EXTRACT_KEY: True, f"{EXTRACT_KEY_2}Options": {"extractFrom": "httpResponseBody"}, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -2144,6 +2212,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], + "responseCookies": True, }, [], ), @@ -2166,6 +2235,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): {"name": "Referer", "value": "a"}, ], "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -2185,6 +2255,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): "customHttpRequestHeaders": [ {"name": "Referer", "value": "a"}, ], + "responseCookies": True, }, [], ), @@ -2200,6 +2271,7 @@ async def test_automap_method(method, meta, expected, warnings, caplog): f"{EXTRACT_KEY}Options": {"extractFrom": "browserHtml"}, EXTRACT_KEY_2: True, "requestHeaders": {"referer": "a"}, + "responseCookies": True, }, [], ), @@ -2225,8 +2297,7 @@ async def test_automap_headers(headers, meta, expected, warnings, caplog): }, {}, 
{ - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "customHttpRequestHeaders": [ {"name": "User-Agent", "value": ""}, ], @@ -2248,6 +2319,7 @@ async def test_automap_headers(headers, meta, expected, warnings, caplog): { "browserHtml": True, "requestHeaders": {"userAgent": ""}, + "responseCookies": True, }, [], ), @@ -2278,8 +2350,13 @@ async def test_automap_header_settings( "settings,cookies,meta,params,expected,warnings,cookie_jar", [ # Cookies, both for requests and for responses, are enabled based on - # both ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED (default: False) and - # COOKIES_ENABLED (default: True). + # COOKIES_ENABLED (default: True). Disabling cookie mapping at the + # spider level requires setting COOKIES_ENABLED to False. + # + # ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED (deprecated, default: False), + # when enabled, triggers a deprecation warning, and forces the + # experimental name space to be used for automatic cookie parameters if + # COOKIES_ENABLED is also True. 
*( ( settings, @@ -2290,40 +2367,24 @@ async def test_automap_header_settings( "httpResponseBody": True, "httpResponseHeaders": True, }, - setup_warnings - or ( - run_time_warnings - if cast(Dict, settings).get("COOKIES_ENABLED", True) - else [] - ), + warnings, [], ) - for input_cookies, run_time_warnings in ( + for input_cookies in ( + REQUEST_INPUT_COOKIES_EMPTY, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + ) + for settings, warnings in ( ( - REQUEST_INPUT_COOKIES_EMPTY, - [], - ), - ( - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - [ - "there are cookies in the cookiejar, but ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is False", - ], - ), - ) - for settings, setup_warnings in ( - ( - {}, - [], - ), - ( - { - "COOKIES_ENABLED": True, - }, + { + "COOKIES_ENABLED": False, + }, [], ), ( { "COOKIES_ENABLED": False, + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": False, }, [], ), @@ -2333,18 +2394,47 @@ async def test_automap_header_settings( "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, [ - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, but it will have no effect because COOKIES_ENABLED is False.", + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "will have no effect", ], ), + ) + ), + # When COOKIES_ENABLED is True, responseCookies is set to True, and + # requestCookies is filled automatically if there are cookies. 
+ *( + ( + settings, + input_cookies, + {}, + {}, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + **cast(Dict, output_cookies), + }, + [], + [], + ) + for input_cookies, output_cookies in ( + ( + REQUEST_INPUT_COOKIES_EMPTY, + {}, + ), ( - { - "COOKIES_ENABLED": False, - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": False, - }, - [], + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {"requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL}, ), ) + for settings in ( + {}, + {"COOKIES_ENABLED": True}, + ) ), + # When ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is also True, + # responseCookies and requestCookies are defined within the + # experimental name space, and a deprecation warning is issued. *( ( settings, @@ -2359,7 +2449,9 @@ async def test_automap_header_settings( **cast(Dict, output_cookies), }, }, - [], + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], [], ) for input_cookies, output_cookies in ( @@ -2382,74 +2474,51 @@ async def test_automap_header_settings( }, ) ), - # Do not warn about request cookies not being mapped if cookies are - # manually set. + # When ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is not True and + # requestCookies is manually set in the experimental namespace, it is + # made a root parameter with a deprecation warning. + # The experimental namespace is removed if it is now empty or kept + # otherwise. 
*( ( - settings, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + REQUEST_INPUT_COOKIES_EMPTY, {}, { "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, - } + "requestCookies": [{"name": "a", "value": "b"}], + **input_experimental_extra, + }, }, { "httpResponseBody": True, "httpResponseHeaders": True, - "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, - }, + "requestCookies": [{"name": "a", "value": "b"}], + "responseCookies": True, + **output_params_extra, }, - [], + [ + "include experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], [], ) - for settings in ( - {}, - { - "COOKIES_ENABLED": True, - }, + for input_experimental_extra, output_params_extra in ( + ( + {}, + {}, + ), + ( + {"foo": "bar"}, + {"experimental": {"foo": "bar"}}, + ), ) ), # dont_merge_cookies=True on request metadata disables cookies. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - { - "dont_merge_cookies": True, - }, - {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - { - "dont_merge_cookies": True, - }, - {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], - ), - # Do not warn about request cookies not being mapped if - # dont_merge_cookies=True is set on request metadata. 
*( ( settings, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + input_cookies, { "dont_merge_cookies": True, }, @@ -2458,305 +2527,920 @@ async def test_automap_header_settings( "httpResponseBody": True, "httpResponseHeaders": True, }, + warnings, [], - [ - { - "name": "foo", - "value": "bar", - "domain": "example.com", - } - ], ) - for settings in ( - {}, - { - "COOKIES_ENABLED": True, - }, + for input_cookies in ( + REQUEST_INPUT_COOKIES_EMPTY, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + ) + for settings, warnings in ( + ( + {}, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), ) ), # Cookies can be disabled setting the corresponding Zyte API parameter # to False. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - {}, - { - "experimental": { - "responseCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - {}, - { - "experimental": { - "requestCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": {"responseCookies": True}, - }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_EMPTY, - {}, - { - "experimental": { - "responseCookies": False, - "requestCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - {}, - { - "experimental": { - "responseCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + # + # By default, setting experimental parameters to False has no effect. 
+ # If ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, then only + # experimental parameters are taken into account instead. + *( + ( + settings, + input_cookies, + {}, + input_params, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + **cast(Dict, output_params), }, - }, - [], - [], + warnings, + [], + ) + for settings, input_cookies, input_params, output_params, warnings in ( + # No cookies, responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "responseCookies": False, + }, + {}, + [ + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False." + ], + ), + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "responseCookies": False, + } + }, + {}, + [ + "include experimental.responseCookies, which is deprecated", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False.", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "responseCookies": False, + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "responseCookies": False, + } + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + # No cookies, requestCookies disabled. 
+ ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + }, + { + "responseCookies": True, + }, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + } + }, + { + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + }, + { + "experimental": {"responseCookies": True}, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + } + }, + { + "experimental": {"responseCookies": True}, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + # No cookies, requestCookies and responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [ + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False." 
+ ], + ), + ( + {}, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + [ + "include experimental.requestCookies, which is deprecated", + "include experimental.responseCookies, which is deprecated", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False.", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_EMPTY, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + # Cookies, responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "responseCookies": False, + }, + { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + [ + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False." 
+ ], + ), + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "responseCookies": False, + } + }, + { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + [ + "include experimental.responseCookies, which is deprecated", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False.", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "responseCookies": False, + }, + { + "experimental": { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "responseCookies": False, + } + }, + { + "experimental": { + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + # Cookies, requestCookies disabled. 
+ ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + }, + { + "responseCookies": True, + }, + [], + ), + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + } + }, + { + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + }, + { + "experimental": { + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + } + }, + { + "experimental": { + "responseCookies": True, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + # Cookies, requestCookies and responseCookies disabled. + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [ + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False." 
+ ], + ), + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + [ + "include experimental.requestCookies, which is deprecated", + "include experimental.responseCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + "experimental.responseCookies will be removed, and its value will be set as responseCookies", + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False.", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "requestCookies": False, + "responseCookies": False, + }, + {}, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + "responseCookies will be removed, and its value will be set as experimental.responseCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + { + "experimental": { + "requestCookies": False, + "responseCookies": False, + } + }, + {}, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) + ), + # requestCookies, if set manually, prevents automatic mapping. + # + # Setting requestCookies to [] disables automatic mapping, but logs a + # warning recommending to either use False to achieve the same or + # remove the parameter to let automatic mapping work. 
+ *( + ( + settings, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + input_params, + output_params, + warnings, + [], + ) + for override_cookies, override_warnings in ( + ( + cast(List[Dict[str, str]], []), + [ + "is overriding automatic request cookie mapping", + ], + ), + ) + for settings, input_params, output_params, warnings in ( + ( + {}, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, + [ + "unnecessarily defines the Zyte API 'requestCookies' parameter with its default value, [].", + *override_warnings, + ], + ), + ( + {}, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + "unnecessarily defines the Zyte API 'requestCookies' parameter with its default value, [].", + *override_warnings, + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + }, + }, + [ + *cast(List, override_warnings), + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + *override_warnings, + ], + ), + ) + ), + *( + ( + settings, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + input_params, + output_params, + warnings, + [], + ) + for override_cookies in 
((REQUEST_OUTPUT_COOKIES_MAXIMAL,),) + for settings, input_params, output_params, warnings in ( + ( + {}, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "requestCookies": override_cookies, + "responseCookies": True, + }, + [], + ), + ( + {}, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "requestCookies": override_cookies, + "responseCookies": True, + }, + [ + "experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be removed, and its value will be set as requestCookies", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "experimental": { + "requestCookies": override_cookies, + } + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "requestCookies": override_cookies, + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + ], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "requestCookies": override_cookies, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "requestCookies": override_cookies, + "responseCookies": True, + }, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], + ), + ) + ), + # Cookies work for browser and automatic extraction requests as well. 
+ *( + ( + settings, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + params, + { + **params, + **cast(Dict, extra_output_params), + }, + warnings, + [], + ) + for params in ( + { + "browserHtml": True, + }, + { + "screenshot": True, + }, + { + EXTRACT_KEY: True, + }, + ) + for settings, extra_output_params, warnings in ( + ( + {}, + { + "responseCookies": True, + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "experimental": { + "responseCookies": True, + "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) + ), + # Cookies are mapped correctly, both with minimum and maximum cookie + # parameters. + *( + ( + settings, + input_cookies, + {}, + {}, + output_params, + warnings, + [], + ) + for input_cookies, output_cookies in ( + ( + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + REQUEST_OUTPUT_COOKIES_MINIMAL, + ), + ( + REQUEST_INPUT_COOKIES_MINIMAL_LIST, + REQUEST_OUTPUT_COOKIES_MINIMAL, + ), + ( + REQUEST_INPUT_COOKIES_MAXIMAL, + REQUEST_OUTPUT_COOKIES_MAXIMAL, + ), + ) + for settings, output_params, warnings in ( + ( + {}, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + "requestCookies": output_cookies, + }, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + "requestCookies": output_cookies, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) + ), + # Mapping multiple cookies works. 
+ *( + ( + settings, + input_cookies, + {}, + {}, + output_params, + warnings, + [], + ) + for input_cookies, output_cookies in ( + ( + {"a": "b", "c": "d"}, + [ + {"name": "a", "value": "b", "domain": "example.com"}, + {"name": "c", "value": "d", "domain": "example.com"}, + ], + ), + ) + for settings, output_params, warnings in ( + ( + {}, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "responseCookies": True, + "requestCookies": output_cookies, + }, + [], + ), + ( + { + "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "experimental": { + "responseCookies": True, + "requestCookies": output_cookies, + }, + }, + ["deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED"], + ), + ) ), + # If (contradictory) values are set for requestCookies or + # responseCookies both outside and inside the experimental namespace, + # the non-experimental value takes priority. This is so even if + # ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED is True, in which case the + # outside value is moved into the experimental namespace, overriding + # its value. 
( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, {}, - { - "experimental": { - "requestCookies": False, - } - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": {"responseCookies": True}, - }, - [], - [], - ), - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + REQUEST_INPUT_COOKIES_EMPTY, {}, { + "responseCookies": True, "experimental": { "responseCookies": False, - "requestCookies": False, - } + }, }, { "httpResponseBody": True, "httpResponseHeaders": True, + "responseCookies": True, }, - [], + [ + "include experimental.responseCookies, which is deprecated", + "defines both responseCookies (True) and experimental.responseCookies (False)", + ], [], ), - # Setting requestCookies to [] disables automatic mapping, but logs a - # a warning recommending to either use False to achieve the same or - # remove the parameter to let automatic mapping work. ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + REQUEST_INPUT_COOKIES_EMPTY, {}, { + "responseCookies": False, "experimental": { - "requestCookies": [], - } + "responseCookies": True, + }, }, { "httpResponseBody": True, "httpResponseHeaders": True, - "experimental": { - "requestCookies": [], - "responseCookies": True, - }, }, [ - "is overriding automatic request cookie mapping", + "defines both responseCookies (False) and experimental.responseCookies (True)", + "include experimental.responseCookies, which is deprecated", + "unnecessarily defines the Zyte API 'responseCookies' parameter with its default value, False.", ], [], ), - # Cookies work for browser and automatic extraction requests as well. 
- ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - {}, - { - "browserHtml": True, - }, - { - "browserHtml": True, - "experimental": { + *( + ( + {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, + {}, + { + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], + "experimental": { + "requestCookies": [ + {"name": experimental_k, "value": experimental_v}, + ], + }, + }, + { + "httpResponseBody": True, + "httpResponseHeaders": True, + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, }, - }, - [], - [], + [ + "include experimental.requestCookies, which is deprecated", + "experimental.requestCookies will be ignored", + ], + [], + ) + for regular_k, regular_v, experimental_k, experimental_v in ( + ("b", "2", "c", "3"), + ("c", "3", "b", "2"), + ) ), + # Now with ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED=True ( { "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + REQUEST_INPUT_COOKIES_EMPTY, {}, { - "screenshot": True, + "responseCookies": True, + "experimental": { + "responseCookies": False, + }, }, { - "screenshot": True, + "httpResponseBody": True, + "httpResponseHeaders": True, "experimental": { "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, }, }, - [], + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "defines both responseCookies (True) and experimental.responseCookies (False)", + ], [], ), ( { "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, + REQUEST_INPUT_COOKIES_EMPTY, {}, { - EXTRACT_KEY: True, - }, - { - EXTRACT_KEY: True, + "responseCookies": False, "experimental": { "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MINIMAL, }, }, - [], + { + "httpResponseBody": True, + "httpResponseHeaders": True, + }, + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "defines both 
responseCookies (False) and experimental.responseCookies (True)", + ], [], ), - # Cookies are mapped correctly, both with minimum and maximum cookie - # parameters. *( ( { "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, }, - input, - {}, + REQUEST_INPUT_COOKIES_MINIMAL_DICT, {}, + { + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], + "experimental": { + "requestCookies": [ + {"name": experimental_k, "value": experimental_v}, + ], + }, + }, { "httpResponseBody": True, "httpResponseHeaders": True, "experimental": { + "requestCookies": [ + {"name": regular_k, "value": regular_v}, + ], "responseCookies": True, - "requestCookies": output, }, }, - [], + [ + "deprecated ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED", + "requestCookies will be removed, and its value will be set as experimental.requestCookies", + ], [], ) - for input, output in ( - ( - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - REQUEST_OUTPUT_COOKIES_MINIMAL, - ), - ( - REQUEST_INPUT_COOKIES_MINIMAL_LIST, - REQUEST_OUTPUT_COOKIES_MINIMAL, - ), - ( - REQUEST_INPUT_COOKIES_MAXIMAL, - REQUEST_OUTPUT_COOKIES_MAXIMAL, - ), + for regular_k, regular_v, experimental_k, experimental_v in ( + ("b", "2", "c", "3"), + ("c", "3", "b", "2"), ) ), - # requestCookies, if set manually, prevents automatic mapping. - ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - REQUEST_INPUT_COOKIES_MINIMAL_DICT, - {}, - { - "experimental": { - "requestCookies": REQUEST_OUTPUT_COOKIES_MAXIMAL, - }, - }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": { - "responseCookies": True, - "requestCookies": REQUEST_OUTPUT_COOKIES_MAXIMAL, - }, - }, - [], - [], - ), - # Mapping multiple cookies works. 
- ( - { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, - }, - {"a": "b", "c": "d"}, - {}, - {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - "experimental": { - "responseCookies": True, - "requestCookies": [ - {"name": "a", "value": "b", "domain": "example.com"}, - {"name": "c", "value": "d", "domain": "example.com"}, - ], - }, - }, - [], - [], - ), ], ) @ensureDeferred @@ -2787,7 +3471,6 @@ async def test_automap_all_cookies(meta): Zyte API requests should include all cookie jar cookies, regardless of the target URL domain.""" settings: Dict[str, Any] = { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_TRANSPARENT_MODE": True, } crawler = await get_crawler(settings) @@ -2818,7 +3501,7 @@ async def test_automap_all_cookies(meta): ) cookie_middleware.process_request(request1, spider=None) api_params = param_parser.parse(request1) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "a", "value": "b", "domain": "a.example"}, # https://github.com/scrapy/scrapy/issues/5841 # {"name": "c", "value": "d", "domain": "b.example"}, @@ -2862,9 +3545,7 @@ async def test_automap_all_cookies(meta): cookie_middleware.process_request(request2, spider=None) api_params = param_parser.parse(request2) - assert sort_dict_list( - api_params["experimental"]["requestCookies"] - ) == sort_dict_list( + assert sort_dict_list(api_params["requestCookies"]) == sort_dict_list( [ {"name": "e", "value": "f", "domain": ".c.example"}, {"name": "i", "value": "j", "domain": ".d.example"}, @@ -2895,7 +3576,6 @@ async def test_automap_cookie_jar(meta): ) request4 = Request(url="https://example.com/4", meta={**meta, "cookiejar": "a"}) settings: Dict[str, Any] = { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_TRANSPARENT_MODE": True, } crawler = await get_crawler(settings) @@ -2905,20 +3585,18 @@ async def test_automap_cookie_jar(meta): cookie_middleware.process_request(request1, spider=None) api_params 
= param_parser.parse(request1) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "z", "value": "y", "domain": "example.com"} ] cookie_middleware.process_request(request2, spider=None) api_params = param_parser.parse(request2) - assert "requestCookies" not in api_params["experimental"] + assert "requestCookies" not in api_params cookie_middleware.process_request(request3, spider=None) api_params = param_parser.parse(request3) - assert sort_dict_list( - api_params["experimental"]["requestCookies"] - ) == sort_dict_list( + assert sort_dict_list(api_params["requestCookies"]) == sort_dict_list( [ {"name": "x", "value": "w", "domain": "example.com"}, {"name": "z", "value": "y", "domain": "example.com"}, @@ -2927,9 +3605,7 @@ async def test_automap_cookie_jar(meta): cookie_middleware.process_request(request4, spider=None) api_params = param_parser.parse(request4) - assert sort_dict_list( - api_params["experimental"]["requestCookies"] - ) == sort_dict_list( + assert sort_dict_list(api_params["requestCookies"]) == sort_dict_list( [ {"name": "x", "value": "w", "domain": "example.com"}, {"name": "z", "value": "y", "domain": "example.com"}, @@ -2947,7 +3623,6 @@ async def test_automap_cookie_jar(meta): @ensureDeferred async def test_automap_cookie_limit(meta, caplog): settings: Dict[str, Any] = { - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_MAX_COOKIES": 1, "ZYTE_API_TRANSPARENT_MODE": True, } @@ -2968,11 +3643,10 @@ async def test_automap_cookie_limit(meta, caplog): caplog.clear() with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "z", "value": "y", "domain": "example.com"} ] - assert not caplog.records - caplog.clear() + _assert_log_messages(caplog, []) # Verify that requests with 2 cookies results in only 1 cookie set and a # warning. 
@@ -2985,13 +3659,16 @@ async def test_automap_cookie_limit(meta, caplog): cookie_middleware.process_request(request, spider=None) with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] in [ + assert api_params["requestCookies"] in [ [{"name": "z", "value": "y", "domain": "example.com"}], [{"name": "x", "value": "w", "domain": "example.com"}], ] - assert "would get 2 cookies" in caplog.text - assert "limited to 1 cookies" in caplog.text - caplog.clear() + _assert_log_messages( + caplog, + [ + "would get 2 cookies, but request cookie automatic mapping is limited to 1 cookies" + ], + ) # Verify that 1 cookie in the cookie jar and 1 cookie in the request count # as 2 cookies, resulting in only 1 cookie set and a warning. @@ -3010,13 +3687,16 @@ async def test_automap_cookie_limit(meta, caplog): cookie_middleware.process_request(request, spider=None) with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] in [ + assert api_params["requestCookies"] in [ [{"name": "z", "value": "y", "domain": "example.com"}], [{"name": "x", "value": "w", "domain": "example.com"}], ] - assert "would get 2 cookies" in caplog.text - assert "limited to 1 cookies" in caplog.text - caplog.clear() + _assert_log_messages( + caplog, + [ + "would get 2 cookies, but request cookie automatic mapping is limited to 1 cookies" + ], + ) # Vefify that unrelated-domain cookies count for the limit. 
pre_request = Request( @@ -3034,13 +3714,16 @@ async def test_automap_cookie_limit(meta, caplog): cookie_middleware.process_request(request, spider=None) with caplog.at_level("WARNING"): api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] in [ + assert api_params["requestCookies"] in [ [{"name": "z", "value": "y", "domain": "other.example"}], [{"name": "x", "value": "w", "domain": "example.com"}], ] - assert "would get 2 cookies" in caplog.text - assert "limited to 1 cookies" in caplog.text - caplog.clear() + _assert_log_messages( + caplog, + [ + "would get 2 cookies, but request cookie automatic mapping is limited to 1 cookies" + ], + ) class CustomCookieJar(CookieJar): @@ -3083,7 +3766,6 @@ async def test_automap_custom_cookie_middleware(): f"{mw_cls.__module__}.{mw_cls.__qualname__}": 700, }, "ZYTE_API_COOKIE_MIDDLEWARE": f"{mw_cls.__module__}.{mw_cls.__qualname__}", - "ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True, "ZYTE_API_TRANSPARENT_MODE": True, } crawler = await get_crawler(settings) @@ -3094,7 +3776,7 @@ async def test_automap_custom_cookie_middleware(): request = Request(url="https://example.com/1") cookie_middleware.process_request(request, spider=None) api_params = param_parser.parse(request) - assert api_params["experimental"]["requestCookies"] == [ + assert api_params["requestCookies"] == [ {"name": "z", "value": "y", "domain": "example.com"} ] @@ -3107,8 +3789,7 @@ async def test_automap_custom_cookie_middleware(): "a", {}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestBody": "YQ==", }, [], @@ -3119,8 +3800,7 @@ async def test_automap_custom_cookie_middleware(): "a", {"httpRequestBody": "Yg=="}, { - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestBody": "Yg==", }, [ @@ -3134,8 +3814,7 @@ async def test_automap_custom_cookie_middleware(): "a", {"httpRequestBody": "YQ=="}, { - "httpResponseBody": True, - 
"httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, "httpRequestBody": "YQ==", }, ["Use Request.body instead"], @@ -3147,6 +3826,7 @@ async def test_automap_custom_cookie_middleware(): { "browserHtml": True, "httpRequestBody": "YQ==", + "responseCookies": True, }, [], ), @@ -3156,6 +3836,7 @@ async def test_automap_custom_cookie_middleware(): { "httpRequestBody": "YQ==", "screenshot": True, + "responseCookies": True, }, [], ), @@ -3165,6 +3846,7 @@ async def test_automap_custom_cookie_middleware(): { "httpRequestBody": "YQ==", EXTRACT_KEY: True, + "responseCookies": True, }, [], ), @@ -3189,6 +3871,7 @@ async def test_automap_body(body, meta, expected, warnings, caplog): }, { "browserHtml": True, + "responseCookies": True, }, ["unnecessarily defines"], ), @@ -3196,20 +3879,14 @@ async def test_automap_body(body, meta, expected, warnings, caplog): { "browserHtml": False, }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["unnecessarily defines"], ), ( { "screenshot": False, }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["unnecessarily defines"], ), ( @@ -3219,6 +3896,7 @@ async def test_automap_body(body, meta, expected, warnings, caplog): }, { "screenshot": True, + "responseCookies": True, }, ["do not need to set httpResponseHeaders to False"], ), @@ -3226,10 +3904,7 @@ async def test_automap_body(body, meta, expected, warnings, caplog): { EXTRACT_KEY: False, }, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, ["unnecessarily defines"], ), ( @@ -3239,6 +3914,7 @@ async def test_automap_body(body, meta, expected, warnings, caplog): }, { EXTRACT_KEY: True, + "responseCookies": True, }, ["do not need to set httpResponseHeaders to False"], ), @@ -3255,10 +3931,7 @@ async def test_automap_default_parameter_cleanup(meta, expected, warnings, caplo ( {}, {}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + 
DEFAULT_AUTOMAP_PARAMS, [], ), ( @@ -3266,6 +3939,7 @@ async def test_automap_default_parameter_cleanup(meta, expected, warnings, caplo {"screenshot": True, "browserHtml": False}, { "screenshot": True, + "responseCookies": True, }, [], ), @@ -3277,16 +3951,14 @@ async def test_automap_default_parameter_cleanup(meta, expected, warnings, caplo {"networkCapture": None}, { "browserHtml": True, + "responseCookies": True, }, [], ), ( {"device": "mobile"}, {"device": "desktop"}, - { - "httpResponseBody": True, - "httpResponseHeaders": True, - }, + DEFAULT_AUTOMAP_PARAMS, [], ), ], @@ -3309,12 +3981,8 @@ async def test_default_params_automap(default_params, meta, expected, warnings, with caplog.at_level("WARNING"): api_params = param_parser.parse(request) api_params.pop("url") - assert api_params == expected - if warnings: - for warning in warnings: - assert warning in caplog.text - else: - assert not caplog.records + assert expected == api_params + _assert_log_messages(caplog, warnings) @pytest.mark.parametrize( @@ -3339,6 +4007,103 @@ async def test_default_params_false(default_params): assert api_params is None +@pytest.mark.parametrize( + "field", + [ + "responseCookies", + "requestCookies", + "cookieManagement", + ], +) +@ensureDeferred +async def test_field_deprecation_warnings(field, caplog): + input_params = {"experimental": {field: "foo"}} + + # Raw + raw_request = Request( + url="https://example.com", + meta={"zyte_api": input_params}, + ) + crawler = await get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + output_params.pop("url") + assert input_params == output_params + _assert_log_messages(caplog, [f"experimental.{field}, which is deprecated"]) + with caplog.at_level("WARNING"): + # Only warn once per field. 
+ param_parser.parse(raw_request) + _assert_log_messages(caplog, []) + + # Automap + raw_request = Request( + url="https://example.com", + meta={"zyte_api_automap": input_params}, + ) + crawler = await get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + output_params.pop("url") + for key, value in input_params["experimental"].items(): + assert output_params[key] == value + _assert_log_messages( + caplog, + [ + f"experimental.{field}, which is deprecated", + f"experimental.{field} will be removed, and its value will be set as {field}", + ], + ) + with caplog.at_level("WARNING"): + # Only warn once per field. + param_parser.parse(raw_request) + _assert_log_messages( + caplog, + [f"experimental.{field} will be removed, and its value will be set as {field}"], + ) + + +@ensureDeferred +async def test_field_deprecation_warnings_false_positives(caplog): + """Make sure that the code tested by test_field_deprecation_warnings does + not trigger for unrelated fields that just happen to share their name space + (experimental).""" + + input_params = {"experimental": {"foo": "bar"}} + + # Raw + raw_request = Request( + url="https://example.com", + meta={"zyte_api": input_params}, + ) + crawler = await get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + output_params.pop("url") + assert input_params == output_params + _assert_log_messages(caplog, []) + + # Automap + raw_request = Request( + url="https://example.com", + meta={"zyte_api_automap": input_params}, + ) + crawler = await get_crawler(SETTINGS) + handler = get_download_handler(crawler, "https") + param_parser = handler._param_parser + with caplog.at_level("WARNING"): + output_params = param_parser.parse(raw_request) + 
output_params.pop("url") + for key, value in input_params.items(): + assert output_params[key] == value + _assert_log_messages(caplog, []) + + @ensureDeferred async def _process_request(crawler, request, is_start_request=False): spider = crawler.spider @@ -3809,22 +4574,22 @@ async def test_serp_header_mapping(extract_from, headers, warnings, caplog): [ ( {}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, [], ), ( {"device": "desktop"}, - {"httpResponseBody": True, "httpResponseHeaders": True}, + DEFAULT_AUTOMAP_PARAMS, ["'device' parameter with its default value, 'desktop'"], ), ( {"device": "mobile"}, - {"device": "mobile", "httpResponseBody": True, "httpResponseHeaders": True}, + {"device": "mobile", **DEFAULT_AUTOMAP_PARAMS}, [], ), ( {"device": "auto"}, # Unknown parameter value - {"device": "auto", "httpResponseBody": True, "httpResponseHeaders": True}, + {"device": "auto", **DEFAULT_AUTOMAP_PARAMS}, [], ), ], diff --git a/tests/test_middlewares.py b/tests/test_middlewares.py index 3959b0f6..b3eec6d2 100644 --- a/tests/test_middlewares.py +++ b/tests/test_middlewares.py @@ -111,8 +111,7 @@ async def test_preserve_delay(mw_cls, processor, settings, preserve): async def test_cookies(): """Make sure that the downloader middleware does not crash on Zyte API requests with cookies.""" - settings = {"ZYTE_API_EXPERIMENTAL_COOKIES_ENABLED": True} - crawler = get_crawler(settings_dict=settings) + crawler = get_crawler() await crawler.crawl("a") spider = crawler.spider middleware = create_instance( diff --git a/tests/test_providers.py b/tests/test_providers.py index b62aecb8..5bf51426 100644 --- a/tests/test_providers.py +++ b/tests/test_providers.py @@ -47,7 +47,7 @@ from scrapy_zyte_api.handler import ScrapyZyteAPIDownloadHandler from scrapy_zyte_api.providers import _AUTO_PAGES, _ITEM_KEYWORDS, ZyteApiProvider -from . import SETTINGS +from . 
import DEFAULT_AUTOMAP_PARAMS, SETTINGS from .mockserver import get_ephemeral_port PROVIDER_PARAMS = {"geolocation": "IE"} @@ -751,10 +751,10 @@ def parse_(self, response: DummyResponse, page: SomePage): assert len(params) == 1 assert params[0] == { "url": url, - "product": True, "httpResponseBody": True, - "productOptions": product_options, "httpResponseHeaders": True, + "product": True, + "productOptions": product_options, } assert type(item["page"].response) is AnyResponse @@ -826,9 +826,8 @@ def parse_(self, response: DummyResponse, page: SomePage): assert params[0] == { "url": url, "product": True, - "httpResponseBody": True, - "httpResponseHeaders": True, "productOptions": product_options, + **DEFAULT_AUTOMAP_PARAMS, } assert type(item["page"].response) is AnyResponse @@ -946,8 +945,7 @@ def parse_(self, response: DummyResponse, page: SomePage): assert len(params) == 1 assert params[0] == { "url": url, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, } assert type(item["page"].response) is AnyResponse @@ -979,8 +977,7 @@ def parse_(self, response: DummyResponse, page: SomePage): assert len(params) == 2 assert params[0] == { "url": url, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, } assert params[1] == {"url": url, "browserHtml": True} @@ -1019,8 +1016,7 @@ def parse_(self, response: DummyResponse, page1: FirstPage, page2: SecondPage): assert len(params) == 1 assert params[0] == { "url": url, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, } assert type(item["page1"].http_response) is HttpResponse assert type(item["page2"].http_response) is HttpResponse @@ -1055,8 +1051,7 @@ def parse_(self, response: DummyResponse, page1: FirstPage, page2: SecondPage): assert len(params) == 2 assert params[0] == { "url": url, - "httpResponseBody": True, - "httpResponseHeaders": True, + **DEFAULT_AUTOMAP_PARAMS, } assert params[1] == {"url": url, 
"browserHtml": True} diff --git a/tests/test_request_fingerprinter.py b/tests/test_request_fingerprinter.py index 619be9e3..9b6a5de5 100644 --- a/tests/test_request_fingerprinter.py +++ b/tests/test_request_fingerprinter.py @@ -108,6 +108,69 @@ async def test_headers(): assert fingerprint1 == fingerprint2 +@ensureDeferred +async def test_cookies(): + crawler = await get_crawler() + fingerprinter = create_instance( + ScrapyZyteAPIRequestFingerprinter, settings=crawler.settings, crawler=crawler + ) + request1 = Request( + "https://example.com", + meta={ + "zyte_api": { + "responseCookies": False, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + "experimental": { + "responseCookies": False, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + }, + } + }, + ) + # Same with responseCookies set to `True`. + request2 = Request( + "https://example.com", + meta={ + "zyte_api": { + "responseCookies": True, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + "experimental": { + "responseCookies": False, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + }, + } + }, + ) + # Same with experimental.responseCookies set to `True`. 
+ request3 = Request( + "https://example.com", + meta={ + "zyte_api": { + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + "experimental": { + "responseCookies": True, + "requestCookies": [{"name": "foo", "value": "bar"}], + "cookieManagement": False, + }, + } + }, + ) + request4 = Request("https://example.com", meta={"zyte_api": True}) + fingerprint1 = fingerprinter.fingerprint(request1) + fingerprint2 = fingerprinter.fingerprint(request2) + fingerprint3 = fingerprinter.fingerprint(request3) + fingerprint4 = fingerprinter.fingerprint(request4) + assert fingerprint1 != fingerprint2 + assert fingerprint1 != fingerprint3 + assert fingerprint1 == fingerprint4 + assert fingerprint2 == fingerprint3 + + @pytest.mark.parametrize( "url,params,fingerprint", ( @@ -263,9 +326,7 @@ async def test_only_end_parameters_matter(): "zyte_api": { "httpResponseBody": True, "httpResponseHeaders": True, - "experimental": { - "responseCookies": True, - }, + "responseCookies": True, } }, ) diff --git a/tests/test_responses.py b/tests/test_responses.py index fa6cdae6..25547f10 100644 --- a/tests/test_responses.py +++ b/tests/test_responses.py @@ -67,9 +67,7 @@ def raw_api_response_browser(): {"name": "Content-Length", "value": str(len(PAGE_CONTENT))}, ], "statusCode": 200, - "experimental": { - "responseCookies": INPUT_COOKIES, - }, + "responseCookies": INPUT_COOKIES, } @@ -83,9 +81,7 @@ def raw_api_response_body(): {"name": "Content-Length", "value": str(len(PAGE_CONTENT))}, ], "statusCode": 200, - "experimental": { - "responseCookies": INPUT_COOKIES, - }, + "responseCookies": INPUT_COOKIES, } @@ -100,9 +96,7 @@ def raw_api_response_mixed(): {"name": "Content-Length", "value": str(len(PAGE_CONTENT_2))}, ], "statusCode": 200, - "experimental": { - "responseCookies": INPUT_COOKIES, - }, + "responseCookies": INPUT_COOKIES, } @@ -568,3 +562,48 @@ def test_status_code(base_kwargs_func, kwargs, expected_status_code): response = _process_response(api_response, 
Request(api_response["url"])) assert response is not None assert response.status == expected_status_code + + +@pytest.mark.parametrize( + "api_response", + [ + { + "url": "https://example.com", + "httpResponseBody": b64encode(PAGE_CONTENT.encode("utf-8")), + "statusCode": 200, + "responseCookies": INPUT_COOKIES, + }, + { + "url": "https://example.com", + "httpResponseBody": b64encode(PAGE_CONTENT.encode("utf-8")), + "statusCode": 200, + "experimental": { + "responseCookies": INPUT_COOKIES, + }, + }, + { + "url": "https://example.com", + "httpResponseBody": b64encode(PAGE_CONTENT.encode("utf-8")), + "statusCode": 200, + "responseCookies": INPUT_COOKIES, + "experimental": { + "responseCookies": [ + { + "name": "foo", + "value": "bar", + }, + ], + }, + }, + ], +) +def test_cookies(api_response, caplog): + with caplog.at_level("WARNING"): + response = _process_response(api_response, Request(api_response["url"])) + assert response is not None + assert response.headers == { + **OUTPUT_COOKIE_HEADERS, + } + # Do not warn about the deprecated experimental.responseCookies response + # parameter, we already warn about it when found among request parameters. + assert not caplog.text