diff --git a/duckduckgo_search/duckduckgo_search.py b/duckduckgo_search/duckduckgo_search.py index c973092..9d6f346 100644 --- a/duckduckgo_search/duckduckgo_search.py +++ b/duckduckgo_search/duckduckgo_search.py @@ -30,6 +30,7 @@ def __init__(self, headers=None, proxies=None, timeout=10) -> None: headers = { "User-Agent": choice(USERAGENTS), "Accept": "application/json, text/javascript, */*; q=0.01", + "Accept-Language": "en-US,en;q=0.5", "Referer": "https://duckduckgo.com/", } self._client = httpx.Client(headers=headers, proxies=proxies, timeout=timeout, http2=True) @@ -44,9 +45,11 @@ def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._models.Re for i in range(3): try: resp = self._client.request(method, url, follow_redirects=True, **kwargs) - if _is_500_in_url(str(resp.url)) or resp.status_code == 202: + if _is_500_in_url(str(resp.url)): raise httpx._exceptions.HTTPError("") resp.raise_for_status() + if resp.status_code == 202: + return 202 if resp.status_code == 200: return resp except Exception as ex: @@ -129,7 +132,7 @@ def _text_api( "kl": region, "l": region, "bing_market": f"{region.split('-')[0]}-{region.split('-')[-1].upper()}", - "s": 0, + "s": "0", "df": timelimit, "vqd": vqd, "o": "json", @@ -144,10 +147,13 @@ def _text_api( payload["p"] = "1" cache = set() - for _ in range(10): + for _ in range(11): resp = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload) if resp is None: return + if resp == 202: + payload["s"] = f"{int(payload['s']) + 50}" + continue try: page_data = resp.json().get("results", None) except Exception: @@ -200,15 +206,19 @@ def _text_html( safesearch_base = {"on": 1, "moderate": -1, "off": -2} payload = { "q": keywords, + "s": "0", "kl": region, "p": safesearch_base[safesearch.lower()], "df": timelimit, } cache: Set[str] = set() - for _ in range(10): + for _ in range(11): resp = self._get_url("POST", "https://html.duckduckgo.com/html", data=payload) if resp is None: return + if resp == 202: + payload["s"] = f"{int(payload['s']) + 50}" + continue tree = html.fromstring(resp.content) if tree.xpath('//div[@class="no-results"]/text()'): @@ -239,7 +249,6 @@ def _text_html( names = next_page.xpath('.//input[@type="hidden"]/@name') values = next_page.xpath('.//input[@type="hidden"]/@value') payload = {n: v for n, v in zip(names, values)} - # sleep(0.75) def _text_lite( self, @@ -271,10 +280,13 @@ def _text_lite( "df": timelimit, } cache: Set[str] = set() - for _ in range(10): + for _ in range(11): resp = self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload) if resp is None: return + if resp == 202: + payload["s"] = f"{int(payload['s']) + 50}" + continue if b"No more results." in resp.content: return @@ -309,7 +321,6 @@ def _text_lite( return payload["s"] = next_page_s[0] payload["vqd"] = _extract_vqd(resp.content) - # sleep(0.75) def images( self, diff --git a/duckduckgo_search/duckduckgo_search_async.py b/duckduckgo_search/duckduckgo_search_async.py index 4816c31..cf812ac 100644 --- a/duckduckgo_search/duckduckgo_search_async.py +++ b/duckduckgo_search/duckduckgo_search_async.py @@ -30,6 +30,7 @@ def __init__(self, headers=None, proxies=None, timeout=10) -> None: headers = { "User-Agent": choice(USERAGENTS), "Accept": "application/json, text/javascript, */*; q=0.01", + "Accept-Language": "en-US,en;q=0.5", "Referer": "https://duckduckgo.com/", } self._client = httpx.AsyncClient(headers=headers, proxies=proxies, timeout=timeout, http2=True) @@ -44,9 +45,11 @@ async def _get_url(self, method: str, url: str, **kwargs) -> Optional[httpx._mod for i in range(3): try: resp = await self._client.request(method, url, follow_redirects=True, **kwargs) - if _is_500_in_url(str(resp.url)) or resp.status_code == 202: + if _is_500_in_url(str(resp.url)): raise httpx._exceptions.HTTPError("") resp.raise_for_status() + if resp.status_code == 202: + return 202 if resp.status_code == 200: return resp except Exception as ex: @@ -131,7 +134,7 @@ async def _text_api( "kl": region, "l": region, "bing_market": region, - "s": 0, + "s": "0", "df": timelimit, "vqd": vqd, "o": "json", @@ -146,10 +149,13 @@ async def _text_api( payload["p"] = "1" cache = set() - for _ in range(10): + for _ in range(11): resp = await self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload) if resp is None: return + if resp == 202: + payload["s"] = f"{int(payload['s']) + 50}" + continue try: page_data = resp.json().get("results", None) except Exception: @@ -202,15 +208,19 @@ async def _text_html( safesearch_base = {"on": 1, "moderate": -1, "off": -2} payload = { "q": keywords, + "s": "0", "kl": region, "p": safesearch_base[safesearch.lower()], "df": timelimit, } cache: Set[str] = set() - for _ in range(10): + for _ in range(11): resp = await self._get_url("POST", "https://html.duckduckgo.com/html", data=payload) if resp is None: return + if resp == 202: + payload["s"] = f"{int(payload['s']) + 50}" + continue tree = html.fromstring(resp.content) if tree.xpath('//div[@class="no-results"]/text()'): @@ -241,7 +251,6 @@ async def _text_html( names = next_page.xpath('.//input[@type="hidden"]/@name') values = next_page.xpath('.//input[@type="hidden"]/@value') payload = {n: v for n, v in zip(names, values)} - # await asyncio.sleep(0.75) async def _text_lite( self, @@ -273,10 +282,13 @@ async def _text_lite( "df": timelimit, } cache: Set[str] = set() - for _ in range(10): + for _ in range(11): resp = await self._get_url("POST", "https://lite.duckduckgo.com/lite/", data=payload) if resp is None: return + if resp == 202: + payload["s"] = f"{int(payload['s']) + 50}" + continue if b"No more results." in resp.content: return @@ -311,7 +323,6 @@ async def _text_lite( return payload["s"] = next_page_s[0] payload["vqd"] = _extract_vqd(resp.content) - # await asyncio.sleep(0.75) async def images( self, diff --git a/duckduckgo_search/version.py b/duckduckgo_search/version.py index 6ea38dc..481f569 100755 --- a/duckduckgo_search/version.py +++ b/duckduckgo_search/version.py @@ -1 +1 @@ -__version__ = "3.9.5" +__version__ = "3.9.6"