DDGS.text: deprecate backend='api'

deedy5 · Dec 26, 2024 · 3ee8e08
1 parent 630d598
commit 3ee8e08
Show file tree

Hide file tree

Showing 3 changed files with 9 additions and 80 deletions.
diff --git a/duckduckgo_search/duckduckgo_search.py b/duckduckgo_search/duckduckgo_search.py
@@ -22,7 +22,6 @@
     _extract_vqd,
     _normalize,
     _normalize_url,
-    _text_extract_json,
     json_loads,
 )
 
@@ -221,7 +220,6 @@ def text(
             timelimit: d, w, m, y. Defaults to None.
             backend: auto, api, html, lite. Defaults to auto.
                 auto - try all backends in random order,
-                api - collect data from https://duckduckgo.com,
                 html - collect data from https://html.duckduckgo.com,
                 lite - collect data from https://lite.duckduckgo.com,
                 ecosia - collect data from https://www.ecosia.com.
@@ -235,16 +233,16 @@ def text(
             RatelimitException: Inherits from DuckDuckGoSearchException, raised for exceeding API request rate limits.
             TimeoutException: Inherits from DuckDuckGoSearchException, raised for API request timeouts.
         """
-
-        backends = ["api", "html", "lite", "ecosia"] if backend == "auto" else [backend]
+        if backend == "api":
+            warnings.warn("'api' backend is deprecated, using backend='auto'", stacklevel=2)
+            backend = "auto"
+        backends = ["html", "lite", "ecosia"] if backend == "auto" else [backend]
         shuffle(backends)
 
         results, err = [], None
         for b in backends:
             try:
-                if b == "api":
-                    results = self._text_api(keywords, region, safesearch, timelimit, max_results)
-                elif b == "html":
+                if b == "html":
                     results = self._text_html(keywords, region, timelimit, max_results)
                 elif b == "lite":
                     results = self._text_lite(keywords, region, timelimit, max_results)
@@ -257,58 +255,6 @@ def text(
 
         raise DuckDuckGoSearchException(err)
 
-    def _text_api(
-        self,
-        keywords: str,
-        region: str = "wt-wt",
-        safesearch: str = "moderate",
-        timelimit: str | None = None,
-        max_results: int | None = None,
-    ) -> list[dict[str, str]]:
-        assert keywords, "keywords is mandatory"
-
-        vqd = self._get_vqd(keywords)
-
-        payload = {
-            "q": keywords,
-            "kl": region,
-            "l": region,
-            "p": "1" if safesearch == "on" else "",
-            "s": "0",
-            "df": timelimit or "",
-            "vqd": vqd,
-            "bing_market": f"{region[3:]}-{region[:2].upper()}",
-            "ex": "-1" if safesearch == "moderate" else "-2" if safesearch == "off" else "",
-        }
-
-        cache = set()
-        results: list[dict[str, str]] = []
-
-        for _ in range(3):
-            resp_content = self._get_url("GET", "https://links.duckduckgo.com/d.js", params=payload)
-            page_data = _text_extract_json(resp_content, keywords)
-            for row in page_data:
-                href = row.get("u")
-                if href and href not in cache and href != f"http://www.google.com/search?q={keywords}":
-                    cache.add(href)
-                    body = _normalize(row["a"])
-                    if body:
-                        results.append(
-                            {
-                                "title": _normalize(row["t"]),
-                                "href": _normalize_url(href),
-                                "body": body,
-                            }
-                        )
-                        if max_results and len(results) >= max_results:
-                            return results
-                else:
-                    next_page_url = row.get("n")
-                    if not next_page_url or not max_results:
-                        return results
-                    payload["s"] = next_page_url.split("s=")[1].split("&")[0]
-        return results
-
     def _text_html(
         self,
         keywords: str,

diff --git a/duckduckgo_search/utils.py b/duckduckgo_search/utils.py
@@ -51,19 +51,6 @@ def _extract_vqd(html_bytes: bytes, keywords: str) -> str:
     raise DuckDuckGoSearchException(f"_extract_vqd() {keywords=} Could not extract vqd.")
 
 
-def _text_extract_json(html_bytes: bytes, keywords: str) -> list[dict[str, str]]:
-    """text(backend="api") -> extract json from html."""
-    try:
-        start = html_bytes.index(b"DDG.pageLayout.load('d',") + 24
-        end = html_bytes.index(b");DDG.", start)
-        data = html_bytes[start:end]
-        result: list[dict[str, str]] = json_loads(data)
-        return result
-    except Exception as ex:
-        raise DuckDuckGoSearchException(f"_text_extract_json() {keywords=} {type(ex).__name__}: {ex}") from ex
-    raise DuckDuckGoSearchException(f"_text_extract_json() {keywords=} return None")
-
-
 def _normalize(raw_html: str) -> str:
     """Strip HTML tags from the raw_html string."""
     return unescape(REGEX_STRIP_TAGS.sub("", raw_html)) if raw_html else ""

diff --git a/tests/test_duckduckgo_search.py b/tests/test_duckduckgo_search.py
@@ -21,25 +21,21 @@ def test_chat(model):
     assert  len(results) >= 1
 
 
-def test_text():
-    results = DDGS().text("cat", safesearch="off", timelimit="m", max_results=20)
-    assert 15 <= len(results) <= 20
-
-
 def test_text_html():
-    results = DDGS().text("eagle", backend="html", max_results=20)
+    results = DDGS().text("eagle", backend="html", region="br-pt", timelimit="y", max_results=20)
     assert 15 <= len(results) <= 20
 
 
 def test_text_lite():
-    results = DDGS().text("dog", backend="lite", max_results=20)
+    results = DDGS().text("dog", backend="lite", region="br-pt", timelimit="y", max_results=20)
     assert 15 <= len(results) <= 20
 
 
 def test_text_ecosia():
-    results = DDGS().text("dog", region="br-pt", safesearch="off", backend="ecosia", max_results=20)
+    results = DDGS().text("cat", backend="ecosia", region="br-pt", safesearch="off", max_results=20)
     assert 15 <= len(results) <= 20
 
+
 def test_images():
     results = DDGS().images("flower", max_results=200)
     assert 85 <= len(results) <= 200