-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathjina_reader.py
65 lines (61 loc) · 2.04 KB
/
jina_reader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
import requests
from typing import Optional, Dict, Any, List
class JinaReader:
    """Small HTTP client for the Jina Reader service at ``https://r.jina.ai``.

    All public methods are best-effort: transport errors, non-2xx responses
    and unparsable bodies are reported with ``print`` and converted into an
    empty result (``{}``, ``""`` or ``[]``) rather than raised.

    NOTE(review): the ``/read``, ``/summarize`` and ``/images`` paths and the
    ``summary`` / ``images`` response keys are kept exactly as they were
    written; confirm them against the Jina Reader API documentation.
    """

    # Seconds to wait on the server before giving up when no explicit
    # timeout is supplied; without one, requests waits indefinitely.
    DEFAULT_TIMEOUT = 30.0

    def __init__(self, api_key: str, timeout: Optional[float] = DEFAULT_TIMEOUT):
        """Store credentials and build the headers sent with every request.

        Args:
            api_key: Jina API key, sent as a bearer token.
            timeout: Per-request timeout in seconds. ``None`` disables the
                timeout (requests then waits indefinitely).

        Raises:
            ValueError: If ``api_key`` is empty or otherwise falsy.
        """
        if not api_key:
            raise ValueError("Jina API key is required")
        self.api_key = api_key
        self.timeout = timeout
        self.base_url = "https://r.jina.ai"
        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
            # Every method below parses the body with response.json(), so
            # ask for a JSON representation explicitly.
            "Accept": "application/json",
        }

    def _fetch_json(
        self,
        send,
        endpoint: str,
        action: str,
        **request_kwargs: Any,
    ) -> Optional[Dict[str, Any]]:
        """Issue one request and return its JSON object, or ``None`` on failure.

        Args:
            send: The requests callable to use (``requests.get`` or
                ``requests.post``).
            endpoint: Path segment appended to ``self.base_url``.
            action: Phrase inserted into the printed error message.
            **request_kwargs: Extra arguments forwarded to ``send``
                (``params=...`` or ``json=...``).

        Returns:
            The decoded JSON object, or ``None`` when the request failed,
            the body was not valid JSON, or the JSON was not an object.
        """
        try:
            response = send(
                f"{self.base_url}/{endpoint}",
                headers=self.headers,
                timeout=self.timeout,
                **request_kwargs,
            )
            response.raise_for_status()
            payload = response.json()
        except (requests.RequestException, ValueError) as e:
            # ValueError covers invalid JSON on requests versions whose
            # decode error is not a RequestException subclass.
            print(f"Error {action}: {e}")
            return None
        if not isinstance(payload, dict):
            # Callers index the payload by key; a list/str/number body
            # would otherwise fail later with AttributeError.
            print(
                f"Error {action}: expected a JSON object, "
                f"got {type(payload).__name__}"
            )
            return None
        return payload

    def read_url(self, url: str) -> Dict[str, Any]:
        """Extract content from a URL using the Jina Reader API.

        Args:
            url: Address of the page to read, sent as the ``url`` query
                parameter.

        Returns:
            The decoded JSON object from the service, or ``{}`` on failure.
        """
        payload = self._fetch_json(
            requests.get, "read", "reading URL", params={"url": url}
        )
        return payload if payload is not None else {}

    def summarize(self, text: str, max_length: Optional[int] = None) -> str:
        """Summarize text content using the Jina Reader API.

        Args:
            text: The text to summarize.
            max_length: Optional length limit forwarded to the service;
                sent as JSON ``null`` when left as ``None``.

        Returns:
            The value under the response's ``summary`` key, or ``""`` when
            the key is absent or the request failed.
        """
        body = {
            "text": text,
            "max_length": max_length,
        }
        payload = self._fetch_json(
            requests.post, "summarize", "summarizing text", json=body
        )
        if payload is None:
            return ""
        return payload.get("summary", "")

    def extract_images(self, url: str) -> List[Dict[str, str]]:
        """Extract images with captions from a URL.

        Args:
            url: Address of the page to inspect, sent as the ``url`` query
                parameter.

        Returns:
            The value under the response's ``images`` key, or ``[]`` when
            the key is absent or the request failed.
        """
        payload = self._fetch_json(
            requests.get, "images", "extracting images", params={"url": url}
        )
        if payload is None:
            return []
        return payload.get("images", [])