forked from zhayujie/chatgpt-on-wechat
-
Notifications
You must be signed in to change notification settings - Fork 238
/
Copy pathjina_sum.py
153 lines (133 loc) · 6.6 KB
/
jina_sum.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# encoding:utf-8
import json
import os
import html
from urllib.parse import urlparse
import requests
import plugins
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from common.log import logger
from plugins import *
@plugins.register(
    name="JinaSum",
    desire_priority=10,
    hidden=False,
    enabled=False,
    desc="Sum url link content with jina reader and llm",
    version="0.0.1",
    author="hanfangyuan",
)
class JinaSum(Plugin):
    """Summarize the page behind a shared/sent URL.

    For SHARING and TEXT contexts whose content is an http(s) URL that passes
    the white/black list check, the page text is fetched through the Jina
    reader endpoint, sent to an OpenAI-compatible chat-completions endpoint
    together with ``prompt``, and the model answer is set as the reply.
    """

    # Class-level defaults; each one may be overridden by the plugin config
    # entry with the same key (see __init__).
    jina_reader_base = "https://r.jina.ai"
    open_ai_api_base = "https://api.openai.com/v1"
    open_ai_model = "gpt-3.5-turbo"
    # Despite the name, this is a *character* budget: the fetched text is
    # sliced to this many characters before being sent to the model.
    max_words = 8000
    prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"
    # When non-empty, only URLs starting with one of these prefixes are handled.
    white_url_list = []
    # URLs starting with one of these prefixes are never handled.
    black_url_list = [
        "https://support.weixin.qq.com",  # WeChat Channels video
        "https://channels-aladin.wxqcloud.qq.com",  # WeChat Channels music
    ]

    def __init__(self):
        """Load the plugin configuration and register the context handler.

        Raises:
            RuntimeError: if anything goes wrong while loading the config or
                registering the handler. The original error is logged and
                chained as ``__cause__``.
        """
        super().__init__()
        try:
            self.config = super().load_config()
            if not self.config:
                self.config = self._load_config_template()
            if self.config is None:
                # Fail with an explicit message instead of an opaque
                # "'NoneType' object has no attribute 'get'" below.
                raise ValueError("no config.json or config.json.template found")
            self.jina_reader_base = self.config.get("jina_reader_base", self.jina_reader_base)
            self.open_ai_api_base = self.config.get("open_ai_api_base", self.open_ai_api_base)
            self.open_ai_api_key = self.config.get("open_ai_api_key", "")
            self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
            self.max_words = self.config.get("max_words", self.max_words)
            self.prompt = self.config.get("prompt", self.prompt)
            self.white_url_list = self.config.get("white_url_list", self.white_url_list)
            self.black_url_list = self.config.get("black_url_list", self.black_url_list)
            # Never write the API key to the log.
            logger.info(f"[JinaSum] inited, config={self._masked_config()}")
            self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
        except Exception as e:
            logger.error(f"[JinaSum] 初始化异常:{e}")
            # Raising a bare string is itself a TypeError in Python 3; raise a
            # real exception and keep the original one as the cause.
            raise RuntimeError("[JinaSum] init failed, ignore ") from e

    def _masked_config(self):
        """Return a copy of the loaded config with the API key hidden, for logging."""
        masked = dict(self.config)
        if masked.get("open_ai_api_key"):
            masked["open_ai_api_key"] = "***"
        return masked

    def on_handle_context(self, e_context: EventContext, retry_count: int = 0):
        """Handle a context event: summarize the URL it carries, if any.

        Contexts that are not SHARING/TEXT, or whose content is not an
        accepted URL, are left untouched. Otherwise a "working on it" message
        is sent on the first attempt, the page is fetched via the Jina reader,
        summarized by the chat-completions endpoint, and the result is stored
        in ``e_context["reply"]`` with action ``BREAK_PASS``.

        Args:
            e_context: the event context; ``"context"`` and ``"channel"`` are
                read from it, ``"reply"`` and ``action`` are written.
            retry_count: number of attempts already made. Used internally by
                the retry recursion; callers normally leave it at 0.

        Any exception triggers a retry (up to 3 retries); after that an error
        reply is produced instead of propagating the exception.
        """
        try:
            context = e_context["context"]
            content = context.content
            if context.type not in (ContextType.SHARING, ContextType.TEXT):
                return
            if not self._check_url(content):
                logger.debug(f"[JinaSum] {content} is not a valid url, skip")
                return
            if retry_count == 0:
                # Only notify the user once, not on every retry.
                logger.debug("[JinaSum] on_handle_context. content: %s" % content)
                reply = Reply(ReplyType.TEXT, "🎉正在为您生成总结,请稍候...")
                channel = e_context["channel"]
                channel.send(reply, context)

            # Unescape HTML entities to fix link validation for official-account
            # card links, see
            # https://github.com/fatwang2/sum4all/commit/b983c49473fc55f13ba2c44e4d8b226db3517c45
            # Strip first so the URL that is requested is the same one that
            # _check_url validated.
            target_url = html.unescape(content.strip())
            jina_url = self._get_jina_url(target_url)
            headers = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/126.0.0.0 Safari/537.36"}
            response = requests.get(jina_url, headers=headers, timeout=60)
            response.raise_for_status()
            target_url_content = response.text

            openai_chat_url = self._get_openai_chat_url()
            openai_headers = self._get_openai_headers()
            openai_payload = self._get_openai_payload(target_url_content)
            # The headers carry the bearer token, so they are deliberately
            # left out of the log line.
            logger.debug(f"[JinaSum] openai_chat_url: {openai_chat_url}, openai_payload: {openai_payload}")
            response = requests.post(openai_chat_url, headers={**openai_headers, **headers}, json=openai_payload, timeout=60)
            response.raise_for_status()
            result = response.json()['choices'][0]['message']['content']
            reply = Reply(ReplyType.TEXT, result)
            e_context["reply"] = reply
            e_context.action = EventAction.BREAK_PASS
        except Exception as e:
            if retry_count < 3:
                logger.warning(f"[JinaSum] {str(e)}, retry {retry_count + 1}")
                self.on_handle_context(e_context, retry_count + 1)
                return

            # Retries exhausted: log the traceback and answer with an error.
            logger.exception(f"[JinaSum] {str(e)}")
            reply = Reply(ReplyType.ERROR, "我暂时无法总结链接,请稍后再试")
            e_context["reply"] = reply
            e_context.action = EventAction.BREAK_PASS

    def get_help_text(self, verbose, **kwargs):
        """Return the one-line help text of the plugin (``verbose`` is unused)."""
        return '使用jina reader和ChatGPT总结网页链接内容'

    def _load_config_template(self):
        """Load ``config.json.template`` from the plugin directory.

        Returns:
            The parsed JSON content, or ``None`` when the template does not
            exist or cannot be read/parsed (the error is logged).
        """
        logger.debug("No JinaSum plugin config.json, use plugins/jina_sum/config.json.template")
        try:
            # self.path is not set in this class; presumably provided by the
            # Plugin base class / plugin loader -- TODO confirm.
            plugin_config_path = os.path.join(self.path, "config.json.template")
            if os.path.exists(plugin_config_path):
                with open(plugin_config_path, "r", encoding="utf-8") as f:
                    plugin_conf = json.load(f)
                    return plugin_conf
        except Exception as e:
            logger.exception(e)
        return None

    def _get_jina_url(self, target_url):
        """Build the Jina reader URL: ``<jina_reader_base>/<target_url>``."""
        # Tolerate a trailing slash in the configured base.
        return self.jina_reader_base.rstrip("/") + "/" + target_url

    def _get_openai_chat_url(self):
        """Build the chat-completions URL from the configured API base."""
        # Tolerate a trailing slash in the configured base.
        return self.open_ai_api_base.rstrip("/") + "/chat/completions"

    def _get_openai_headers(self):
        """Return the request headers (bearer auth and Host) for the chat API."""
        return {
            'Authorization': f"Bearer {self.open_ai_api_key}",
            'Host': urlparse(self.open_ai_api_base).netloc
        }

    def _get_openai_payload(self, target_url_content):
        """Build the chat-completions payload for the fetched page text.

        The text is truncated to ``max_words`` characters and appended to the
        configured prompt inside triple single quotes.
        """
        target_url_content = target_url_content[:self.max_words]  # simple truncation by string length
        sum_prompt = f"{self.prompt}\n\n'''{target_url_content}'''"
        messages = [{"role": "user", "content": sum_prompt}]
        payload = {
            'model': self.open_ai_model,
            'messages': messages
        }
        return payload

    def _check_url(self, target_url: str):
        """Return True when ``target_url`` should be summarized.

        The URL (with surrounding whitespace ignored) must start with
        ``http://`` or ``https://``, must match a white-list prefix when the
        white list is non-empty, and must not match any black-list prefix.
        """
        stripped_url = target_url.strip()
        # Cheap sanity check that this looks like a URL.
        if not stripped_url.startswith(("http://", "https://")):
            return False

        # White list: only enforced when it is non-empty.
        if self.white_url_list and not stripped_url.startswith(tuple(self.white_url_list)):
            return False

        # Black list takes precedence over the white list.
        # str.startswith with an empty tuple is False, so an empty list passes.
        return not stripped_url.startswith(tuple(self.black_url_list or ()))