Skip to content

Commit

Permalink
feat: integrate jina sum plugin (#16)
Browse files Browse the repository at this point in the history
  • Loading branch information
hanfangyuan4396 authored Apr 19, 2024
1 parent a4cbeca commit e26eba4
Show file tree
Hide file tree
Showing 7 changed files with 183 additions and 0 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -30,4 +30,5 @@ plugins/banwords/lib/__pycache__
!plugins/role
!plugins/keyword
!plugins/linkai
!plugins/jina_sum
client_config.json
2 changes: 2 additions & 0 deletions plugins/jina_sum/.gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
__pycache__/
config.json
21 changes: 21 additions & 0 deletions plugins/jina_sum/LICENSE
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
MIT License

Copyright (c) 2024 Han Fangyuan

Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:

The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.

THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.
20 changes: 20 additions & 0 deletions plugins/jina_sum/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
# jina_sumary
ChatGPT on WeChat项目插件, 使用jina reader和ChatGPT总结网页链接内容

支持总结公众号、小红书、csdn等分享卡片链接(有的卡片链接会触发验证,一般直链没有此问题)

![wechat_mp](./docs/images/wechat_mp.jpg)
![red](./docs/images/red.jpg)
![csdn](./docs/images/csdn.jpg)

config.json 配置说明
```bash
{
"jina_reader_base": "https://r.jina.ai", # jina reader链接,默认为https://r.jina.ai
"open_ai_api_base": "https://api.openai.com/v1", # chatgpt chat url
"open_ai_api_key": "sk-xxx", # chatgpt api key
"open_ai_model": "gpt-3.5-turbo", # chatgpt model
"max_words": 8000, # 网页链接内容的最大字数,防止超过最大输入token,使用字符串长度简单计数
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。" # 链接内容总结提示词
}
```
1 change: 1 addition & 0 deletions plugins/jina_sum/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
from .jina_sum import *
8 changes: 8 additions & 0 deletions plugins/jina_sum/config.json.template
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
{
"jina_reader_base": "https://r.jina.ai",
"open_ai_api_base": "https://api.openai.com/v1",
"open_ai_api_key": "sk-xxx",
"open_ai_model": "gpt-3.5-turbo",
"max_words": 8000,
"prompt": "我需要对下面的文本进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动。"
}
130 changes: 130 additions & 0 deletions plugins/jina_sum/jina_sum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
# encoding:utf-8
import json
import os
import html
from urllib.parse import urlparse

import requests

import plugins
from bridge.context import ContextType
from bridge.reply import Reply, ReplyType
from common.log import logger
from plugins import *

@plugins.register(
    name="JinaSum",
    desire_priority=10,
    hidden=False,
    desc="Sum url link content with jina reader and llm",
    version="v0.0.1",
    author="hanfangyuan",
)
class JinaSum(Plugin):
    """Plugin that summarizes the web page behind a shared/typed URL.

    The page text is fetched through a Jina reader endpoint and then sent to
    an OpenAI-compatible chat-completions endpoint together with a
    summarization prompt. The resulting summary is returned as a text reply.
    """

    # Class-level defaults, used for any key missing from the loaded config.
    jina_reader_base = "https://r.jina.ai"
    open_ai_api_base = "https://api.openai.com/v1"
    open_ai_model = "gpt-3.5-turbo"
    max_words = 8000
    prompt = "我需要对下面引号内文档进行总结,总结输出包括以下三个部分:\n📖 一句话总结\n🔑 关键要点,用数字序号列出3-5个文章的核心内容\n🏷 标签: #xx #xx\n请使用emoji让你的表达更生动\n\n"

    def __init__(self):
        """Load the plugin configuration and register the context handler.

        Raises:
            RuntimeError: if anything goes wrong during initialization; the
                original exception is chained as ``__cause__``.
        """
        super().__init__()
        try:
            self.config = super().load_config()
            if not self.config:
                # Fall back to the bundled template; if that is unavailable
                # too, use an empty mapping so the class defaults apply
                # instead of crashing on ``None.get``.
                self.config = self._load_config_template() or {}
                if not self.config:
                    logger.warning("[JinaSum] no config found, using built-in defaults")
            self.jina_reader_base = self.config.get("jina_reader_base", self.jina_reader_base)
            self.open_ai_api_base = self.config.get("open_ai_api_base", self.open_ai_api_base)
            self.open_ai_api_key = self.config.get("open_ai_api_key", "")
            self.open_ai_model = self.config.get("open_ai_model", self.open_ai_model)
            self.max_words = self.config.get("max_words", self.max_words)
            self.prompt = self.config.get("prompt", self.prompt)
            # Never write the API key to the log in clear text.
            logger.info(f"[JinaSum] inited, config={self._masked_config()}")
            self.handlers[Event.ON_HANDLE_CONTEXT] = self.on_handle_context
        except Exception as e:
            logger.error(f"[JinaSum] 初始化异常:{e}")
            # Raising a bare string is itself a TypeError in Python 3; raise a
            # real exception and keep the original cause attached.
            raise RuntimeError("[JinaSum] init failed, ignore ") from e

    def on_handle_context(self, e_context: EventContext, retry_count: int = 0):
        """Summarize the URL carried by the incoming message, if any.

        Messages that are not of type SHARING/TEXT, or whose content does not
        look like an http(s) URL, are ignored so other plugins can handle them.

        Args:
            e_context: event context; reads ``e_context["context"]`` and
                ``e_context["channel"]``, writes ``e_context["reply"]`` and
                ``e_context.action``.
            retry_count: number of attempts already made. On any exception the
                handler calls itself again until three retries have been made
                (four attempts in total), then replies with an error message.
        """
        try:
            context = e_context["context"]
            content = context.content
            if context.type not in (ContextType.SHARING, ContextType.TEXT):
                return
            if not self._check_url(content):
                logger.debug(f"[JinaSum] {content} not a url, skip")
                return
            if retry_count == 0:
                # Only notify the user on the first attempt, not on retries.
                logger.debug("[JinaSum] on_handle_context. content: %s" % content)
                reply = Reply(ReplyType.TEXT, "🎉正在为您生成总结,请稍候...")
                channel = e_context["channel"]
                channel.send(reply, context)

            # Unescape HTML entities to work around link validation on
            # official-account share cards, see
            # https://github.com/fatwang2/sum4all/commit/b983c49473fc55f13ba2c44e4d8b226db3517c45
            # Strip first: _check_url tolerates surrounding whitespace, so the
            # URL handed to the reader must be stripped as well.
            target_url = html.unescape(content.strip())
            jina_url = self._get_jina_url(target_url)
            response = requests.get(jina_url, timeout=60)
            response.raise_for_status()
            target_url_content = response.text

            openai_chat_url = self._get_openai_chat_url()
            openai_headers = self._get_openai_headers()
            openai_payload = self._get_openai_payload(target_url_content)
            # Mask the bearer token before logging the request details.
            loggable_headers = dict(openai_headers, Authorization="Bearer ***")
            logger.debug(f"[JinaSum] openai_chat_url: {openai_chat_url}, openai_headers: {loggable_headers}, openai_payload: {openai_payload}")
            response = requests.post(openai_chat_url, headers=openai_headers, json=openai_payload, timeout=60)
            response.raise_for_status()
            result = response.json()['choices'][0]['message']['content']
            reply = Reply(ReplyType.TEXT, result)
            e_context["reply"] = reply
            e_context.action = EventAction.BREAK_PASS

        except Exception as e:
            if retry_count < 3:
                logger.warning(f"[JinaSum] {str(e)}, retry {retry_count + 1}")
                self.on_handle_context(e_context, retry_count + 1)
                return

            # All retries exhausted: log the traceback and tell the user.
            logger.exception(f"[JinaSum] {str(e)}")
            reply = Reply(ReplyType.ERROR, "我暂时无法总结链接,请稍后再试")
            e_context["reply"] = reply
            e_context.action = EventAction.BREAK_PASS

    def get_help_text(self, verbose, **kwargs):
        """Return the one-line help text shown for this plugin."""
        return '使用jina reader和ChatGPT总结网页链接内容'

    def _masked_config(self):
        """Return a copy of the config that is safe to log (API key hidden)."""
        masked = dict(self.config)
        if masked.get("open_ai_api_key"):
            masked["open_ai_api_key"] = "***"
        return masked

    def _load_config_template(self):
        """Load ``config.json.template`` from the plugin directory.

        Returns:
            The parsed template, or ``None`` when the file does not exist or
            cannot be read/parsed (the error is logged).
        """
        logger.debug("No JinaSum plugin config.json, use plugins/jina_sum/config.json.template")
        try:
            plugin_config_path = os.path.join(self.path, "config.json.template")
            if os.path.exists(plugin_config_path):
                with open(plugin_config_path, "r", encoding="utf-8") as f:
                    plugin_conf = json.load(f)
                    return plugin_conf
        except Exception as e:
            logger.exception(e)
        return None

    def _get_jina_url(self, target_url):
        """Build the Jina reader URL for ``target_url``."""
        # Tolerate a configured base with a trailing slash.
        return self.jina_reader_base.rstrip("/") + "/" + target_url

    def _get_openai_chat_url(self):
        """Build the chat-completions endpoint URL from the configured base."""
        return self.open_ai_api_base.rstrip("/") + "/chat/completions"

    def _get_openai_headers(self):
        """Return the HTTP headers for the chat-completions request."""
        return {
            'Authorization': f"Bearer {self.open_ai_api_key}",
            'Host': urlparse(self.open_ai_api_base).netloc
        }

    def _get_openai_payload(self, target_url_content):
        """Build the chat-completions request body for the fetched page text.

        The text is truncated to ``max_words`` characters (a plain string-length
        cut, not a token count) and appended to the prompt inside triple quotes.
        """
        target_url_content = target_url_content[:self.max_words]
        sum_prompt = f"{self.prompt}\n\n'''{target_url_content}'''"
        messages = [{"role": "user", "content": sum_prompt}]
        payload = {
            'model': self.open_ai_model,
            'messages': messages
        }
        return payload

    def _check_url(self, target_url: str):
        """Return True if ``target_url`` looks like an http(s) URL.

        This is a simple prefix check after stripping surrounding whitespace;
        non-string input yields False.
        """
        if not isinstance(target_url, str):
            return False
        return target_url.strip().startswith(("http://", "https://"))

0 comments on commit e26eba4

Please sign in to comment.