diff --git a/src/api_edition/api.py b/src/api_edition/api.py
index cf841f9..452ba1e 100644
--- a/src/api_edition/api.py
+++ b/src/api_edition/api.py
@@ -23,7 +23,7 @@
import multiprocessing
import queue
import threading
-from multiprocessing import Process
+from multiprocessing import Process, Manager
import time
import fanqie_api as fa
from flask import Flask, request, jsonify
@@ -46,11 +46,18 @@ def __init__(self):
@staticmethod
def crawl(url):
try:
- # Create a new process to run the crawler function
- p = Process(target=fa.fanqie_l, args=(url, 'utf-8'))
- p.start()
- time.sleep(2)
- return True
+ print(f"Crawling for URL: {url}")
+ with Manager() as manager:
+ return_dict = manager.dict()
+ # Create a new process to run the crawler function
+ p = Process(target=fa.fanqie_l, args=(url, 'utf-8', return_dict))
+ p.start()
+ p.join() # wait for the process to finish
+ if 'error' in return_dict:
+ print(f"Error: {return_dict['error']}")
+ return False
+ else:
+ return True
except Exception as e:
print(f"Error: {e}")
return False
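
The rewritten crawl() blocks on the child process and picks up any error it reported through a shared Manager dict. A minimal standalone sketch of the same pattern, with a made-up worker function `double` standing in for fa.fanqie_l:

    from multiprocessing import Process, Manager

    def double(n, return_dict):
        # Hypothetical worker: report failures through the shared dict
        # instead of letting them die with the child process.
        try:
            if n < 0:
                raise ValueError("n must be non-negative")
            return_dict["result"] = n * 2
        except Exception as e:
            return_dict["error"] = str(e)

    if __name__ == "__main__":
        with Manager() as manager:
            return_dict = manager.dict()
            p = Process(target=double, args=(-1, return_dict))
            p.start()
            p.join()  # wait for the child to exit, as crawl() now does
            print("failed" if "error" in return_dict else "ok")  # prints "failed"

This only works because the worker catches its own exceptions and writes them into the dict; the outer try/except added to fanqie_l below plays exactly that role.
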
@@ -61,6 +68,7 @@ def worker(self):
try:
# Get a URL from the URL queue
url = self.url_queue.get(timeout=1)
+ self.task_status[url] = "进行中"
# Call the crawler on this URL; if it fails, mark the task as failed and skip to the next one
if Spider.crawl(url):
self.task_status[url] = "已完成"
@@ -84,9 +92,9 @@ def add_url(self, url):
if url not in self.task_status or self.task_status[url] == "失败":
self.url_queue.put(url)
self.task_status[url] = "等待中"
- return "URL已添加到下载队列"
+ return "此书籍已添加到下载队列"
else:
- return "URL已存在"
+ return "此书籍已存在"
def stop(self):
# Set the running flag to False to stop the worker thread
@@ -102,9 +110,9 @@ def stop(self):
def api():
# Get the request data
data = request.get_json()
- # Check that the request data contains the 'class' and 'id' fields; return a 400 error if not
+ # Check that the request data contains the 'class' and 'id' fields; return a 418 error if not
if 'class' not in data or 'id' not in data:
- return jsonify({'error': 'Bad Request'}), 400
+ return jsonify({'error': 'I\'m a teapot'}), 418
# If the 'class' field is 'add', try to add the URL to the queue and return the corresponding message and queue position
if data['class'] == 'add':
@@ -122,7 +130,7 @@ def api():
return jsonify({'exists': status is not None, 'position': position, 'status': status})
else:
- return jsonify({'error': 'Bad Request'}), 400
+ return jsonify({'error': 'I\'m a teapot'}), 418
if __name__ == "__main__":
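
With the new checks in place, the endpoint can be exercised with a short client script. This is a sketch under assumptions: the route path /api and port 5000 are guesses (the @app.route decorator is outside this diff), and the book ID is a placeholder.

    import requests

    API_URL = "http://127.0.0.1:5000/api"  # assumed route and port

    # Queue a book for download ('id' is a placeholder book ID)
    resp = requests.post(API_URL, json={"class": "add", "id": "1234567890"})
    print(resp.status_code, resp.json())

    # A request missing 'class' or 'id' now draws the teapot response
    resp = requests.post(API_URL, json={"id": "1234567890"})
    print(resp.status_code)  # 418
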
diff --git a/src/api_edition/call_example.html b/src/api_edition/call_example.html
index e312a7d..11564b7 100644
--- a/src/api_edition/call_example.html
+++ b/src/api_edition/call_example.html
@@ -1,11 +1,14 @@
+
+
+
-<title>API Call Form</title>
+<title>API Call Example</title>
+This page is a call example for the API edition of the Fanqie novel download project
+Project repository: https://github.com/xing-yv/fanqie-novel-download
Gitee: https://gitee.com/xingyv1024/fanqie-novel-download
+How to use:
1. Enter the API address you obtained or self-hosted in the API URL field
2. Choose whether to add a book to the download list or query its download status
3. Enter the ID of the book you want to download or query in the book ID field (see below for how to get it)
4. Click "Submit" and wait for the API service to tell you whether the book was added to the download list, or to return the query result
5. Retrieve the downloaded book (see below)
+
+
+
How to get a book ID:
+On PC:
Copy the URL of the book's table-of-contents page
The string of digits after "/page/" in the URL (before the question mark) is the book ID
+On mobile:
Share the book and copy the link
The string of digits after "book_id=" in the link is the book ID
+
+
+How to get the downloaded file:
If you use an API provided by someone else: ask the provider
If you self-host the API, look in the output folder under the API program's directory
+
+<script>
+ document.getElementById("apiForm").addEventListener("submit", function(event){
+ event.preventDefault();
+
+ var url = document.getElementById('url').value;
+
+ var data = {
+ class: document.getElementById('class').value,
+ id: document.getElementById('id').value
+ };
+
+ var requestClass = data.class; // store the class field from the request
+
+ console.log("发送的数据:", data);
+
+ fetch(url, {
+ method: 'POST',
+ headers: {
+ 'Content-Type': 'application/json'
+ },
+ body: JSON.stringify(data)
+ })
+ .then(response => {
+ console.log("收到的响应:", response);
+ return response.json();
+ })
+ .then(data => {
+ console.log("解析后的数据:", data);
+
+ var resultText = '';
+ if (data.error) {
+ resultText = '错误:' + data.error;
+ } else if (requestClass == 'add') { // use the stored requestClass variable
+ resultText = '消息:' + data.message + '<br>' +
+ (data.position ? ('位置:' + data.position + '<br>') : '') +
+ (data.status ? ('状态:' + data.status) : '');
+ } else if (requestClass == 'search') { // use the stored requestClass variable
+ resultText = '存在:' + (data.exists ? '是' : '否') + '<br>' +
+ (data.position ? ('位置:' + data.position + '<br>') : '') +
+ (data.status ? ('状态:' + data.status) : '');
+ }
+
+
+ console.log("显示的结果:", resultText);
+
+ document.getElementById('result').innerHTML = resultText;
+ })
+ .catch((error) => {
+ console.error('Error:', error);
+ });
+ });
+</script>
+</body>
+</html>
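
The form above boils down to two JSON POSTs, so the page can also be scripted. A Python equivalent that adds a book and then polls its status (route, port, and book ID are placeholder assumptions, as before):

    import time
    import requests

    API_URL = "http://127.0.0.1:5000/api"  # assumed route and port
    book_id = "1234567890"                 # placeholder book ID

    print(requests.post(API_URL, json={"class": "add", "id": book_id}).json())

    # Poll until the task finishes; the search response carries 'exists',
    # 'position' and a status string ("等待中", "进行中", "已完成" or "失败")
    while True:
        info = requests.post(API_URL, json={"class": "search", "id": book_id}).json()
        print(info.get("position"), info.get("status"))
        if info.get("status") in ("已完成", "失败"):
            break
        time.sleep(5)
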
diff --git a/src/api_edition/fanqie_api.py b/src/api_edition/fanqie_api.py
index fcb39a4..66d97a8 100644
--- a/src/api_edition/fanqie_api.py
+++ b/src/api_edition/fanqie_api.py
@@ -32,33 +32,36 @@
# Function for downloading Fanqie novels in normal mode
-def fanqie_l(url, encoding):
+def fanqie_l(url, encoding, return_dict):
+ try:
+ headers = {
+ "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
+ }
- headers = {
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
- }
+ # Extract the book ID
+ book_id = re.search(r'page/(\d+)', url).group(1)
- # Fetch the page source
- response = requests.get(url, headers=headers)
- html = response.text
+ # Fetch the page source
+ response = requests.get(url, headers=headers)
+ html = response.text
- # Parse the page source
- soup = BeautifulSoup(html, "html.parser")
+ # Parse the page source
+ soup = BeautifulSoup(html, "html.parser")
- # Get the novel title
- title = soup.find("h1").get_text()
- # , class_ = "info-name"
- # Replace illegal characters
- title = p.rename(title)
+ # Get the novel title
+ title = soup.find("h1").get_text()
+ # , class_ = "info-name"
+ # Replace illegal characters
+ title = p.rename(title)
- # Get the novel info
- info = soup.find("div", class_="page-header-info").get_text()
+ # Get the novel info
+ info = soup.find("div", class_="page-header-info").get_text()
- # Get the novel synopsis
- intro = soup.find("div", class_="page-abstract-content").get_text()
+ # Get the novel synopsis
+ intro = soup.find("div", class_="page-abstract-content").get_text()
- # Build the novel content string
- content = f"""使用 @星隅(xing-yv) 所作开源工具下载
+ # Build the novel content string
+ content = f"""使用 @星隅(xing-yv) 所作开源工具下载
开源仓库地址:https://github.com/xing-yv/fanqie-novel-download
Gitee:https://gitee.com/xingyv1024/fanqie-novel-download/
任何人无权限制您访问本工具,如果有向您提供代下载服务者未事先告知您工具的获取方式,请向作者举报:xing_yv@outlook.com
@@ -66,78 +69,84 @@ def fanqie_l(url, encoding):
{title}
{info}
{intro}
-"""
+ """
- # Get all chapter links
- chapters = soup.find_all("div", class_="chapter-item")
+ # Get all chapter links
+ chapters = soup.find_all("div", class_="chapter-item")
- # Build the file name
- file_path = path.join('output', f'{title}.txt')
+ # Build the file name
+ file_path = path.join('output', f'{title}_{book_id}.txt')
- os.makedirs("output", exist_ok=True)
+ os.makedirs("output", exist_ok=True)
- try:
- # Iterate over each chapter link
- for chapter in chapters:
- time.sleep(0.5)
- # Get the chapter title
- chapter_title = chapter.find("a").get_text()
+ try:
+ # Iterate over each chapter link
+ for chapter in chapters:
+ time.sleep(0.5)
+ # Get the chapter title
+ chapter_title = chapter.find("a").get_text()
+
+ # Get the chapter URL
+ chapter_url = urljoin(url, chapter.find("a")["href"])
+
+ # Get the chapter id
+ chapter_id = re.search(r"/(\d+)", chapter_url).group(1)
+
+ # Build the api URL
+ api_url = f"https://novel.snssdk.com/api/novel/book/reader/full/v1/?device_platform=android&parent_enterfrom=novel_channel_search.tab.&aid=2329&platform_id=1&group_id={chapter_id}&item_id={chapter_id}"
- # Get the chapter URL
- chapter_url = urljoin(url, chapter.find("a")["href"])
+ # Try to fetch the chapter content
+ chapter_content = None
+ retry_count = 1
+ while retry_count < 4: # cap the number of retries
+ # Get the api response
+ api_response = requests.get(api_url, headers=headers)
- # Get the chapter id
- chapter_id = re.search(r"/(\d+)", chapter_url).group(1)
+ # Parse the api response as json
+ api_data = api_response.json()
- # Build the api URL
- api_url = f"https://novel.snssdk.com/api/novel/book/reader/full/v1/?device_platform=android&parent_enterfrom=novel_channel_search.tab.&aid=2329&platform_id=1&group_id={chapter_id}&item_id={chapter_id}"
+ if "data" in api_data and "content" in api_data["data"]:
+ chapter_content = api_data["data"]["content"]
+ break # break out of the retry loop once the content is fetched
+ else:
+ retry_count += 1 # otherwise retry
- # Try to fetch the chapter content
- chapter_content = None
- retry_count = 1
- while retry_count < 4: # cap the number of retries
- # Get the api response
- api_response = requests.get(api_url, headers=headers)
+ if retry_count == 4:
+ continue # skip this chapter after too many retries
- # Parse the api response as json
- api_data = api_response.json()
+ # Extract the text inside the article tag
+ chapter_text = re.search(r"<article>([\s\S]*?)</article>", chapter_content).group(1)
- if "data" in api_data and "content" in api_data["data"]:
- chapter_content = api_data["data"]["content"]
- break # break out of the retry loop once the content is fetched
- else:
- retry_count += 1 # otherwise retry
+ # Replace <p> tags with newlines
+ chapter_text = re.sub(r"<p>", "\n", chapter_text)
- if retry_count == 4:
- continue # skip this chapter after too many retries
+ # Strip the remaining html tags
+ chapter_text = re.sub(r"</?\w+>", "", chapter_text)
- # Extract the text inside the article tag
- chapter_text = re.search(r"<article>([\s\S]*?)</article>", chapter_content).group(1)
+ chapter_text = p.fix_publisher(chapter_text)
- # Replace <p> tags with newlines
- chapter_text = re.sub(r"<p>", "\n", chapter_text)
+ # Append the chapter title and text to the novel content string
+ content += f"\n\n\n{chapter_title}\n{chapter_text}"
- # Strip the remaining html tags
- chapter_text = re.sub(r"</?\w+>", "", chapter_text)
+ # Encode the novel content string to bytes using the given encoding
+ data = content.encode(encoding, errors='ignore')
- chapter_text = p.fix_publisher(chapter_text)
+ # Save the file
+ with open(file_path, "wb") as f:
+ f.write(data)
- # Append the chapter title and text to the novel content string
- content += f"\n\n\n{chapter_title}\n{chapter_text}"
- # Encode the novel content string to bytes using the given encoding
- data = content.encode(encoding, errors='ignore')
+ except BaseException as e:
+ # Catch all exceptions and save the file promptly
+ # Encode the novel content string to bytes using the given encoding
+ data = content.encode(encoding, errors='ignore')
- # Save the file
- with open(file_path, "wb") as f:
- f.write(data)
+ # Save the file
+ with open(file_path, "wb") as f:
+ f.write(data)
+ raise Exception(f"下载失败: {e}")
except BaseException as e:
- # Catch all exceptions and save the file promptly
- # Encode the novel content string to bytes using the given encoding
- data = content.encode(encoding, errors='ignore')
-
- # Save the file
- with open(file_path, "wb") as f:
- f.write(data)
- raise Exception(f"下载失败: {e}")
+ return_dict['error'] = str(e)
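
The three regex steps in the chapter loop turn the API's HTML payload into plain text. A self-contained check of that cleanup pipeline on a made-up payload:

    import re

    chapter_content = "<article><p>第一行</p><p>第二行</p></article>"  # fabricated sample

    # Extract the text inside the article tag
    text = re.search(r"<article>([\s\S]*?)</article>", chapter_content).group(1)

    # Replace <p> tags with newlines
    text = re.sub(r"<p>", "\n", text)

    # Strip the remaining html tags
    text = re.sub(r"</?\w+>", "", text)

    print(repr(text))  # '\n第一行\n第二行'
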