diff --git a/src/api_edition/api.py b/src/api_edition/api.py
index cf841f9..452ba1e 100644
--- a/src/api_edition/api.py
+++ b/src/api_edition/api.py
@@ -23,7 +23,7 @@
 import multiprocessing
 import queue
 import threading
-from multiprocessing import Process
+from multiprocessing import Process, Manager
 import time
 import fanqie_api as fa
 from flask import Flask, request, jsonify
@@ -46,11 +46,18 @@ def __init__(self):
     @staticmethod
     def crawl(url):
         try:
-            # 创建一个新的进程来运行爬虫函数
-            p = Process(target=fa.fanqie_l, args=(url, 'utf-8'))
-            p.start()
-            time.sleep(2)
-            return True
+            print(f"Crawling for URL: {url}")
+            with Manager() as manager:
+                return_dict = manager.dict()
+                # 创建一个新的进程来运行爬虫函数
+                p = Process(target=fa.fanqie_l, args=(url, 'utf-8', return_dict))
+                p.start()
+                p.join()  # 等待进程结束
+                if 'error' in return_dict:
+                    print(f"Error: {return_dict['error']}")
+                    return False
+                else:
+                    return True
         except Exception as e:
             print(f"Error: {e}")
             return False
@@ -61,6 +68,7 @@ def worker(self):
             try:
                 # 从URL队列中获取URL
                 url = self.url_queue.get(timeout=1)
+                self.task_status[url] = "进行中"
                 # 调用爬虫函数爬取URL,如果出错则标记为失败并跳过这个任务进行下一个
                 if Spider.crawl(url):
                     self.task_status[url] = "已完成"
@@ -84,9 +92,9 @@ def add_url(self, url):
         if url not in self.task_status or self.task_status[url] == "失败":
             self.url_queue.put(url)
             self.task_status[url] = "等待中"
-            return "URL已添加到下载队列"
+            return "此书籍已添加到下载队列"
         else:
-            return "URL已存在"
+            return "此书籍已存在"

     def stop(self):
         # 设置运行状态为False以停止工作线程
@@ -102,9 +110,9 @@ def stop(self):
 def api():
     # 获取请求数据
     data = request.get_json()
-    # 检查请求数据是否包含'class'和'id'字段,如果没有则返回400错误
+    # 检查请求数据是否包含'class'和'id'字段,如果没有则返回418错误
     if 'class' not in data or 'id' not in data:
-        return jsonify({'error': 'Bad Request'}), 400
+        return jsonify({'error': 'I\'m a teapot'}), 418

     # 如果'class'字段的值为'add',则尝试将URL添加到队列中,并返回相应的信息和位置
     if data['class'] == 'add':
@@ -122,7 +130,7 @@ def api():
         return jsonify({'exists': status is not None, 'position': position, 'status': status})
     else:
-        return jsonify({'error': 'Bad Request'}), 400
+        return jsonify({'error': 'I\'m a teapot'}), 418


 if __name__ == "__main__":
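The rewritten Spider.crawl() above replaces the old fire-and-forget Process plus time.sleep(2) with a join on the child process and a shared Manager dict that the child uses to report errors. A minimal, self-contained sketch of that pattern, with a placeholder worker and URL that are not part of the project:

```python
from multiprocessing import Manager, Process


def crawl_worker(url, return_dict):
    # Stand-in for fa.fanqie_l(url, 'utf-8', return_dict)
    try:
        if not url.startswith("https://"):
            raise ValueError("not an https url")
        return_dict["done"] = url
    except Exception as e:
        return_dict["error"] = str(e)  # same 'error' key convention as fanqie_l


if __name__ == "__main__":
    with Manager() as manager:
        return_dict = manager.dict()
        p = Process(target=crawl_worker, args=("https://example.com/page/1", return_dict))
        p.start()
        p.join()  # wait for the child instead of sleeping a fixed 2 seconds
        if "error" in return_dict:
            print(f"Error: {return_dict['error']}")
        else:
            print("crawl finished:", dict(return_dict))
```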

diff --git a/src/api_edition/call_example.html b/src/api_edition/call_example.html
index e312a7d..11564b7 100644
--- a/src/api_edition/call_example.html
+++ b/src/api_edition/call_example.html
@@ -1,11 +1,14 @@
-API调用表单
+API调用示例
+
+此页面是番茄小说下载项目API模式的调用示例
+
+项目地址:https://github.com/xing-yv/fanqie-novel-download
+Gitee:https://gitee.com/xingyv1024/fanqie-novel-download
+
+使用教程:
+1.在API URL里填入你获取或自建的API地址
+2.选择是添加下载列表还是查询下载状态
+3.在书籍ID里填入你要下载或查询的书籍的ID(如何获取见下文)
+4.点击“提交”,等待API服务告诉你是否添加至下载列表或者查询到的信息
+5.获取下载的书籍(见下文)
+
+如何获取书籍ID:
+电脑端:
+在书籍目录页复制网址
+网址中“/page/”后的一串数字即为书籍ID(问号前)
+手机端:
+分享书籍并复制链接
+链接中“book_id=”后的一串数字即为书籍ID
+
+如何获取下载好的文件:
+如果您使用别人提供的API:请向提供者咨询
+如果您自建API,请到API程序文件夹下output文件夹中寻找
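The form on this page simply POSTs JSON with a 'class' field ('add' or 'search') and an 'id' field to the API. A rough sketch of the same call from Python; the address and book ID below are only assumptions, since the Flask route and port are not shown in the hunks of this diff, so substitute the API URL you obtained or self-hosted:

```python
import requests

API_URL = "http://127.0.0.1:5000/api"  # assumed address; replace with your API URL
BOOK_ID = "1234567890"                 # placeholder book ID taken from a /page/ URL

# "add" queues the book for download, "search" asks for its current status
for action in ("add", "search"):
    resp = requests.post(API_URL, json={"class": action, "id": BOOK_ID})
    print(action, resp.status_code, resp.json())
```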

+<script>
+    document.getElementById("apiForm").addEventListener("submit", function(event){
+        event.preventDefault()
+
+        var url = document.getElementById('url').value;
+
+        var data = {
+            class: document.getElementById('class').value,
+            id: document.getElementById('id').value
+        };
+
+        var requestClass = data.class; // 存储请求中的class字段
+
+        console.log("发送的数据:", data);
+
+        fetch(url, {
+            method: 'POST',
+            headers: {
+                'Content-Type': 'application/json'
+            },
+            body: JSON.stringify(data)
+        })
+        .then(response => {
+            console.log("收到的响应:", response);
+            return response.json();
+        })
+        .then(data => {
+            console.log("解析后的数据:", data);
+
+            var resultText = '';
+            if (data.error) {
+                resultText = '错误:' + data.error;
+            } else if (requestClass == 'add') { // 使用存储的requestClass变量
+                resultText = '消息:' + data.message + '<br>' +
+                    (data.position ? ('位置:' + data.position + '<br>') : '') +
+                    (data.status ? ('状态:' + data.status) : '');
+            } else if (requestClass == 'search') { // 使用存储的requestClass变量
+                resultText = '存在:' + (data.exists ? '是' : '否') + '<br>' +
+                    (data.position ? ('位置:' + data.position + '<br>') : '') +
+                    (data.status ? ('状态:' + data.status) : '');
+            }
+
+            console.log("显示的结果:", resultText);
+
+            document.getElementById('result').innerHTML = resultText;
+        })
+        .catch((error) => {
+            console.error('Error:', error);
+        });
+    });
+</script>
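The tutorial above says the book ID is the digit run after “/page/” in a desktop catalogue URL, or after “book_id=” in a shared mobile link, and fanqie_api.py below extracts the desktop form with re.search(r'page/(\d+)', url). A small sketch that handles both shapes; the sample links are placeholders:

```python
import re


def extract_book_id(link):
    """Return the digits after /page/ (desktop) or book_id= (mobile share), else None."""
    match = re.search(r"page/(\d+)", link) or re.search(r"book_id=(\d+)", link)
    return match.group(1) if match else None


# placeholder links in the two shapes described in the tutorial
print(extract_book_id("https://fanqienovel.com/page/1234567890?enter_from=search"))
print(extract_book_id("https://example.com/share?book_id=1234567890&from=app"))
```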
f"https://novel.snssdk.com/api/novel/book/reader/full/v1/?device_platform=android&parent_enterfrom=novel_channel_search.tab.&aid=2329&platform_id=1&group_id={chapter_id}&item_id={chapter_id}" + if "data" in api_data and "content" in api_data["data"]: + chapter_content = api_data["data"]["content"] + break # 如果成功获取章节内容,跳出重试循环 + else: + retry_count += 1 # 否则重试 - # 尝试获取章节内容 - chapter_content = None - retry_count = 1 - while retry_count < 4: # 设置最大重试次数 - # 获取 api 响应 - api_response = requests.get(api_url, headers=headers) + if retry_count == 4: + continue # 重试次数过多后,跳过当前章节 - # 解析 api 响应为 json 数据 - api_data = api_response.json() + # 提取文章标签中的文本 + chapter_text = re.search(r"
([\s\S]*?)
", chapter_content).group(1) - if "data" in api_data and "content" in api_data["data"]: - chapter_content = api_data["data"]["content"] - break # 如果成功获取章节内容,跳出重试循环 - else: - retry_count += 1 # 否则重试 + # 将

标签替换为换行符 + chapter_text = re.sub(r"

", "\n", chapter_text) - if retry_count == 4: - continue # 重试次数过多后,跳过当前章节 + # 去除其他 html 标签 + chapter_text = re.sub(r"", "", chapter_text) - # 提取文章标签中的文本 - chapter_text = re.search(r"

([\s\S]*?)
", chapter_content).group(1) + chapter_text = p.fix_publisher(chapter_text) - # 将

标签替换为换行符 - chapter_text = re.sub(r"

", "\n", chapter_text) + # 在小说内容字符串中添加章节标题和内容 + content += f"\n\n\n{chapter_title}\n{chapter_text}" - # 去除其他 html 标签 - chapter_text = re.sub(r"", "", chapter_text) + # 根据编码转换小说内容字符串为二进制数据 + data = content.encode(encoding, errors='ignore') - chapter_text = p.fix_publisher(chapter_text) + # 保存文件 + with open(file_path, "wb") as f: + f.write(data) - # 在小说内容字符串中添加章节标题和内容 - content += f"\n\n\n{chapter_title}\n{chapter_text}" + pass - # 根据编码转换小说内容字符串为二进制数据 - data = content.encode(encoding, errors='ignore') + except BaseException as e: + # 捕获所有异常,及时保存文件 + # 根据转换小说内容字符串为二进制数据 + data = content.encode(encoding, errors='ignore') - # 保存文件 - with open(file_path, "wb") as f: - f.write(data) + # 保存文件 + with open(file_path, "wb") as f: + f.write(data) + raise Exception(f"下载失败: {e}") + pass except BaseException as e: - # 捕获所有异常,及时保存文件 - # 根据转换小说内容字符串为二进制数据 - data = content.encode(encoding, errors='ignore') - - # 保存文件 - with open(file_path, "wb") as f: - f.write(data) - raise Exception(f"下载失败: {e}") + return_dict['error'] = str(e)