Skip to content
This repository has been archived by the owner on Mar 22, 2024. It is now read-only.

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
weiwei-cool committed Oct 30, 2023
2 parents f343fc6 + 7d82223 commit 0c8ae9f
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 115 deletions.
30 changes: 19 additions & 11 deletions src/api_edition/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import multiprocessing
import queue
import threading
from multiprocessing import Process
from multiprocessing import Process, Manager
import time
import fanqie_api as fa
from flask import Flask, request, jsonify
Expand All @@ -46,11 +46,18 @@ def __init__(self):
@staticmethod
def crawl(url):
try:
# 创建一个新的进程来运行爬虫函数
p = Process(target=fa.fanqie_l, args=(url, 'utf-8'))
p.start()
time.sleep(2)
return True
print(f"Crawling for URL: {url}")
with Manager() as manager:
return_dict = manager.dict()
# 创建一个新的进程来运行爬虫函数
p = Process(target=fa.fanqie_l, args=(url, 'utf-8', return_dict))
p.start()
p.join() # 等待进程结束
if 'error' in return_dict:
print(f"Error: {return_dict['error']}")
return False
else:
return True
except Exception as e:
print(f"Error: {e}")
return False
Expand All @@ -61,6 +68,7 @@ def worker(self):
try:
# 从URL队列中获取URL
url = self.url_queue.get(timeout=1)
self.task_status[url] = "进行中"
# 调用爬虫函数爬取URL,如果出错则标记为失败并跳过这个任务进行下一个
if Spider.crawl(url):
self.task_status[url] = "已完成"
Expand All @@ -84,9 +92,9 @@ def add_url(self, url):
if url not in self.task_status or self.task_status[url] == "失败":
self.url_queue.put(url)
self.task_status[url] = "等待中"
return "URL已添加到下载队列"
return "此书籍已添加到下载队列"
else:
return "URL已存在"
return "此书籍已存在"

def stop(self):
# 设置运行状态为False以停止工作线程
Expand All @@ -102,9 +110,9 @@ def stop(self):
def api():
# 获取请求数据
data = request.get_json()
# 检查请求数据是否包含'class'和'id'字段,如果没有则返回400错误
# 检查请求数据是否包含'class'和'id'字段,如果没有则返回418错误
if 'class' not in data or 'id' not in data:
return jsonify({'error': 'Bad Request'}), 400
return jsonify({'error': 'I\'m a teapot' }), 418

# 如果'class'字段的值为'add',则尝试将URL添加到队列中,并返回相应的信息和位置
if data['class'] == 'add':
Expand All @@ -122,7 +130,7 @@ def api():
return jsonify({'exists': status is not None, 'position': position, 'status': status})

else:
return jsonify({'error': 'Bad Request'}), 400
return jsonify({'error': 'I\'m a teapot'}), 418


if __name__ == "__main__":
Expand Down
104 changes: 75 additions & 29 deletions src/api_edition/call_example.html
Original file line number Diff line number Diff line change
@@ -1,52 +1,98 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
</head>
<body>

<h2>API调用表单</h2>
<h2>API调用示例</h2>

<form id="apiForm">
<label for="url">URL:</label><br>
<label for="url">API URL:</label><br>
<input type="text" id="url" name="url" value="http://localhost:5000/api"><br>
<label for="class">类别:</label><br>
<select id="class" name="class">
<option value="add">添加</option>
<option value="search">查询</option>
<!-- 在这里添加更多的选项 -->
</select><br>
<label for="id">ID:</label><br>
<label for="id">书籍ID:</label><br>
<input type="text" id="id" name="id"><br>
<input type="submit" value="提交">
</form>

<p id="result"></p>
<p>此页面是番茄小说下载项目API模式的调用示例</p>
<p>项目地址:https://github.com/xing-yv/fanqie-novel-download<br>Gitee:https://gitee.com/xingyv1024/fanqie-novel-download</p>
<p>使用教程:<br>1.在API URL里填入你获取或自建的API地址<br>2.选择是添加下载列表还是查询下载状态<br>3.在书籍ID里填入你要下载或查询的书籍的ID(如何获取见下文)<br>4.点击“提交”,等待API服务告诉你是否添加至下载列表或者查询到的信息<br>5.获取下载的书籍(见下文)</p>

<div>
<p>如何获取书籍ID:<br>
电脑端:<br>在书籍目录页复制网址<br>网址中“/page/”后的一串数字即为书籍ID(问号前)<br>
手机端:<br>分享书籍并复制链接<br>链接中“book_id=”后的一串数字即为书籍ID</p>
</div>

<p>如何获取下载好的文件:<br>如果您使用别人提供的API:请向提供者咨询<br>如果您自建API,请到API程序文件夹下output文件夹中寻找</p>

<footer>
<p>&copy; 2023 星隅 (xing-yv, shingyu). 版权所有。</p>

</footer>

<script>
document.getElementById("apiForm").addEventListener("submit", function(event){
event.preventDefault()

var url = document.getElementById('url').value;

var data = {
class: document.getElementById('class').value,
id: document.getElementById('id').value
};

fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(data)
})
.then(response => response.json())
.then(data => {
document.getElementById('result').textContent = JSON.stringify(data);
})
.catch((error) => {
console.error('Error:', error);
});
});
</script>
// Submit handler: POST the form's class/id as JSON to the user-supplied
// API URL and render the server's answer into the #result element.
document.getElementById("apiForm").addEventListener("submit", function(event){
event.preventDefault()

var url = document.getElementById('url').value;

var data = {
class: document.getElementById('class').value,
id: document.getElementById('id').value
};

var requestClass = data.class; // remember the request's class field (the response `data` shadows the request below)

console.log("发送的数据:", data);

fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(data)
})
.then(response => {
console.log("收到的响应:", response);
return response.json();
})
.then(data => {
console.log("解析后的数据:", data);

var resultText = '';
if (data.error) {
resultText = '错误:' + data.error;
} else if (requestClass == 'add') { // use the saved requestClass variable
resultText = '消息:' + data.message + '<br>' +
(data.position ? ('位置:' + data.position + '<br>') : '') +
(data.status ? ('状态:' + data.status) : '');
} else if (requestClass == 'search') { // use the saved requestClass variable
resultText = '存在:' + (data.exists ? '是' : '否') + '<br>' +
(data.position ? ('位置:' + data.position + '<br>') : '') +
(data.status ? ('状态:' + data.status) : '');
}


console.log("显示的结果:", resultText);

document.getElementById('result').innerHTML = resultText;
})
.catch((error) => {
console.error('Error:', error);
});
});

</script>


</body>
</html>
159 changes: 84 additions & 75 deletions src/api_edition/fanqie_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,112 +32,121 @@


# 定义正常模式用来下载番茄小说的函数
# Normal-mode downloader for a Fanqie novel
def fanqie_l(url, encoding, return_dict):
    """Download a whole novel from its Fanqie directory page.

    Args:
        url: directory-page URL containing ``page/<book_id>``.
        encoding: encoding used when writing the output text file.
        return_dict: shared (multiprocessing.Manager) dict; on failure
            the error message is stored under the 'error' key so the
            parent process can inspect it.

    Side effects: writes ``output/<title>_<book_id>.txt``.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
        }

        # Extract the book id from the directory-page URL
        book_id = re.search(r'page/(\d+)', url).group(1)

        # Fetch the page source
        response = requests.get(url, headers=headers)
        html = response.text

        # Parse the page source
        soup = BeautifulSoup(html, "html.parser")

        # Novel title
        title = soup.find("h1").get_text()
        # , class_ = "info-name"
        # Replace characters that are illegal in file names
        title = p.rename(title)

        # Novel meta information
        info = soup.find("div", class_="page-header-info").get_text()

        # Novel synopsis
        intro = soup.find("div", class_="page-abstract-content").get_text()

        # Header of the assembled novel text (runtime string, kept verbatim)
        content = f"""使用 @星隅(xing-yv) 所作开源工具下载
开源仓库地址:https://github.com/xing-yv/fanqie-novel-download
Gitee:https://gitee.com/xingyv1024/fanqie-novel-download/
任何人无权限制您访问本工具,如果有向您提供代下载服务者未事先告知您工具的获取方式,请向作者举报:[email protected]
{title}
{info}
{intro}
"""

        # All chapter entries on the directory page
        chapters = soup.find_all("div", class_="chapter-item")

        # Output file name includes the book id to avoid title collisions
        file_path = path.join('output', f'{title}_{book_id}.txt')

        os.makedirs("output", exist_ok=True)

        try:
            # Walk every chapter link
            for chapter in chapters:
                time.sleep(0.5)  # throttle requests
                # Chapter title
                chapter_title = chapter.find("a").get_text()

                # Chapter URL (may be relative)
                chapter_url = urljoin(url, chapter.find("a")["href"])

                # Chapter id
                chapter_id = re.search(r"/(\d+)", chapter_url).group(1)

                # Build the content-API URL
                api_url = f"https://novel.snssdk.com/api/novel/book/reader/full/v1/?device_platform=android&parent_enterfrom=novel_channel_search.tab.&aid=2329&platform_id=1&group_id={chapter_id}&item_id={chapter_id}"

                # Try to fetch the chapter content with a bounded retry loop
                chapter_content = None
                retry_count = 1
                while retry_count < 4:  # maximum number of attempts
                    api_response = requests.get(api_url, headers=headers)

                    # Parse the API response as JSON
                    api_data = api_response.json()

                    if "data" in api_data and "content" in api_data["data"]:
                        chapter_content = api_data["data"]["content"]
                        break  # got the chapter content, stop retrying
                    else:
                        retry_count += 1  # retry

                if retry_count == 4:
                    continue  # too many retries: skip this chapter

                # Extract the text inside the <article> tag
                chapter_text = re.search(r"<article>([\s\S]*?)</article>", chapter_content).group(1)

                # Turn <p> tags into newlines
                chapter_text = re.sub(r"<p>", "\n", chapter_text)

                # Strip remaining html tags
                chapter_text = re.sub(r"</?\w+>", "", chapter_text)

                chapter_text = p.fix_publisher(chapter_text)

                # Append chapter title and body to the novel text
                content += f"\n\n\n{chapter_title}\n{chapter_text}"

            # Encode the novel text with the requested encoding
            data = content.encode(encoding, errors='ignore')

            # Save the file
            with open(file_path, "wb") as f:
                f.write(data)

        except BaseException as e:
            # On any failure, save whatever was downloaded so far
            data = content.encode(encoding, errors='ignore')

            with open(file_path, "wb") as f:
                f.write(data)
            raise Exception(f"下载失败: {e}")

    except Exception as e:
        # Report the failure to the parent process via the shared dict
        return_dict['error'] = str(e)

0 comments on commit 0c8ae9f

Please sign in to comment.