Skip to content
This repository has been archived by the owner on Mar 22, 2024. It is now read-only.

Commit

Permalink
Merge remote-tracking branch 'origin/main'
Browse files Browse the repository at this point in the history
  • Loading branch information
weiwei-cool committed Oct 30, 2023
2 parents f343fc6 + 7d82223 commit 0c8ae9f
Show file tree
Hide file tree
Showing 3 changed files with 178 additions and 115 deletions.
30 changes: 19 additions & 11 deletions src/api_edition/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
import multiprocessing
import queue
import threading
from multiprocessing import Process
from multiprocessing import Process, Manager
import time
import fanqie_api as fa
from flask import Flask, request, jsonify
Expand All @@ -46,11 +46,18 @@ def __init__(self):
@staticmethod
def crawl(url):
try:
# 创建一个新的进程来运行爬虫函数
p = Process(target=fa.fanqie_l, args=(url, 'utf-8'))
p.start()
time.sleep(2)
return True
print(f"Crawling for URL: {url}")
with Manager() as manager:
return_dict = manager.dict()
# 创建一个新的进程来运行爬虫函数
p = Process(target=fa.fanqie_l, args=(url, 'utf-8', return_dict))
p.start()
p.join() # 等待进程结束
if 'error' in return_dict:
print(f"Error: {return_dict['error']}")
return False
else:
return True
except Exception as e:
print(f"Error: {e}")
return False
Expand All @@ -61,6 +68,7 @@ def worker(self):
try:
# 从URL队列中获取URL
url = self.url_queue.get(timeout=1)
self.task_status[url] = "进行中"
# 调用爬虫函数爬取URL,如果出错则标记为失败并跳过这个任务进行下一个
if Spider.crawl(url):
self.task_status[url] = "已完成"
Expand All @@ -84,9 +92,9 @@ def add_url(self, url):
if url not in self.task_status or self.task_status[url] == "失败":
self.url_queue.put(url)
self.task_status[url] = "等待中"
return "URL已添加到下载队列"
return "此书籍已添加到下载队列"
else:
return "URL已存在"
return "此书籍已存在"

def stop(self):
# 设置运行状态为False以停止工作线程
Expand All @@ -102,9 +110,9 @@ def stop(self):
def api():
# 获取请求数据
data = request.get_json()
# 检查请求数据是否包含'class'和'id'字段,如果没有则返回400错误
# 检查请求数据是否包含'class'和'id'字段,如果没有则返回418错误
if 'class' not in data or 'id' not in data:
return jsonify({'error': 'Bad Request'}), 400
return jsonify({'error': 'I\'m a teapot' }), 418

# 如果'class'字段的值为'add',则尝试将URL添加到队列中,并返回相应的信息和位置
if data['class'] == 'add':
Expand All @@ -122,7 +130,7 @@ def api():
return jsonify({'exists': status is not None, 'position': position, 'status': status})

else:
return jsonify({'error': 'Bad Request'}), 400
return jsonify({'error': 'I\'m a teapot'}), 418


if __name__ == "__main__":
Expand Down
104 changes: 75 additions & 29 deletions src/api_edition/call_example.html
Original file line number Diff line number Diff line change
@@ -1,52 +1,98 @@
<!DOCTYPE html>
<html>
<head>
<meta charset="UTF-8">
</head>
<body>

<h2>API调用表单</h2>
<h2>API调用示例</h2>

<form id="apiForm">
<label for="url">URL:</label><br>
<label for="url">API URL:</label><br>
<input type="text" id="url" name="url" value="http://localhost:5000/api"><br>
<label for="class">类别:</label><br>
<select id="class" name="class">
<option value="add">添加</option>
<option value="search">查询</option>
<!-- 在这里添加更多的选项 -->
</select><br>
<label for="id">ID:</label><br>
<label for="id">书籍ID:</label><br>
<input type="text" id="id" name="id"><br>
<input type="submit" value="提交">
</form>

<p id="result"></p>
<p>此页面是番茄小说下载项目API模式的调用示例</p>
<p>项目地址:https://github.com/xing-yv/fanqie-novel-download<br>Gitee:https://gitee.com/xingyv1024/fanqie-novel-download</p>
<p>使用教程:<br>1.在API URL里填入你获取或自建的API地址<br>2.选择是添加下载列表还是查询下载状态<br>3.在书籍ID里填入你要下载或查询的书籍的ID(如何获取见下文)<br>4.点击“提交”,等待API服务告诉你是否添加至下载列表或者查询到的信息<br>5.获取下载的书籍(见下文)</p>

<div>
<p>如何获取书籍ID:<br>
电脑端:<br>在书籍目录页复制网址<br>网址中“/page/”后的一串数字即为书籍ID(问号前)<br>
手机端:<br>分享书籍并复制链接<br>链接中“book_id=”后的一串数字即为书籍ID</p>
</div>

<p>如何获取下载好的文件:<br>如果您使用别人提供的API:请向提供者咨询<br>如果您自建API,请到API程序文件夹下output文件夹中寻找</p>

<footer>
<p>&copy; 2023 星隅 (xing-yv, shingyu). 版权所有。</p>

</footer>

<script>
document.getElementById("apiForm").addEventListener("submit", function(event){
event.preventDefault()

var url = document.getElementById('url').value;

var data = {
class: document.getElementById('class').value,
id: document.getElementById('id').value
};

fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(data)
})
.then(response => response.json())
.then(data => {
document.getElementById('result').textContent = JSON.stringify(data);
})
.catch((error) => {
console.error('Error:', error);
});
});
</script>
// Submit handler: POST the form's class/id as JSON to the user-supplied
// API URL and render the server's answer into the #result element.
document.getElementById("apiForm").addEventListener("submit", function(event){
event.preventDefault()

var url = document.getElementById('url').value;

var data = {
class: document.getElementById('class').value,
id: document.getElementById('id').value
};

var requestClass = data.class; // remember the request's class field (the response `data` shadows the request below)

console.log("发送的数据:", data);

fetch(url, {
method: 'POST',
headers: {
'Content-Type': 'application/json'
},
body: JSON.stringify(data)
})
.then(response => {
console.log("收到的响应:", response);
return response.json();
})
.then(data => {
console.log("解析后的数据:", data);

var resultText = '';
if (data.error) {
resultText = '错误:' + data.error;
} else if (requestClass == 'add') { // use the saved requestClass variable
resultText = '消息:' + data.message + '<br>' +
(data.position ? ('位置:' + data.position + '<br>') : '') +
(data.status ? ('状态:' + data.status) : '');
} else if (requestClass == 'search') { // use the saved requestClass variable
resultText = '存在:' + (data.exists ? '是' : '否') + '<br>' +
(data.position ? ('位置:' + data.position + '<br>') : '') +
(data.status ? ('状态:' + data.status) : '');
}


console.log("显示的结果:", resultText);

document.getElementById('result').innerHTML = resultText;
})
.catch((error) => {
console.error('Error:', error);
});
});

</script>


</body>
</html>
159 changes: 84 additions & 75 deletions src/api_edition/fanqie_api.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,112 +32,121 @@


# 定义正常模式用来下载番茄小说的函数
# Normal-mode downloader for a Fanqie novel
def fanqie_l(url, encoding, return_dict):
    """Download a whole novel from its Fanqie directory page.

    Args:
        url: directory-page URL containing ``page/<book_id>``.
        encoding: encoding used when writing the output text file.
        return_dict: shared (multiprocessing.Manager) dict; on failure
            the error message is stored under the 'error' key so the
            parent process can inspect it.

    Side effects: writes ``output/<title>_<book_id>.txt``.
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36 Edg/119.0.0.0"
        }

        # Extract the book id from the directory-page URL
        book_id = re.search(r'page/(\d+)', url).group(1)

        # Fetch the page source
        response = requests.get(url, headers=headers)
        html = response.text

        # Parse the page source
        soup = BeautifulSoup(html, "html.parser")

        # Novel title
        title = soup.find("h1").get_text()
        # , class_ = "info-name"
        # Replace characters that are illegal in file names
        title = p.rename(title)

        # Novel meta information
        info = soup.find("div", class_="page-header-info").get_text()

        # Novel synopsis
        intro = soup.find("div", class_="page-abstract-content").get_text()

        # Header of the assembled novel text (runtime string, kept verbatim)
        content = f"""使用 @星隅(xing-yv) 所作开源工具下载
开源仓库地址:https://github.com/xing-yv/fanqie-novel-download
Gitee:https://gitee.com/xingyv1024/fanqie-novel-download/
任何人无权限制您访问本工具,如果有向您提供代下载服务者未事先告知您工具的获取方式,请向作者举报:[email protected]
{title}
{info}
{intro}
"""

        # All chapter entries on the directory page
        chapters = soup.find_all("div", class_="chapter-item")

        # Output file name includes the book id to avoid title collisions
        file_path = path.join('output', f'{title}_{book_id}.txt')

        os.makedirs("output", exist_ok=True)

        try:
            # Walk every chapter link
            for chapter in chapters:
                time.sleep(0.5)  # throttle requests
                # Chapter title
                chapter_title = chapter.find("a").get_text()

                # Chapter URL (may be relative)
                chapter_url = urljoin(url, chapter.find("a")["href"])

                # Chapter id
                chapter_id = re.search(r"/(\d+)", chapter_url).group(1)

                # Build the content-API URL
                api_url = f"https://novel.snssdk.com/api/novel/book/reader/full/v1/?device_platform=android&parent_enterfrom=novel_channel_search.tab.&aid=2329&platform_id=1&group_id={chapter_id}&item_id={chapter_id}"

                # Try to fetch the chapter content with a bounded retry loop
                chapter_content = None
                retry_count = 1
                while retry_count < 4:  # maximum number of attempts
                    api_response = requests.get(api_url, headers=headers)

                    # Parse the API response as JSON
                    api_data = api_response.json()

                    if "data" in api_data and "content" in api_data["data"]:
                        chapter_content = api_data["data"]["content"]
                        break  # got the chapter content, stop retrying
                    else:
                        retry_count += 1  # retry

                if retry_count == 4:
                    continue  # too many retries: skip this chapter

                # Extract the text inside the <article> tag
                chapter_text = re.search(r"<article>([\s\S]*?)</article>", chapter_content).group(1)

                # Turn <p> tags into newlines
                chapter_text = re.sub(r"<p>", "\n", chapter_text)

                # Strip remaining html tags
                chapter_text = re.sub(r"</?\w+>", "", chapter_text)

                chapter_text = p.fix_publisher(chapter_text)

                # Append chapter title and body to the novel text
                content += f"\n\n\n{chapter_title}\n{chapter_text}"

            # Encode the novel text with the requested encoding
            data = content.encode(encoding, errors='ignore')

            # Save the file
            with open(file_path, "wb") as f:
                f.write(data)

        except BaseException as e:
            # On any failure, save whatever was downloaded so far
            data = content.encode(encoding, errors='ignore')

            with open(file_path, "wb") as f:
                f.write(data)
            raise Exception(f"下载失败: {e}")

    except Exception as e:
        # Report the failure to the parent process via the shared dict
        return_dict['error'] = str(e)

0 comments on commit 0c8ae9f

Please sign in to comment.