Merge pull request #6 from weiwei-cool/main

初步解决之前提交的格式错误issues
shing-yu · Oct 14, 2023 · 6e3e1b7
2 parents 3e0dcde + 200135f
commit 6e3e1b7
Show file tree

Hide file tree

Showing 4 changed files with 16 additions and 0 deletions.
diff --git a/src/fanqie_batch.py b/src/fanqie_batch.py
@@ -182,6 +182,10 @@ def download_novels(url, encoding, user_agent, path_choice, folder_path, data_fo
         # 去除其他 html 标签
         chapter_text = re.sub(r"</?\w+>", "", chapter_text)
 
+        # 去除带class的p标签
+        # TODO 格式乱码
+        chapter_text = re.sub(r'<p class=".*?">', '', chapter_text)
+
         # 在小说内容字符串中添加章节标题和内容
         content += f"\n\n\n{chapter_title}\n{chapter_text}"
 

diff --git a/src/fanqie_chapter.py b/src/fanqie_chapter.py
@@ -137,6 +137,10 @@ def fanqie_c(url, encoding, user_agent, path_choice, start_chapter_id):
         # 去除其他 html 标签
         chapter_text = re.sub(r"</?\w+>", "", chapter_text)
 
+        # 去除带class的p标签
+        # TODO 格式乱码
+        chapter_text = re.sub(r'<p class=".*?">', '', chapter_text)
+
         # 在章节内容字符串中添加章节标题和内容
         content_all = f"{chapter_title}\n{chapter_text}"
 

diff --git a/src/fanqie_debug.py b/src/fanqie_debug.py
@@ -151,6 +151,10 @@ def fanqie_d(url, encoding, user_agent, path_choice, data_folder, start_chapter_
         # 去除其他 html 标签
         chapter_text = re.sub(r"</?\w+>", "", chapter_text)
 
+        # 去除带class的p标签
+        # TODO 格式乱码
+        chapter_text = re.sub(r'<p class=".*?">', '', chapter_text)
+
         # 在小说内容字符串中添加章节标题和内容
         content += f"\n\n\n{chapter_title}\n{chapter_text}"
 

diff --git a/src/fanqie_normal.py b/src/fanqie_normal.py
@@ -130,6 +130,10 @@ def fanqie_n(url, encoding, user_agent, path_choice, data_folder, start_chapter_
         # 去除其他 html 标签
         chapter_text = re.sub(r"</?\w+>", "", chapter_text)
 
+        # 去除带class的p标签
+        # TODO 格式乱码
+        chapter_text = re.sub(r'<p class=".*?">', '', chapter_text)
+
         # 在小说内容字符串中添加章节标题和内容
         content += f"\n\n\n{chapter_title}\n{chapter_text}"