fix bug #55

CheshireCC · Dec 29, 2023 · 6fac605 · 6fac605
1 parent 1e9fd1a
commit 6fac605
Show file tree

Hide file tree

Showing 7 changed files with 52 additions and 22 deletions.
diff --git a/fasterWhisperGUIConfig.json b/fasterWhisperGUIConfig.json
@@ -8,15 +8,15 @@
     "model_param": {
         "localModel": true,
         "onlineModel": false,
-        "model_path": "F:/WhisperModels/faster-whisper/large-v3-float32",
+        "model_path": "",
         "modelName": 0,
         "use_v3_model": true,
         "device": 1,
         "deviceIndex": "0",
         "preciese": 0,
         "thread_num": "4",
         "num_worker": "1",
-        "download_root": "C:/Users/12059/.cache/huggingface/hub",
+        "download_root": "",
         "local_files_only": false
     },
     "vad_param": {
@@ -30,7 +30,7 @@
     },
     "setting": {
         "saveConfig": true,
-        "autoLoadModel": false,
+        "autoLoadModel": true,
         "language": 2,
         "huggingface_user_token": "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",
         "autoGoToOutputPage": 2,
@@ -43,7 +43,7 @@
         "best_of": "1",
         "patience": "1.0",
         "length_penalty": "1.0",
-        "temperature": "0",
+        "temperature": "0.5",
         "compression_ratio_threshold": "2.4",
         "log_prob_threshold": "-1.0",
         "no_speech_threshold": "0.6",

diff --git a/faster_whisper_GUI/UI_MainWindows.py b/faster_whisper_GUI/UI_MainWindows.py
@@ -11,8 +11,8 @@
                             )
 
 from PySide6.QtWidgets import  (
-                                # QSpacerItem,
                                 # QApplication,
+                                QSpacerItem,
                                 QWidget
                                 , QStackedWidget
                                 , QVBoxLayout
@@ -231,8 +231,8 @@ def setupUI(self):
         self.setCentralWidget(self.mainWindowsWidget)
 
         # TODO: 创建一个空对象 用于改善布局顶部
-        # self.spacer_main = QSpacerItem(0,25)
-        # self.vBoxLayout.addItem(self.spacer_main)
+        self.spacer_main = QSpacerItem(0,25)
+        self.vBoxLayout.addItem(self.spacer_main)
 
         # 设置显示图层到最后避免遮挡窗体按钮
         self.mainWindowsWidget.lower()

diff --git a/faster_whisper_GUI/fileNameListViewInterface.py b/faster_whisper_GUI/fileNameListViewInterface.py
@@ -129,7 +129,7 @@ def testFileWithAudioTrackOrNot(self, fileNames:list[str]) -> list[str]:
         for fileName in fileNames:
             cont = None
             try:
-                cont = av.open(fileName)
+                cont = av.open(fileName, metadata_errors="ignore")
             except Exception as e:
                 print(f"InvalidDataError : {fileName} \nerror:{str(e)}")
                 ignoreFile.append(fileName)
@@ -147,6 +147,9 @@ def testFileWithAudioTrackOrNot(self, fileNames:list[str]) -> list[str]:
 
                 else:
                     fileNames_.append(fileName)
+
+                cont.close()
+
         if len(ignoreFile) > 0:
             ifi = ignore_files_info(ignore_files=ignoreFile, ignore_reason=self.__tr("不包含音频流的文件将被忽略："))
             self.ignore_files_signal.emit(ifi)

diff --git a/faster_whisper_GUI/mainWindows.py b/faster_whisper_GUI/mainWindows.py
@@ -136,6 +136,7 @@ def getDownloadCacheDir(self):
                                                     , self.tr("选择缓存文件夹")
                                                     , self.page_model.LineEdit_download_root.text()
                                                 ):
+
             self.page_model.LineEdit_download_root.setText(path)
             self.download_cache_path = path
 
@@ -425,7 +426,7 @@ def transcribeProcess(self):
                 num_worker = 1
 
             # 创建进程
-            self.log.write(f"create transcribe process with {num_worker} workers")
+            self.log.write(f"create transcribe process with {num_worker} workers\n")
             self.transcribe_thread = TranscribeWorker(model = self.FasterWhisperModel
                                                     , parameters = Transcribe_params
                                                     , vad_filter = vad_filter
@@ -440,7 +441,7 @@ def transcribeProcess(self):
             self.page_process.button_process.setIcon(r":/resource/Image/Cancel_red.svg")
 
             # 启动进程
-            self.log.write(f"start transcribe process")
+            self.log.write(f"start transcribe process\n")
             # self.transcribe_thread.is_running == True
             self.transcribe_thread.start()
             self.setStateTool(self.tr("音频处理"), self.tr("正在处理中"), False)
@@ -1278,7 +1279,26 @@ def unloadWhisperModel(self):
             print("unload model failed")
             print(str(e))
             self.raiseErrorInfoBar(self.tr("卸载模型失败"), self.tr("卸载模型失败，请在转写之前禁用温度回退配置"))
-
+
+    def outputAudioPartWithSpeaker(self):
+        """
+        Print, for each file in self.current_result, whether its first audio stream is mono or multi-channel.
+        Files without an audio stream are skipped; splitting the audio per speaker is not implemented yet.
+        """
+        outputWithDateTime("SegmentAudioFileWithSpeaker")
+        for segments, file_path, info in self.current_result:
+            with av.open(file_path) as av_file:
+                # A default of None avoids an uncaught StopIteration when the file has no audio stream.
+                stream_ = next((s for s in av_file.streams if s.codec_context.type == 'audio'), None)
+                if stream_ is None:
+                    continue
+                if stream_.channels < 2:
+                    print("单声道音频")
+                else:
+                    print("双声道音频")
+
+
+
     def singleAndSlotProcess(self):
         """
         process single connect and others
@@ -1313,6 +1333,7 @@ def singleAndSlotProcess(self):
 
         self.page_output.tableTab.signal_delete_table.connect(self.deleteResultTableEvent)
         self.page_output.unloadWhisperModelPushbutton.clicked.connect(self.unloadWhisperModel)
+        self.page_output.outputAudioPartWithSpeakerButton.clicked.connect(self.outputAudioPartWithSpeaker)
 
         self.page_demucs.fileListView.ignore_files_signal.connect(lambda ignore_files_info: self.raiseInfoBar(self.tr("忽略文件"), ignore_files_info["ignore_reason"]+"\n"+"\n".join(ignore_files_info["ignore_files"])))
         self.page_process.fileNameListView.ignore_files_signal.connect(lambda ignore_files_info: self.raiseInfoBar(self.tr("忽略文件"), ignore_files_info["ignore_reason"]+"\n"+"\n".join(ignore_files_info["ignore_files"])))

diff --git a/faster_whisper_GUI/modelLoad.py b/faster_whisper_GUI/modelLoad.py
@@ -4,7 +4,7 @@
 from typing import (List, Optional, TypedDict, Union)
 from PySide6.QtCore import QThread, Signal
 from faster_whisper import WhisperModel
-from concurrent import futures 
+
 
 class modelParamDict(TypedDict):
     model_size_or_path: str

diff --git a/faster_whisper_GUI/outputPageNavigationInterface.py b/faster_whisper_GUI/outputPageNavigationInterface.py
@@ -54,6 +54,7 @@ def getparam(self) -> dict:
         param["outputEncoding"] = self.combox_output_code.currentIndex()
 
         return param
+
     def setupUI(self):
         # -----------------------------------------------------------------------------------------
 
@@ -68,9 +69,14 @@ def setupUI(self):
         self.WhisperXSpeakerDiarizeButton.setText(self.tr("WhisperX 说话人分离"))
         self.WhisperXSpeakerDiarizeButton.setToolTip(self.tr("speachBrain 模型声纹聚类分析，将不同语音段的不同说话人进行分离"))
 
+        self.outputAudioPartWithSpeakerButton = PushButton()
+        self.outputAudioPartWithSpeakerButton.setText(self.tr("输出音频分段"))
+        self.outputAudioPartWithSpeakerButton.setToolTip(self.tr("将音频按照说话人进行分段，并输出"))
+
         self.WhisperXHBoxLayout = QHBoxLayout()
         self.WhisperXHBoxLayout.addWidget(self.WhisperXAligmentTimeStampleButton, 0, Qt.AlignmentFlag.AlignLeft)
         self.WhisperXHBoxLayout.addWidget(self.WhisperXSpeakerDiarizeButton, 0, Qt.AlignmentFlag.AlignLeft)
+        self.WhisperXHBoxLayout.addWidget(self.outputAudioPartWithSpeakerButton, 0, Qt.AlignmentFlag.AlignLeft)
         self.WhisperXHBoxLayout.addSpacing(10)
 
         self.outputSubtitleFileButton = PushButton()

diff --git a/faster_whisper_GUI/transcribe.py b/faster_whisper_GUI/transcribe.py
@@ -211,15 +211,15 @@ def __init__(self
 
 
     def transcribe_file(self, file) -> (TranscriptionInfo, List):
-        try:
-            is_av_file = self.try_decode_avFile(file)
-            if not is_av_file:
-                return (None,None)
-        except Exception as e: # 捕获异常
-            print(f'    {file.split("/")[-1]} 不是一个有效的音视频文件\n')
-            print(f"    error:{str(e)}")
-            print(f"    ignore File : {file} \n")
-            return (None, None)
+        # try:
+        #     is_av_file = self.try_decode_avFile(file)
+        #     if not is_av_file:
+        #         return (None,None)
+        # except Exception as e: # 捕获异常
+        #     print(f'    {file.split("/")[-1]} 不是一个有效的音视频文件\n')
+        #     print(f"    error:{str(e)}")
+        #     print(f"    ignore File : {file} \n")
+        #     return (None, None)
 
         print("开始处理音频...")
         segments, info = self.model.transcribe(
@@ -303,7 +303,7 @@ def try_decode_avFile(self, file) -> bool:
         print("\n")
         print(f"current task: {file}")
         print("  尝试解析文件")
-        container = av.open(file) # 尝试打开文件      
+        container = av.open(file, metadata_errors="ignore") # 尝试打开文件      
         av_cont = container.streams
         for stream in av_cont:
             if stream.codec_context.type == "audio":