0.8.5

CheshireCC · Dec 6, 2024 · dc06f25 · dc06f25
1 parent 7ef7369
commit dc06f25
Show file tree

Hide file tree

Showing 12 changed files with 4,084 additions and 3,944 deletions.
diff --git a/en.ts b/en.ts
diff --git a/fasterWhisperGUIConfig.json b/fasterWhisperGUIConfig.json
@@ -8,12 +8,12 @@
     "model_param": {
         "localModel": true,
         "onlineModel": false,
-        "model_path": "F:/WhisperModels/faster-whisper/whisper-large-v2-ct2-32",
+        "model_path": "F:/WhisperModels/faster-whisper/large-v3-turbo",
         "modelName": 11,
-        "use_v3_model": false,
+        "use_v3_model": true,
         "device": 1,
         "deviceIndex": "0",
-        "preciese": 5,
+        "preciese": 4,
         "thread_num": "4",
         "num_worker": "1",
         "download_root": "C:/Users/12059/.cache/huggingface/hub",
@@ -22,24 +22,25 @@
     "vad_param": {
         "use_VAD": true,
         "threshold": 0.2,
-        "minSpeechDuration": "250",
+        "minSpeechDuration": "0",
         "minSilenceDuration": "2000",
         "maxSpeechDuration": "inf",
         "speechPad": "400"
     },
     "setting": {
         "saveConfig": true,
         "autoLoadModel": false,
-        "language": 2,
+        "language": 0,
         "huggingface_user_token": "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",
         "autoGoToOutputPage": 2,
         "autoClearTempFiles": true,
         "themeColor": "#e64ecf"
     },
     "Transcription_param": {
         "aggregate_contents": true,
-        "language": -1,
+        "language": 0,
         "task": false,
+        "multilingual": false,
         "beam_size": "1",
         "best_of": "5",
         "patience": "1.0",
@@ -67,7 +68,7 @@
         "clip_timestamps": "",
         "hallucination_silence_threshold": "0.5",
         "hotwords": "",
-        "language_detection_threshold": "",
+        "language_detection_threshold": "0.5",
         "language_detection_segments": "1"
     },
     "output_whisperX": {

diff --git a/faster_whisper_GUI/aboutPageNavigationInterface.py b/faster_whisper_GUI/aboutPageNavigationInterface.py
@@ -94,6 +94,10 @@ def __init__(self, parent = None) -> None:
         - 侮辱或诽谤他人，侵害他人合法权益的。
         - 含有法律、行政法规禁止的其他内容的。
     · 因您的数据的产生、收集、处理、使用等任何相关事项存在违反法律法规等情况而造成的全部后果及责任均由您自行承担。
+    · 本软件不保证其服务不会中断，对服务的及时性、安全性、准确性也都不作保证。
+    · 本软件不对任何因使用本软件而可能遭致的意外或损失承担责任。
+    · 本软件所提供的任何信息、内容、材料、产品和服务均不构成任何投资建议。您应自行独立判断并承担可能的风险。
+    · 本软件完全免费，您不应该直接将本软件安装包本体用于商业售卖。
         """)
 
         self.setupUI()

diff --git a/faster_whisper_GUI/config.py b/faster_whisper_GUI/config.py
@@ -127,6 +127,7 @@
                 "large-v1", 
                 "large-v2",
                 "large-v3",
+                "large-v3-turbo",
                 "distil-large-v3",
                 "distil-large-v2",
                 "distil-medium.en",

diff --git a/faster_whisper_GUI/mainWindows.py b/faster_whisper_GUI/mainWindows.py
@@ -83,8 +83,11 @@ def flush( self ):
     def fileno( self ):
         return -1
     def write( self, text ):
-        if ( not self.signalsBlocked() ):
-            self.outputSignal.emit(str(text))
+        try:
+            if ( not self.signalsBlocked() ):
+                self.outputSignal.emit(str(text))
+        except:
+            pass
 
 class statusToolsSignalStore(QObject):
     StateToolSignal = Signal(bool)
@@ -697,6 +700,8 @@ def getParamTranscribe(self) -> dict:
         # task = Task_list[int(task)]
         Transcribe_params["task"] = task
 
+        Transcribe_params["log_progress"] = False
+
         beam_size = int(self.page_transcribes.LineEdit_beam_size.text().replace(" ", ""))
         Transcribe_params["beam_size"] = beam_size
 
@@ -768,6 +773,9 @@ def getParamTranscribe(self) -> dict:
         append_punctuations = self.page_transcribes.LineEdit_append_punctuations.text().replace(" ","")
         Transcribe_params["append_punctuations"] = append_punctuations
 
+        multilingual = self.page_transcribes.switchButton_multilingual.isChecked()
+        Transcribe_params["multilingual"] = multilingual
+
         repetition_penalty = self.page_transcribes.LineEdit_repetition_penalty.text().strip()
         repetition_penalty = float(repetition_penalty)
         Transcribe_params['repetition_penalty'] = repetition_penalty  
@@ -795,7 +803,7 @@ def getParamTranscribe(self) -> dict:
         chunk_length = self.page_transcribes.LineEdit_chunk_length.text().strip()
         if chunk_length != "":
             if chunk_length.isdigit():
-                chunk_length = float(chunk_length)
+                chunk_length = int(chunk_length)
             else:
                 chunk_length = None
         else :
@@ -862,15 +870,15 @@ def getVADparam(self) -> dict:
         if not vad_filter:
             return VAD_param
 
-        threshold = round(self.page_VAD.doubleSpin_VAD_param_threshold.value(),2)
+        onset = round(self.page_VAD.doubleSpin_VAD_param_threshold.value(),2)
         min_speech_duration_ms = int(self.page_VAD.LineEdit_VAD_param_min_speech_duration_ms.text().replace(" ", ""))
         max_speech_duration_s = float(self.page_VAD.LineEdit_VAD_param_max_speech_duration_s.text().replace(" ", ""))
         min_silence_duration_ms = int(self.page_VAD.LineEdit_VAD_param_min_silence_duration_ms.text().replace(" ", ""))
         # window_size_samples = int(self.page_VAD.combox_VAD_param_window_size_samples.currentText())
         speech_pad_ms = int(self.page_VAD.LineEdit_VAD_param_speech_pad_ms.text().replace(" ", ""))
 
         VAD_param["param"] = VADParameters()
-        VAD_param["param"]["threshold"] = threshold
+        VAD_param["param"]["onset"] = onset
         VAD_param["param"]["min_speech_duration_ms"] = min_speech_duration_ms
         VAD_param["param"]["max_speech_duration_s"] = max_speech_duration_s
         VAD_param["param"]["min_silence_duration_ms"] = min_silence_duration_ms

diff --git a/faster_whisper_GUI/tranccribePageNavigationInterface.py b/faster_whisper_GUI/tranccribePageNavigationInterface.py
@@ -150,6 +150,7 @@ def setupUI(self):
 
         # --------------------------------------------------------------------------------------------
         self.LineEdit_language_detection_threshold = LineEdit()
+        self.LineEdit_language_detection_threshold.setText("0.5")
         self.language_detection_threshold_param_widget = ParamWidget(
                                                                         self.__tr("语言检测阈值"),
                                                                         self.__tr("自动检测音频时，语言检测的阈值。如果某种语言的最大概率高于此值，则会检测为该语言。"),
@@ -178,6 +179,17 @@ def setupUI(self):
                                             )
         widget_list.append(self.task_param_widget)
 
+        # --------------------------------------------------------------------------------------------
+
+        self.switchButton_multilingual = SwitchButton()
+        self.switchButton_multilingual.setChecked(False)
+
+        self.multilingual_param_widget = ParamWidget(self.__tr("多语言模式"),
+                                                self.__tr("多语言模式，允许模型处理包含多种语言的音频。"),
+                                                self.switchButton_multilingual
+                                            )
+        widget_list.append(self.multilingual_param_widget)
+
         # --------------------------------------------------------------------------------------------
         self.switchButton_without_timestamps = SwitchButton()
         self.switchButton_without_timestamps.setChecked(False)
@@ -668,7 +680,8 @@ def setParam(self, Transcribe_params:dict) -> None:
             self.lineEdit_hallucination_silence_threshold.setText(Transcribe_params["hallucination_silence_threshold"])
             self.LineEdit_hotwords.setText(Transcribe_params["hotwords"])
             self.LineEdit_language_detection_threshold.setText(Transcribe_params["language_detection_threshold"])
-            self.language_detection_segments_param_widget.setText(Transcribe_params["language_detection_segments"])
+            self.lienEdit_language_detection_segments.setText(Transcribe_params["language_detection_segments"])
+            self.switchButton_multilingual.setChecked(Transcribe_params["multilingual"])
         except:
             pass
 
@@ -687,6 +700,9 @@ def getParam(self) -> dict:
         # task = STR_BOOL[task]
         Transcribe_params["task"] = task
 
+        multilingual = self.switchButton_multilingual.isChecked()
+        Transcribe_params["multilingual"] = multilingual
+
         beam_size = self.LineEdit_beam_size.text().replace(" ", "")
         Transcribe_params["beam_size"] = beam_size
 
@@ -779,6 +795,5 @@ def getParam(self) -> dict:
         Transcribe_params["language_detection_threshold"] = self.LineEdit_language_detection_threshold.text().strip()
         Transcribe_params["language_detection_segments"] = self.lienEdit_language_detection_segments.text().strip()
 
-
         return Transcribe_params
 
diff --git a/faster_whisper_GUI/transcribe.py b/faster_whisper_GUI/transcribe.py
@@ -232,6 +232,7 @@ def transcribe_file(self, file) -> (TranscriptionInfo, List): # type: ignore
                                                 audio=file,
                                                 language=self.parameters["language"],
                                                 task=Task_list[int(self.parameters["task"])],
+                                                log_progress = False,
                                                 beam_size=self.parameters["beam_size"],
                                                 best_of=self.parameters["best_of"],
                                                 patience=self.parameters["patience"],
@@ -253,6 +254,7 @@ def transcribe_file(self, file) -> (TranscriptionInfo, List): # type: ignore
                                                 word_timestamps=self.parameters["word_timestamps"],
                                                 prepend_punctuations=self.parameters["prepend_punctuations"],
                                                 append_punctuations=self.parameters["append_punctuations"],
+                                                multilingual = self.parameters["multilingual"],
                                                 max_new_tokens=self.parameters["max_new_tokens"],
                                                 chunk_length=self.parameters["chunk_length"],
                                                 clip_timestamps=self.parameters["clip_timestamps"],
@@ -385,7 +387,7 @@ def run(self) -> None:
                 # 保存临时文件
                 temp_output_save_file = getSaveFileName(audioFile=path, format="SRT", rootDir=r"./temp")
                 writeSubtitles(temp_output_save_file, segments=segments, format="SRT",language=info.language, fileName=path)
-                print(f"save temp file: {temp_output_save_file}")
+                print(f"save temp file: {os.path.abspath(temp_output_save_file)}")
 
         if torch.cuda.is_available():
             torch.cuda.empty_cache()
@@ -427,9 +429,9 @@ def writeSubtitles(outputFileName:str,
     elif format == "ASS":
         writeASS(outputFileName, segments, file_code=file_code)
 
-    print(f"write over | {outputFileName}")
+    print(f"write over | {os.path.abspath(outputFileName)}")
 
-def writeJson(fileName:str,segments:List[segment_Transcribe],language:str,avFile="",file_code="utf8"):
+def writeJson(fileName:str, segments:List[segment_Transcribe], language:str,avFile="", file_code="utf8"):
 
     _id = getMd5HashId(avFile, file_code=file_code)
 

diff --git a/faster_whisper_GUI/util.py b/faster_whisper_GUI/util.py
@@ -15,6 +15,7 @@ class VADParameters(TypedDict):
 class WhisperParameters(TypedDict):
     language:str = ""
     task:str = "transcribe"
+    log_progress: bool = False
     beam_size:int = 5
     best_of:int = 5
     patience:float = 0.0
@@ -36,6 +37,7 @@ class WhisperParameters(TypedDict):
     word_timestamps:bool = False
     prepend_punctuations:str = ""
     append_punctuations:str = ""
+    multilingual: bool = False
     max_new_tokens:int = None
     chunk_length:int = None
     clip_mode:int = 0
@@ -50,6 +52,7 @@ def outputWithDateTime(text:str):
     print(f"\n=========={dateTime_}==========")
     print(f"=========={text}==========\n")
 
+# ---------------------------------------------------------------------------------------------------------------------------
 def secondsToHMS(t) -> str:
     try:
         t_f:float = float(t)
@@ -95,7 +98,7 @@ def HMSToSeconds(t:str) -> float:
 
     return float(hh) * 3600 + float(mm) * 60 + float(ss)
 
-
+# ---------------------------------------------------------------------------------------------------------------------------
 def secondsToMS(t) -> str:
     try:
         t_f:float = float(t)

diff --git a/faster_whisper_GUI/vadPageNavigationInterface.py b/faster_whisper_GUI/vadPageNavigationInterface.py
@@ -5,6 +5,7 @@
 from PySide6.QtWidgets import (
                                 QGridLayout
                                 , QHBoxLayout
+                                , QSizePolicy
                             )
 
 from qfluentwidgets import (
@@ -64,19 +65,23 @@ def setupUI(self):
         # ------------------------------------------------------------------------------------------------------------------------------------
 
         self.doubleSpin_VAD_param_threshold = DoubleSpinBox()
+        # self.doubleSpin_VAD_param_threshold = LineEdit()
         self.doubleSpin_VAD_param_threshold.setRange(0.0, 1.0)
         self.doubleSpin_VAD_param_threshold.setSingleStep(0.05)
         self.doubleSpin_VAD_param_threshold.setValue(0.50)
         # self.doubleSpin_VAD_param_threshold.setSuffix("%")
+        # self.doubleSpin_VAD_param_threshold.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Expanding)
+        self.doubleSpin_VAD_param_threshold.setFixedWidth(200)
 
-        self.VAD_param_threshold_param_widget = ParamWidget(self.__tr("概率阈值"), 
-                                                            self.__tr("语音概率阈值。 Silero VAD为每个音频块输出语音概率, 概率高于此值的认为是语音。\n最好对每个数据集单独调整此参数, 但“懒散”的 0.5 对大多数数据集来说都非常好。"),
-                                                            self.doubleSpin_VAD_param_threshold
-                                                        )
-
+        self.VAD_param_threshold_param_widget = ParamWidget(
+                                                                self.__tr("概率阈值"), 
+                                                                self.__tr("语音概率阈值。 Silero VAD为每个音频块输出语音概率, 概率高于此值的认为是语音。\n最好对每个数据集单独调整此参数, 但“懒散”的 0.5 对大多数数据集来说都非常好。"),
+                                                                self.doubleSpin_VAD_param_threshold
+                                                            )
 
-        # self.VAD_param_threshold_param_widget.mainHLayout.setStretch(2,5)
 
+        # self.VAD_param_threshold_param_widget.mainHLayout.setStretch(1,10)
+        # self.VAD_param_threshold_param_widget.setFixedSize(300, 50)
 
         self.GridLayout_VAD_param.addWidget(self.VAD_param_threshold_param_widget, 0, 0)
 

diff --git a/faster_whisper_GUI/version.py b/faster_whisper_GUI/version.py
@@ -1,6 +1,6 @@
 # coding:utf-8
 
-__version__ = "0.8.1"
-__FasterWhisper_version__ = "1.0.3"
+__version__ = "0.8.5"
+__FasterWhisper_version__ = "1.1.0"
 __WhisperX_version__ = "3.1.1"
 __Demucs_version__ = "v4.0"
diff --git a/resource/_rc/Translater/en.qm b/resource/_rc/Translater/en.qm