fixed bugs

CheshireCC · Jun 2, 2024 · 608e0bb · 608e0bb
1 parent 7f9f0b7
commit 608e0bb
Show file tree

Hide file tree

Showing 6 changed files with 37 additions and 28 deletions.
diff --git a/fasterWhisperGUIConfig.json b/fasterWhisperGUIConfig.json
@@ -8,20 +8,20 @@
     "model_param": {
         "localModel": true,
         "onlineModel": false,
-        "model_path": "",
-        "modelName": 0,
-        "use_v3_model": false,
+        "model_path": "F:/WhisperModels/faster-whisper/large-v3-float32",
+        "modelName": 11,
+        "use_v3_model": true,
         "device": 1,
         "deviceIndex": "0",
-        "preciese": 4,
+        "preciese": 5,
         "thread_num": "4",
         "num_worker": "1",
         "download_root": "C:/Users/12059/.cache/huggingface/hub",
         "local_files_only": false
     },
     "vad_param": {
         "use_VAD": true,
-        "threshold": 0.5,
+        "threshold": 0.30000000000000004,
         "minSpeechDuration": "250",
         "minSilenceDuration": "2000",
         "maxSpeechDuration": "inf",
@@ -39,35 +39,35 @@
     },
     "Transcription_param": {
         "aggregate_contents": true,
-        "language": 0,
+        "language": 3,
         "task": false,
         "beam_size": "5",
         "best_of": "5",
         "patience": "1.0",
-        "length_penalty": "1.8",
+        "length_penalty": "1.0",
         "temperature": "0.0,0.2,0.4,0.6,0.8,1.0",
-        "compression_ratio_threshold": "2.4",
-        "log_prob_threshold": "-1.0",
-        "no_speech_threshold": "0.6",
+        "compression_ratio_threshold": "1.4",
+        "log_prob_threshold": "-10",
+        "no_speech_threshold": "0.9",
         "condition_on_previous_text": false,
         "initial_prompt": "",
         "prefix": "",
         "suppress_blank": true,
         "suppress_tokens": "-1",
         "without_timestamps": false,
         "max_initial_timestamp": "1.0",
-        "word_timestamps": true,
+        "word_timestamps": false,
         "prepend_punctuations": "\"'“¿([{-",
         "append_punctuations": "\"'.。,，!！?？:：”)]}、",
         "repetition_penalty": "1.0",
         "no_repeat_ngram_size": "0",
         "prompt_reset_on_temperature": "0.5",
         "chunk_length": "30",
-        "clip_mode": 1,
+        "clip_mode": 0,
         "max_new_tokens": "448",
         "clip_timestamps": "",
-        "hallucination_silence_threshold": "0",
-        "hotwords": "",
+        "hallucination_silence_threshold": "0.5",
+        "hotwords": "这个音频是关于酒窝的",
         "language_detection_threshold": "",
         "language_detection_segments": "1"
     },
@@ -77,8 +77,8 @@
         "tabShadowEnabled": false,
         "tabMaxWidth": 259,
         "closeDisplayMode": 0,
-        "whisperXMinSpeaker": 0,
-        "whisperXMaxSpeaker": 0,
+        "whisperXMinSpeaker": 3,
+        "whisperXMaxSpeaker": 3,
         "outputFormat": 0,
         "outputEncoding": 1
     }

diff --git a/faster_whisper_GUI/mainWindows.py b/faster_whisper_GUI/mainWindows.py
@@ -59,12 +59,15 @@
 # from .style_sheet import StyleSheet
 from .subtitleFileRead import readSRTFileToSegments, readJSONFileToSegments
 from .config import ENCODING_DICT
+
 from .util import (
                     outputWithDateTime,
                     HMSToSeconds,
                     MSToSeconds,
-                    WhisperParameters
+                    WhisperParameters,
+                    VADParameters
                 )
+
 from .split_audio import SplitAudioFileWithSpeakersWorker
 
 import opencc
@@ -813,10 +816,10 @@ def getParamTranscribe(self) -> dict:
         Transcribe_params["hotwords"] = hotwords
 
         language_detaction_th = self.page_transcribes.LineEdit_language_detection_threshold.text().strip()
-        Transcribe_params["language_detection_th"] = float(language_detaction_th)
+        Transcribe_params["language_detection_threshold"] = float(language_detaction_th) if language_detaction_th != "" else None
 
         language_detaction_segments = self.page_transcribes.lienEdit_language_detection_segments.text().strip()
-        Transcribe_params["language_detaction_segments"] = int(language_detaction_segments)
+        Transcribe_params["language_detection_segments"] = int(language_detaction_segments) if language_detaction_segments != "" else None
 
         return Transcribe_params
 
@@ -866,7 +869,7 @@ def getVADparam(self) -> dict:
         window_size_samples = int(self.page_VAD.combox_VAD_param_window_size_samples.currentText())
         speech_pad_ms = int(self.page_VAD.LineEdit_VAD_param_speech_pad_ms.text().replace(" ", ""))
 
-        VAD_param["param"] = {}
+        VAD_param["param"] = VADParameters()
         VAD_param["param"]["threshold"] = threshold
         VAD_param["param"]["min_speech_duration_ms"] = min_speech_duration_ms
         VAD_param["param"]["max_speech_duration_s"] = max_speech_duration_s

diff --git a/faster_whisper_GUI/paramItemWidget.py b/faster_whisper_GUI/paramItemWidget.py
@@ -7,7 +7,6 @@
                                 QWidget
                             )
 
-
 from qfluentwidgets import (
                             CaptionLabel
                             , StrongBodyLabel
@@ -62,5 +61,5 @@ def setupUI(self):
         self.titleVLayout.setAlignment(Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignLeft)
         self.widgetVLayout.setAlignment(Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignLeft)
 
-        # self.mainHLayout.setStretch(0,8)
+        self.mainHLayout.setStretch(0,8)
         self.mainHLayout.setStretch(1,1)
diff --git a/faster_whisper_GUI/util.py b/faster_whisper_GUI/util.py
@@ -4,6 +4,14 @@
 
 from typing import List, TypedDict, Union
 
+class VADParameters(TypedDict):  # Typed mapping of the VAD options; getVADparam creates it empty via VADParameters() and then assigns keys one by one.
+    threshold:float = 0.5  # NOTE(review): `= value` in a TypedDict body is not applied as a key default (PEP 589 disallows it; type checkers flag it) - VADParameters() is {} until keys are assigned; confirm every consumer sets all keys.
+    min_speech_duration_ms:float = 250  # annotated float, but the visible caller appears to pass ints - TODO confirm the intended type
+    max_speech_duration_s:float = float("inf")  # the "= inf" is informational only, see the note on `threshold`
+    min_silence_duration_ms:float = 2000
+    window_size_samples:int = 1024  # getVADparam reads this from a combo box and converts it with int()
+    speech_pad_ms:float = 400  # getVADparam converts the line-edit text with int() before storing it
+
 class WhisperParameters(TypedDict):
     language:str = ""
     task:str = "transcribe"
@@ -27,7 +35,7 @@ class WhisperParameters(TypedDict):
     max_initial_timestamp:float = 0.0
     word_timestamps:bool = False
     prepend_punctuations:str = ""
-    append_punctuations: str = ""
+    append_punctuations:str = ""
     max_new_tokens:int = None
     chunk_length:int = None
     clip_mode:int = 0
@@ -51,7 +59,7 @@ def secondsToHMS(t) -> str:
 
     H = int(t_f // 3600)
     M = int((t_f - H * 3600) // 60)
-    S = (t_f - H *3600 - M *60)
+    S = (t_f - H * 3600 - M * 60)
 
     H = str(H)
 

diff --git a/faster_whisper_GUI/version.py b/faster_whisper_GUI/version.py
@@ -1,7 +1,6 @@
 # coding:utf-8
 
-__version__ = "0.7.6"
-__FasterWhisper_version__ = "1.0.1"
+__version__ = "0.7.8"
+__FasterWhisper_version__ = "1.0.2"
 __WhisperX_version__ = "3.1.1"
 __Demucs_version__ = "v4.0"
-
diff --git a/参数说明：.md b/参数说明：.md
@@ -71,7 +71,7 @@
 - 一个元组，包含:
 
     - \- 转录段的生成器
-    - \- TranscriptionInfo的一个实例
+    - \- `TranscriptionInfo` 的一个实例
 
 ### 1.2 VAD 参数