Skip to content

Commit

Permalink
fixed bugs
Browse files Browse the repository at this point in the history
  • Loading branch information
CheshireCC committed Jun 2, 2024
1 parent 7f9f0b7 commit 608e0bb
Show file tree
Hide file tree
Showing 6 changed files with 37 additions and 28 deletions.
32 changes: 16 additions & 16 deletions fasterWhisperGUIConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,20 +8,20 @@
"model_param": {
"localModel": true,
"onlineModel": false,
"model_path": "",
"modelName": 0,
"use_v3_model": false,
"model_path": "F:/WhisperModels/faster-whisper/large-v3-float32",
"modelName": 11,
"use_v3_model": true,
"device": 1,
"deviceIndex": "0",
"preciese": 4,
"preciese": 5,
"thread_num": "4",
"num_worker": "1",
"download_root": "C:/Users/12059/.cache/huggingface/hub",
"local_files_only": false
},
"vad_param": {
"use_VAD": true,
"threshold": 0.5,
"threshold": 0.30000000000000004,
"minSpeechDuration": "250",
"minSilenceDuration": "2000",
"maxSpeechDuration": "inf",
Expand All @@ -39,35 +39,35 @@
},
"Transcription_param": {
"aggregate_contents": true,
"language": 0,
"language": 3,
"task": false,
"beam_size": "5",
"best_of": "5",
"patience": "1.0",
"length_penalty": "1.8",
"length_penalty": "1.0",
"temperature": "0.0,0.2,0.4,0.6,0.8,1.0",
"compression_ratio_threshold": "2.4",
"log_prob_threshold": "-1.0",
"no_speech_threshold": "0.6",
"compression_ratio_threshold": "1.4",
"log_prob_threshold": "-10",
"no_speech_threshold": "0.9",
"condition_on_previous_text": false,
"initial_prompt": "",
"prefix": "",
"suppress_blank": true,
"suppress_tokens": "-1",
"without_timestamps": false,
"max_initial_timestamp": "1.0",
"word_timestamps": true,
"word_timestamps": false,
"prepend_punctuations": "\"'“¿([{-",
"append_punctuations": "\"'.。,,!!??::”)]}、",
"repetition_penalty": "1.0",
"no_repeat_ngram_size": "0",
"prompt_reset_on_temperature": "0.5",
"chunk_length": "30",
"clip_mode": 1,
"clip_mode": 0,
"max_new_tokens": "448",
"clip_timestamps": "",
"hallucination_silence_threshold": "0",
"hotwords": "",
"hallucination_silence_threshold": "0.5",
"hotwords": "这个音频是关于酒窝的",
"language_detection_threshold": "",
"language_detection_segments": "1"
},
Expand All @@ -77,8 +77,8 @@
"tabShadowEnabled": false,
"tabMaxWidth": 259,
"closeDisplayMode": 0,
"whisperXMinSpeaker": 0,
"whisperXMaxSpeaker": 0,
"whisperXMinSpeaker": 3,
"whisperXMaxSpeaker": 3,
"outputFormat": 0,
"outputEncoding": 1
}
Expand Down
11 changes: 7 additions & 4 deletions faster_whisper_GUI/mainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,12 +59,15 @@
# from .style_sheet import StyleSheet
from .subtitleFileRead import readSRTFileToSegments, readJSONFileToSegments
from .config import ENCODING_DICT

from .util import (
outputWithDateTime,
HMSToSeconds,
MSToSeconds,
WhisperParameters
WhisperParameters,
VADParameters
)

from .split_audio import SplitAudioFileWithSpeakersWorker

import opencc
Expand Down Expand Up @@ -813,10 +816,10 @@ def getParamTranscribe(self) -> dict:
Transcribe_params["hotwords"] = hotwords

language_detaction_th = self.page_transcribes.LineEdit_language_detection_threshold.text().strip()
Transcribe_params["language_detection_th"] = float(language_detaction_th)
Transcribe_params["language_detection_threshold"] = float(language_detaction_th) if language_detaction_th != "" else None

language_detaction_segments = self.page_transcribes.lienEdit_language_detection_segments.text().strip()
Transcribe_params["language_detaction_segments"] = int(language_detaction_segments)
Transcribe_params["language_detection_segments"] = int(language_detaction_segments) if language_detaction_segments != "" else None

return Transcribe_params

Expand Down Expand Up @@ -866,7 +869,7 @@ def getVADparam(self) -> dict:
window_size_samples = int(self.page_VAD.combox_VAD_param_window_size_samples.currentText())
speech_pad_ms = int(self.page_VAD.LineEdit_VAD_param_speech_pad_ms.text().replace(" ", ""))

VAD_param["param"] = {}
VAD_param["param"] = VADParameters()
VAD_param["param"]["threshold"] = threshold
VAD_param["param"]["min_speech_duration_ms"] = min_speech_duration_ms
VAD_param["param"]["max_speech_duration_s"] = max_speech_duration_s
Expand Down
3 changes: 1 addition & 2 deletions faster_whisper_GUI/paramItemWidget.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,6 @@
QWidget
)


from qfluentwidgets import (
CaptionLabel
, StrongBodyLabel
Expand Down Expand Up @@ -62,5 +61,5 @@ def setupUI(self):
self.titleVLayout.setAlignment(Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignLeft)
self.widgetVLayout.setAlignment(Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignLeft)

# self.mainHLayout.setStretch(0,8)
self.mainHLayout.setStretch(0,8)
self.mainHLayout.setStretch(1,1)
12 changes: 10 additions & 2 deletions faster_whisper_GUI/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,14 @@

from typing import List, TypedDict, Union

class VADParameters(TypedDict):
    """Typed dictionary holding the voice-activity-detection (VAD) settings.

    ``TypedDict`` fields cannot carry default values: an assignment in the
    class body is rejected by type checkers and is never copied into
    instances, so ``VADParameters()`` always yields an empty ``dict`` and the
    caller must set every key explicitly.  The values that used to be written
    as pseudo-defaults are kept below as comments for reference only.
    """

    # Reference value: 0.5
    threshold: float
    # Reference value: 250
    min_speech_duration_ms: float
    # Reference value: float("inf")
    max_speech_duration_s: float
    # Reference value: 2000
    min_silence_duration_ms: float
    # Reference value: 1024
    window_size_samples: int
    # Reference value: 400
    speech_pad_ms: float

class WhisperParameters(TypedDict):
language:str = ""
task:str = "transcribe"
Expand All @@ -27,7 +35,7 @@ class WhisperParameters(TypedDict):
max_initial_timestamp:float = 0.0
word_timestamps:bool = False
prepend_punctuations:str = ""
append_punctuations: str = ""
append_punctuations:str = ""
max_new_tokens:int = None
chunk_length:int = None
clip_mode:int = 0
Expand All @@ -51,7 +59,7 @@ def secondsToHMS(t) -> str:

H = int(t_f // 3600)
M = int((t_f - H * 3600) // 60)
S = (t_f - H *3600 - M *60)
S = (t_f - H * 3600 - M * 60)

H = str(H)

Expand Down
5 changes: 2 additions & 3 deletions faster_whisper_GUI/version.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
# coding:utf-8

__version__ = "0.7.6"
__FasterWhisper_version__ = "1.0.1"
__version__ = "0.7.8"
__FasterWhisper_version__ = "1.0.2"
__WhisperX_version__ = "3.1.1"
__Demucs_version__ = "v4.0"

2 changes: 1 addition & 1 deletion 参数说明:.md
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@
- 一个元组,包含:

- \- 转录段的生成器
- \- TranscriptionInfo的一个实例
- \- `TranscriptionInfo` 的一个实例

### 1.2 VAD 参数

Expand Down

0 comments on commit 608e0bb

Please sign in to comment.