Skip to content

Commit

Permalink
0.8.5
Browse files (browse the repository at this point in the history)
  • Loading branch information
CheshireCC committed Dec 6, 2024
1 parent 7ef7369 commit dc06f25
Show file tree
Hide file tree
Showing 12 changed files with 4,084 additions and 3,944 deletions.
522 changes: 286 additions & 236 deletions en.ts

Large diffs are not rendered by default.

15 changes: 8 additions & 7 deletions fasterWhisperGUIConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,12 +8,12 @@
"model_param": {
"localModel": true,
"onlineModel": false,
"model_path": "F:/WhisperModels/faster-whisper/whisper-large-v2-ct2-32",
"model_path": "F:/WhisperModels/faster-whisper/large-v3-turbo",
"modelName": 11,
"use_v3_model": false,
"use_v3_model": true,
"device": 1,
"deviceIndex": "0",
"preciese": 5,
"preciese": 4,
"thread_num": "4",
"num_worker": "1",
"download_root": "C:/Users/12059/.cache/huggingface/hub",
Expand All @@ -22,24 +22,25 @@
"vad_param": {
"use_VAD": true,
"threshold": 0.2,
"minSpeechDuration": "250",
"minSpeechDuration": "0",
"minSilenceDuration": "2000",
"maxSpeechDuration": "inf",
"speechPad": "400"
},
"setting": {
"saveConfig": true,
"autoLoadModel": false,
"language": 2,
"language": 0,
"huggingface_user_token": "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",
"autoGoToOutputPage": 2,
"autoClearTempFiles": true,
"themeColor": "#e64ecf"
},
"Transcription_param": {
"aggregate_contents": true,
"language": -1,
"language": 0,
"task": false,
"multilingual": false,
"beam_size": "1",
"best_of": "5",
"patience": "1.0",
Expand Down Expand Up @@ -67,7 +68,7 @@
"clip_timestamps": "",
"hallucination_silence_threshold": "0.5",
"hotwords": "",
"language_detection_threshold": "",
"language_detection_threshold": "0.5",
"language_detection_segments": "1"
},
"output_whisperX": {
Expand Down
4 changes: 4 additions & 0 deletions faster_whisper_GUI/aboutPageNavigationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,6 +94,10 @@ def __init__(self, parent = None) -> None:
- 侮辱或诽谤他人,侵害他人合法权益的。
- 含有法律、行政法规禁止的其他内容的。
· 因您的数据的产生、收集、处理、使用等任何相关事项存在违反法律法规等情况而造成的全部后果及责任均由您自行承担。
· 本软件不保证其服务不会中断,对服务的及时性、安全性、准确性也都不作保证。
· 本软件不对任何因使用本软件而可能遭致的意外或损失承担责任。
· 本软件所提供的任何信息、内容、材料、产品和服务均不构成任何投资建议。您应自行独立判断并承担可能的风险。
· 本软件完全免费,您不应该直接将本软件安装包本体用于商业售卖。
""")

self.setupUI()
Expand Down
1 change: 1 addition & 0 deletions faster_whisper_GUI/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -127,6 +127,7 @@
"large-v1",
"large-v2",
"large-v3",
"large-v3-turbo",
"distil-large-v3",
"distil-large-v2",
"distil-medium.en",
Expand Down
18 changes: 13 additions & 5 deletions faster_whisper_GUI/mainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -83,8 +83,11 @@ def flush( self ):
def fileno( self ):
return -1
def write( self, text ):
if ( not self.signalsBlocked() ):
self.outputSignal.emit(str(text))
try:
if ( not self.signalsBlocked() ):
self.outputSignal.emit(str(text))
except:
pass

class statusToolsSignalStore(QObject):
StateToolSignal = Signal(bool)
Expand Down Expand Up @@ -697,6 +700,8 @@ def getParamTranscribe(self) -> dict:
# task = Task_list[int(task)]
Transcribe_params["task"] = task

Transcribe_params["log_progress"] = False

beam_size = int(self.page_transcribes.LineEdit_beam_size.text().replace(" ", ""))
Transcribe_params["beam_size"] = beam_size

Expand Down Expand Up @@ -768,6 +773,9 @@ def getParamTranscribe(self) -> dict:
append_punctuations = self.page_transcribes.LineEdit_append_punctuations.text().replace(" ","")
Transcribe_params["append_punctuations"] = append_punctuations

multilingual = self.page_transcribes.switchButton_multilingual.isChecked()
Transcribe_params["multilingual"] = multilingual

repetition_penalty = self.page_transcribes.LineEdit_repetition_penalty.text().strip()
repetition_penalty = float(repetition_penalty)
Transcribe_params['repetition_penalty'] = repetition_penalty
Expand Down Expand Up @@ -795,7 +803,7 @@ def getParamTranscribe(self) -> dict:
chunk_length = self.page_transcribes.LineEdit_chunk_length.text().strip()
if chunk_length != "":
if chunk_length.isdigit():
chunk_length = float(chunk_length)
chunk_length = int(chunk_length)
else:
chunk_length = None
else :
Expand Down Expand Up @@ -862,15 +870,15 @@ def getVADparam(self) -> dict:
if not vad_filter:
return VAD_param

threshold = round(self.page_VAD.doubleSpin_VAD_param_threshold.value(),2)
onset = round(self.page_VAD.doubleSpin_VAD_param_threshold.value(),2)
min_speech_duration_ms = int(self.page_VAD.LineEdit_VAD_param_min_speech_duration_ms.text().replace(" ", ""))
max_speech_duration_s = float(self.page_VAD.LineEdit_VAD_param_max_speech_duration_s.text().replace(" ", ""))
min_silence_duration_ms = int(self.page_VAD.LineEdit_VAD_param_min_silence_duration_ms.text().replace(" ", ""))
# window_size_samples = int(self.page_VAD.combox_VAD_param_window_size_samples.currentText())
speech_pad_ms = int(self.page_VAD.LineEdit_VAD_param_speech_pad_ms.text().replace(" ", ""))

VAD_param["param"] = VADParameters()
VAD_param["param"]["threshold"] = threshold
VAD_param["param"]["onset"] = onset
VAD_param["param"]["min_speech_duration_ms"] = min_speech_duration_ms
VAD_param["param"]["max_speech_duration_s"] = max_speech_duration_s
VAD_param["param"]["min_silence_duration_ms"] = min_silence_duration_ms
Expand Down
19 changes: 17 additions & 2 deletions faster_whisper_GUI/tranccribePageNavigationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -150,6 +150,7 @@ def setupUI(self):

# --------------------------------------------------------------------------------------------
self.LineEdit_language_detection_threshold = LineEdit()
self.LineEdit_language_detection_threshold.setText("0.5")
self.language_detection_threshold_param_widget = ParamWidget(
self.__tr("语言检测阈值"),
self.__tr("自动检测音频时,语言检测的阈值。如果某种语言的最大概率高于此值,则会检测为该语言。"),
Expand Down Expand Up @@ -178,6 +179,17 @@ def setupUI(self):
)
widget_list.append(self.task_param_widget)

# --------------------------------------------------------------------------------------------

self.switchButton_multilingual = SwitchButton()
self.switchButton_multilingual.setChecked(False)

self.multilingual_param_widget = ParamWidget(self.__tr("多语言模式"),
self.__tr("多语言模式,允许模型处理包含多种语言的音频。"),
self.switchButton_multilingual
)
widget_list.append(self.multilingual_param_widget)

# --------------------------------------------------------------------------------------------
self.switchButton_without_timestamps = SwitchButton()
self.switchButton_without_timestamps.setChecked(False)
Expand Down Expand Up @@ -668,7 +680,8 @@ def setParam(self, Transcribe_params:dict) -> None:
self.lineEdit_hallucination_silence_threshold.setText(Transcribe_params["hallucination_silence_threshold"])
self.LineEdit_hotwords.setText(Transcribe_params["hotwords"])
self.LineEdit_language_detection_threshold.setText(Transcribe_params["language_detection_threshold"])
self.language_detection_segments_param_widget.setText(Transcribe_params["language_detection_segments"])
self.lienEdit_language_detection_segments.setText(Transcribe_params["language_detection_segments"])
self.switchButton_multilingual.setChecked(Transcribe_params["multilingual"])
except:
pass

Expand All @@ -687,6 +700,9 @@ def getParam(self) -> dict:
# task = STR_BOOL[task]
Transcribe_params["task"] = task

multilingual = self.switchButton_multilingual.isChecked()
Transcribe_params["multilingual"] = multilingual

beam_size = self.LineEdit_beam_size.text().replace(" ", "")
Transcribe_params["beam_size"] = beam_size

Expand Down Expand Up @@ -779,6 +795,5 @@ def getParam(self) -> dict:
Transcribe_params["language_detection_threshold"] = self.LineEdit_language_detection_threshold.text().strip()
Transcribe_params["language_detection_segments"] = self.lienEdit_language_detection_segments.text().strip()


return Transcribe_params

8 changes: 5 additions & 3 deletions faster_whisper_GUI/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -232,6 +232,7 @@ def transcribe_file(self, file) -> (TranscriptionInfo, List): # type: ignore
audio=file,
language=self.parameters["language"],
task=Task_list[int(self.parameters["task"])],
log_progress = False,
beam_size=self.parameters["beam_size"],
best_of=self.parameters["best_of"],
patience=self.parameters["patience"],
Expand All @@ -253,6 +254,7 @@ def transcribe_file(self, file) -> (TranscriptionInfo, List): # type: ignore
word_timestamps=self.parameters["word_timestamps"],
prepend_punctuations=self.parameters["prepend_punctuations"],
append_punctuations=self.parameters["append_punctuations"],
multilingual = self.parameters["multilingual"],
max_new_tokens=self.parameters["max_new_tokens"],
chunk_length=self.parameters["chunk_length"],
clip_timestamps=self.parameters["clip_timestamps"],
Expand Down Expand Up @@ -385,7 +387,7 @@ def run(self) -> None:
# Save a temporary file
temp_output_save_file = getSaveFileName(audioFile=path, format="SRT", rootDir=r"./temp")
writeSubtitles(temp_output_save_file, segments=segments, format="SRT",language=info.language, fileName=path)
print(f"save temp file: {temp_output_save_file}")
print(f"save temp file: {os.path.abspath(temp_output_save_file)}")

if torch.cuda.is_available():
torch.cuda.empty_cache()
Expand Down Expand Up @@ -427,9 +429,9 @@ def writeSubtitles(outputFileName:str,
elif format == "ASS":
writeASS(outputFileName, segments, file_code=file_code)

print(f"write over | {outputFileName}")
print(f"write over | {os.path.abspath(outputFileName)}")

def writeJson(fileName:str,segments:List[segment_Transcribe],language:str,avFile="",file_code="utf8"):
def writeJson(fileName:str, segments:List[segment_Transcribe], language:str,avFile="", file_code="utf8"):

_id = getMd5HashId(avFile, file_code=file_code)

Expand Down
5 changes: 4 additions & 1 deletion faster_whisper_GUI/util.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ class VADParameters(TypedDict):
class WhisperParameters(TypedDict):
language:str = ""
task:str = "transcribe"
log_progress: bool = False
beam_size:int = 5
best_of:int = 5
patience:float = 0.0
Expand All @@ -36,6 +37,7 @@ class WhisperParameters(TypedDict):
word_timestamps:bool = False
prepend_punctuations:str = ""
append_punctuations:str = ""
multilingual: bool = False
max_new_tokens:int = None
chunk_length:int = None
clip_mode:int = 0
Expand All @@ -50,6 +52,7 @@ def outputWithDateTime(text:str):
print(f"\n=========={dateTime_}==========")
print(f"=========={text}==========\n")

# ---------------------------------------------------------------------------------------------------------------------------
def secondsToHMS(t) -> str:
try:
t_f:float = float(t)
Expand Down Expand Up @@ -95,7 +98,7 @@ def HMSToSeconds(t:str) -> float:

return float(hh) * 3600 + float(mm) * 60 + float(ss)


# ---------------------------------------------------------------------------------------------------------------------------
def secondsToMS(t) -> str:
try:
t_f:float = float(t)
Expand Down
17 changes: 11 additions & 6 deletions faster_whisper_GUI/vadPageNavigationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from PySide6.QtWidgets import (
QGridLayout
, QHBoxLayout
, QSizePolicy
)

from qfluentwidgets import (
Expand Down Expand Up @@ -64,19 +65,23 @@ def setupUI(self):
# ------------------------------------------------------------------------------------------------------------------------------------

self.doubleSpin_VAD_param_threshold = DoubleSpinBox()
# self.doubleSpin_VAD_param_threshold = LineEdit()
self.doubleSpin_VAD_param_threshold.setRange(0.0, 1.0)
self.doubleSpin_VAD_param_threshold.setSingleStep(0.05)
self.doubleSpin_VAD_param_threshold.setValue(0.50)
# self.doubleSpin_VAD_param_threshold.setSuffix("%")
# self.doubleSpin_VAD_param_threshold.setSizePolicy(QSizePolicy.Policy.Expanding, QSizePolicy.Policy.Expanding)
self.doubleSpin_VAD_param_threshold.setFixedWidth(200)

self.VAD_param_threshold_param_widget = ParamWidget(self.__tr("概率阈值"),
self.__tr("语音概率阈值。 Silero VAD为每个音频块输出语音概率, 概率高于此值的认为是语音。\n最好对每个数据集单独调整此参数, 但“懒散”的 0.5 对大多数数据集来说都非常好。"),
self.doubleSpin_VAD_param_threshold
)

self.VAD_param_threshold_param_widget = ParamWidget(
self.__tr("概率阈值"),
self.__tr("语音概率阈值。 Silero VAD为每个音频块输出语音概率, 概率高于此值的认为是语音。\n最好对每个数据集单独调整此参数, 但“懒散”的 0.5 对大多数数据集来说都非常好。"),
self.doubleSpin_VAD_param_threshold
)

# self.VAD_param_threshold_param_widget.mainHLayout.setStretch(2,5)

# self.VAD_param_threshold_param_widget.mainHLayout.setStretch(1,10)
# self.VAD_param_threshold_param_widget.setFixedSize(300, 50)

self.GridLayout_VAD_param.addWidget(self.VAD_param_threshold_param_widget, 0, 0)

Expand Down
4 changes: 2 additions & 2 deletions faster_whisper_GUI/version.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# coding:utf-8

__version__ = "0.8.1"
__FasterWhisper_version__ = "1.0.3"
__version__ = "0.8.5"
__FasterWhisper_version__ = "1.1.0"
__WhisperX_version__ = "3.1.1"
__Demucs_version__ = "v4.0"
Binary file modified resource/_rc/Translater/en.qm
Binary file not shown.
Loading

0 comments on commit dc06f25

Please sign in to comment.