Skip to content

Commit

Permalink
upgrade faster-whisper 1.02
Browse files Browse the repository at this point in the history
  • Loading branch information
CheshireCC committed May 29, 2024
1 parent 3a0ca18 commit 7f9f0b7
Show file tree
Hide file tree
Showing 7 changed files with 309 additions and 13 deletions.
14 changes: 11 additions & 3 deletions fasterWhisperGUIConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -16,12 +16,12 @@
"preciese": 4,
"thread_num": "4",
"num_worker": "1",
"download_root": "",
"download_root": "C:/Users/12059/.cache/huggingface/hub",
"local_files_only": false
},
"vad_param": {
"use_VAD": true,
"threshold": 0.49999999999999994,
"threshold": 0.5,
"minSpeechDuration": "250",
"minSilenceDuration": "2000",
"maxSpeechDuration": "inf",
Expand Down Expand Up @@ -61,7 +61,15 @@
"append_punctuations": "\"'.。,,!!??::”)]}、",
"repetition_penalty": "1.0",
"no_repeat_ngram_size": "0",
"prompt_reset_on_temperature": "0.5"
"prompt_reset_on_temperature": "0.5",
"chunk_length": "30",
"clip_mode": 1,
"max_new_tokens": "448",
"clip_timestamps": "",
"hallucination_silence_threshold": "0",
"hotwords": "",
"language_detection_threshold": "",
"language_detection_segments": "1"
},
"output_whisperX": {
"tabMovable": true,
Expand Down
77 changes: 75 additions & 2 deletions faster_whisper_GUI/mainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,7 +59,12 @@
# from .style_sheet import StyleSheet
from .subtitleFileRead import readSRTFileToSegments, readJSONFileToSegments
from .config import ENCODING_DICT
from .util import outputWithDateTime
from .util import (
outputWithDateTime,
HMSToSeconds,
MSToSeconds,
WhisperParameters
)
from .split_audio import SplitAudioFileWithSpeakersWorker

import opencc
Expand Down Expand Up @@ -662,7 +667,8 @@ def transcribeOver(self, segments_path_info:list):


def getParamTranscribe(self) -> dict:
Transcribe_params = {}

Transcribe_params = WhisperParameters()

# audio = self.page_process.LineEdit_audio_fileName.text().strip()
# audio = audio.split(";;") if audio != "" else []
Expand Down Expand Up @@ -771,9 +777,76 @@ def getParamTranscribe(self) -> dict:
prompt_reset_on_temperature = float(prompt_reset_on_temperature)
Transcribe_params['prompt_reset_on_temperature'] = prompt_reset_on_temperature

# ---- Options added together with the faster-whisper 1.0.2 upgrade ----
# Every value below comes from a free-text field, so each one is validated
# before conversion: a blank or malformed entry must not raise while the
# parameter set is being collected.

# Only plain digit strings are accepted. 448 is the value the field is
# pre-filled with, and it is forwarded as None just like blank/invalid text.
max_new_tokens = self.page_transcribes.LineEdit_max_new_tokens.text().strip()
max_new_tokens = int(max_new_tokens) if max_new_tokens.isdigit() else None
if max_new_tokens == 448:
    max_new_tokens = None
Transcribe_params["max_new_tokens"] = max_new_tokens

# The field is validated with isdigit(), so the value is an integer number
# of seconds and is kept as int.
# NOTE(review): faster-whisper appears to use chunk_length in integer frame
# arithmetic, which a float would break -- confirm against the library.
chunk_length = self.page_transcribes.LineEdit_chunk_length.text().strip()
chunk_length = int(chunk_length) if chunk_length.isdigit() else None
Transcribe_params["chunk_length"] = chunk_length

clip_mode = self.page_transcribes.ComboBox_clip_mode.currentIndex()
Transcribe_params["clip_mode"] = clip_mode

clip_timestamps = self.page_transcribes.LineEdit_clip_timestamps.text().strip()
clip_timestamps = self.getClipTimestamps(clip_mode, clip_timestamps)
Transcribe_params["clip_timestamps"] = clip_timestamps

# Blank or non-numeric text is stored as None instead of raising ValueError.
hallucination_silence_threshold = self.page_transcribes.lineEdit_hallucination_silence_threshold.text().strip()
try:
    hallucination_silence_threshold = float(hallucination_silence_threshold)
except ValueError:
    hallucination_silence_threshold = None
Transcribe_params["hallucination_silence_threshold"] = hallucination_silence_threshold

hotwords = self.page_transcribes.LineEdit_hotwords.text().strip()
Transcribe_params["hotwords"] = hotwords

# The key must be "language_detection_threshold": that is the name the
# transcribe worker reads. The field has no preset text, so blank input is
# the normal case and is stored as None.
language_detection_threshold = self.page_transcribes.LineEdit_language_detection_threshold.text().strip()
try:
    language_detection_threshold = float(language_detection_threshold)
except ValueError:
    language_detection_threshold = None
Transcribe_params["language_detection_threshold"] = language_detection_threshold

# The key must be "language_detection_segments" (the worker reads that name).
# Invalid text falls back to 1, the value the field is pre-filled with.
language_detection_segments = self.page_transcribes.lienEdit_language_detection_segments.text().strip()
try:
    language_detection_segments = int(language_detection_segments)
except ValueError:
    language_detection_segments = 1
Transcribe_params["language_detection_segments"] = language_detection_segments

return Transcribe_params

def getClipTimestamps(self, clip_mode:int, clip_timestamps:str):
    """
    Convert the clip-timestamps text field into the value handed to transcribe.

    The text lists clips separated by ";", each clip written as "start-end".
    All boundaries are flattened, in order, into one list.

    Args:
        clip_mode: 0 = manual clipping disabled, 1 = boundaries are plain
            seconds, 2 = boundaries are "MM:SS" or "HH:MM:SS" timestamps.
        clip_timestamps: stripped text of the clip-timestamps field.

    Returns:
        The string "0" when the text is exactly "0", when clipping is
        disabled, or when clip_mode is not a known mode. Otherwise a list of
        floats (seconds), which may be empty. Blank entries and entries that
        cannot be parsed for the selected mode are skipped.
    """
    if clip_timestamps == "0":
        return clip_timestamps

    # An unknown mode is treated like "disabled" rather than reaching the
    # return statement with no result bound.
    if clip_mode not in (1, 2):
        return "0"

    # Flatten "a-b;c-d" into ["a", "b", "c", "d"], dropping the blank entries
    # produced by an empty field or a trailing ";" / "-".
    tokens = [
        token.strip()
        for clip in clip_timestamps.split(";")
        for token in clip.split("-")
        if token.strip()
    ]

    seconds = []
    for token in tokens:
        if clip_mode == 1:
            # Boundaries must be numbers, not the raw text, so convert here.
            try:
                seconds.append(float(token))
            except ValueError:
                continue
        else:
            field_count = len(token.split(":"))
            if field_count == 2:
                seconds.append(float(MSToSeconds(token)))
            elif field_count == 3:
                seconds.append(float(HMSToSeconds(token)))

    return seconds

def getVADparam(self) -> dict:
"""
get param of VAD
Expand Down
2 changes: 1 addition & 1 deletion faster_whisper_GUI/paramItemWidget.py
Original file line number Diff line number Diff line change
Expand Up @@ -62,5 +62,5 @@ def setupUI(self):
self.titleVLayout.setAlignment(Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignLeft)
self.widgetVLayout.setAlignment(Qt.AlignmentFlag.AlignVCenter | Qt.AlignmentFlag.AlignLeft)

self.mainHLayout.setStretch(0,8)
# self.mainHLayout.setStretch(0,8)
self.mainHLayout.setStretch(1,1)
131 changes: 128 additions & 3 deletions faster_whisper_GUI/tranccribePageNavigationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
, InfoBarPosition
, TitleLabel
, SwitchButton
, ComboBox
)

from .config import Language_dict
Expand All @@ -24,7 +25,7 @@
# import datetime
import json

from .util import outputWithDateTime
from .util import outputWithDateTime, WhisperParameters
from .style_sheet import StyleSheet

from .paramItemWidget import ParamWidget
Expand Down Expand Up @@ -95,10 +96,23 @@ def saveParams(self):

else:
return


def setClipTimestampsStatus(self):
    """
    Sync the clip-timestamps line edit with the selected clip mode.

    Index 0 clears the placeholder and disables the field; indexes 1 and 2
    enable it and show an example of the expected input format. Any other
    index leaves the field untouched.
    """
    # clip-mode index -> (placeholder text, whether the field is editable)
    states = {
        0: ("", False),
        1: ("0.0-10.0;25.0-36.0;......", True),
        2: ("00:00:10.0-00:00:20.0;00:00:25.0-00:00:36.0;......", True),
    }

    state = states.get(self.ComboBox_clip_mode.currentIndex())
    if state is None:
        return

    placeholder, editable = state
    self.LineEdit_clip_timestamps.setPlaceholderText(placeholder)
    self.LineEdit_clip_timestamps.setEnabled(editable)

def SignalAndSlotConnect(self):
    """Connect the page's widget signals to the methods that handle them."""
    # Buttons: save the current parameters / load parameters from a file.
    button_slots = (
        (self.saveParamButton, self.saveParams),
        (self.loadParamsButton, self.loadParamsFromFile),
    )
    for button, slot in button_slots:
        button.clicked.connect(slot)

    # Refresh the clip-timestamps field whenever the clip mode changes.
    clip_mode_changed = self.ComboBox_clip_mode.currentIndexChanged
    clip_mode_changed.connect(self.setClipTimestampsStatus)

def setupUI(self):
# 使用网格布局存放参数列表
Expand Down Expand Up @@ -133,6 +147,25 @@ def setupUI(self):

widget_list.append(self.language_param_widget)

# --------------------------------------------------------------------------------------------
self.LineEdit_language_detection_threshold = LineEdit()
self.language_detection_threshold_param_widget = ParamWidget(
self.__tr("语言检测阈值"),
self.__tr("自动检测音频时,语言检测的阈值。如果某种语言的最大概率高于此值,则会检测为该语言。"),
self.LineEdit_language_detection_threshold
)
widget_list.append(self.language_detection_threshold_param_widget)

# --------------------------------------------------------------------------------------------
self.lienEdit_language_detection_segments:LineEdit = LineEdit()
self.lienEdit_language_detection_segments.setText("1")
self.language_detection_segments_param_widget = ParamWidget(
self.__tr("语言检测段落数"),
self.__tr("自动检测音频时,语言检测需考虑的分段数。"),
self.lienEdit_language_detection_segments
)
widget_list.append(self.language_detection_segments_param_widget)

# --------------------------------------------------------------------------------------------

self.switchButton_Translate_to_English = SwitchButton()
Expand Down Expand Up @@ -175,12 +208,73 @@ def setupUI(self):
)
widget_list.append(self.aggregate_contents_param_widget)

# =======================================================================================================
self.titleLabel_audio_segments = TitleLabel(self.__tr("音频分段设置"))
widget_list.append(self.titleLabel_audio_segments)

# --------------------------------------------------------------------------------------------
self.LineEdit_max_new_tokens:LineEdit = LineEdit()
self.LineEdit_max_new_tokens.setText("448")
self.max_new_tokens_param_widget = ParamWidget(self.__tr("最大新令牌数"),
self.__tr("Whisper 为每个音频块生成的新令牌的最大数量。"),
self.LineEdit_max_new_tokens
)

widget_list.append(self.max_new_tokens_param_widget)

# --------------------------------------------------------------------------------------------
self.LineEdit_chunk_length:LineEdit = LineEdit()
self.LineEdit_chunk_length.setText("30")
self.chunk_length_param_widget = ParamWidget(self.__tr("音频块长度"),
self.__tr("音频段的长度,默认为 30 秒"),
self.LineEdit_chunk_length
)

widget_list.append(self.chunk_length_param_widget)

# --------------------------------------------------------------------------------------------

self.ComboBox_clip_mode:ComboBox = ComboBox()

self.ComboBox_clip_mode.addItems([self.__tr("不启用手动分段"),self.__tr("按秒分割"),self.__tr("按时间按戳分割")])
self.ComboBox_clip_mode.setCurrentIndex(0)
self.clip_mode_param_widget = ParamWidget(self.__tr("音频分段模式"),
self.__tr("手动输入音频分段时要使用的分段标记方式,启用的情况下可以输入分段起止时间戳、秒为单位的分段起止点。"),
self.ComboBox_clip_mode
)

widget_list.append(self.clip_mode_param_widget)

# --------------------------------------------------------------------------------------------

self.LineEdit_clip_timestamps:LineEdit = LineEdit()
self.LineEdit_clip_timestamps.setClearButtonEnabled(True)
self.LineEdit_clip_timestamps.setEnabled(False)
self.clip_timestamps_param_widget = ParamWidget(
self.__tr("分段时间戳"),
self.__tr("手动输入音频分段,可输入分段时间戳,或者分段的起止秒数点,\n\"-\"分隔起止点,用\";\"分隔不同段,最后一个结束时间戳默认为音频结尾。"),
self.LineEdit_clip_timestamps
)

self.clip_timestamps_param_widget.mainHLayout.setStretch(2,6)
widget_list.append(self.clip_timestamps_param_widget)

# =======================================================================================================
self.titleLabel_auditory_hallucination = TitleLabel(self.__tr("幻听参数"))
widget_list.append(self.titleLabel_auditory_hallucination)

# --------------------------------------------------------------------------------------------

self.lineEdit_hallucination_silence_threshold:LineEdit = LineEdit()
self.lineEdit_hallucination_silence_threshold.setText("0")
self.hallucination_silence_threshold_param_widget = ParamWidget(self.__tr("幻听静音阈值"),
self.__tr("如果开启 单词级时间戳 ,当检测到可能的幻觉时,跳过长于此阈值(以秒为单位)的静默期。"),
self.lineEdit_hallucination_silence_threshold
)
widget_list.append(self.hallucination_silence_threshold_param_widget)

# --------------------------------------------------------------------------------------------

self.LineEdit_patience = LineEdit()
self.LineEdit_patience.setText("1.0")
self.patience_param_widget = ParamWidget(self.__tr("搜索耐心"),
Expand Down Expand Up @@ -355,6 +449,16 @@ def setupUI(self):

widget_list.append(self.prefix_param_widget)

# --------------------------------------------------------------------------------------------
self.LineEdit_hotwords:LineEdit = LineEdit()
self.LineEdit_hotwords.setText("")
self.hotwords_param_widget = ParamWidget(
self.__tr("热词/提示短语"),
self.__tr("为模型提供的热词/提示短语。如果给定了 初始文本前缀 则热词无效。"),
self.LineEdit_hotwords
)
widget_list.append(self.hotwords_param_widget)

# --------------------------------------------------------------------------------------------
self.LineEdit_suppress_tokens = LineEdit()
self.LineEdit_suppress_tokens.setText("-1")
Expand Down Expand Up @@ -552,9 +656,20 @@ def setParam(self, Transcribe_params:dict) -> None:
self.LineEdit_prompt_reset_on_temperature.setText(str(Transcribe_params['prompt_reset_on_temperature'] ))
# Transcribe_params['prompt_reset_on_temperature'] = prompt_reset_on_temperature

# Restore the options added with the faster-whisper 1.0.2 upgrade.
# Each key is restored independently, so a parameter file that lacks one of
# them (e.g. one saved before these options existed) still restores the rest.
# Values are converted to str because the line edits only take text; None is
# shown as an empty field.
for line_edit, key in (
    (self.LineEdit_chunk_length, "chunk_length"),
    (self.LineEdit_max_new_tokens, "max_new_tokens"),
    (self.LineEdit_clip_timestamps, "clip_timestamps"),
    (self.lineEdit_hallucination_silence_threshold, "hallucination_silence_threshold"),
    (self.LineEdit_hotwords, "hotwords"),
    (self.LineEdit_language_detection_threshold, "language_detection_threshold"),
    # The text goes into the line edit itself, not its ParamWidget wrapper.
    (self.lienEdit_language_detection_segments, "language_detection_segments"),
):
    try:
        value = Transcribe_params[key]
    except KeyError:
        continue
    line_edit.setText("" if value is None else str(value))

# clip_mode is a combo-box index rather than text; skip it if the stored
# value is missing or is not usable as an index.
try:
    self.ComboBox_clip_mode.setCurrentIndex(int(Transcribe_params["clip_mode"]))
except (KeyError, TypeError, ValueError):
    pass

def getParam(self) -> dict:
Transcribe_params = {}
Transcribe_params = WhisperParameters()

# 从数据模型获取文件列表

Expand Down Expand Up @@ -651,5 +766,15 @@ def getParam(self) -> dict:
# prompt_reset_on_temperature = float(prompt_reset_on_temperature)
Transcribe_params['prompt_reset_on_temperature'] = prompt_reset_on_temperature

Transcribe_params["chunk_length"] = self.LineEdit_chunk_length.text().strip()
Transcribe_params["clip_mode"] = self.ComboBox_clip_mode.currentIndex()
Transcribe_params["max_new_tokens"] = self.LineEdit_max_new_tokens.text().strip()
Transcribe_params["clip_timestamps"] = self.LineEdit_clip_timestamps.text().strip()
Transcribe_params["hallucination_silence_threshold"] = self.lineEdit_hallucination_silence_threshold.text().strip()
Transcribe_params["hotwords"] = self.LineEdit_hotwords.text().strip()
Transcribe_params["language_detection_threshold"] = self.LineEdit_language_detection_threshold.text().strip()
Transcribe_params["language_detection_segments"] = self.lienEdit_language_detection_segments.text().strip()


return Transcribe_params

16 changes: 14 additions & 2 deletions faster_whisper_GUI/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,12 @@
)

from .seg_ment import segment_Transcribe
from .util import secondsToHMS, secondsToMS
from .util import (
secondsToHMS,
secondsToMS,
WhisperParameters
)

from .config import ENCODING_DICT, Task_list


Expand Down Expand Up @@ -195,7 +200,7 @@ class TranscribeWorker(QThread):
def __init__(self
,parent=None
,model : WhisperModel = None
,parameters : dict = None
,parameters : WhisperParameters = None
,vad_filter : bool = False
,vad_parameters : dict = None
,num_workers : int = 1
Expand Down Expand Up @@ -250,6 +255,13 @@ def transcribe_file(self, file) -> (TranscriptionInfo, List): # type: ignore
word_timestamps=self.parameters["word_timestamps"],
prepend_punctuations=self.parameters["prepend_punctuations"],
append_punctuations=self.parameters["append_punctuations"],
max_new_tokens=self.parameters["max_new_tokens"],
chunk_length=self.parameters["chunk_length"],
clip_timestamps=self.parameters["clip_timestamps"],
hallucination_silence_threshold=self.parameters["hallucination_silence_threshold"],
hotwords = self.parameters["hotwords"],
language_detection_threshold = self.parameters["language_detection_threshold"],
language_detection_segments = self.parameters["language_detection_segments"],
vad_filter=self.vad_filter,
vad_parameters=self.vad_parameters
)
Expand Down
Loading

0 comments on commit 7f9f0b7

Please sign in to comment.