Skip to content

Commit

Permalink
fix bug #55
Browse files (browse the repository at this point in the history)
  • Loading branch information
CheshireCC committed Dec 29, 2023
1 parent 1e9fd1a commit 6fac605
Show file tree
Hide file tree
Showing 7 changed files with 52 additions and 22 deletions.
8 changes: 4 additions & 4 deletions fasterWhisperGUIConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,15 @@
"model_param": {
"localModel": true,
"onlineModel": false,
"model_path": "F:/WhisperModels/faster-whisper/large-v3-float32",
"model_path": "",
"modelName": 0,
"use_v3_model": true,
"device": 1,
"deviceIndex": "0",
"preciese": 0,
"thread_num": "4",
"num_worker": "1",
"download_root": "C:/Users/12059/.cache/huggingface/hub",
"download_root": "",
"local_files_only": false
},
"vad_param": {
Expand All @@ -30,7 +30,7 @@
},
"setting": {
"saveConfig": true,
"autoLoadModel": false,
"autoLoadModel": true,
"language": 2,
"huggingface_user_token": "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",
"autoGoToOutputPage": 2,
Expand All @@ -43,7 +43,7 @@
"best_of": "1",
"patience": "1.0",
"length_penalty": "1.0",
"temperature": "0",
"temperature": "0.5",
"compression_ratio_threshold": "2.4",
"log_prob_threshold": "-1.0",
"no_speech_threshold": "0.6",
Expand Down
6 changes: 3 additions & 3 deletions faster_whisper_GUI/UI_MainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
)

from PySide6.QtWidgets import (
# QSpacerItem,
# QApplication,
QSpacerItem,
QWidget
, QStackedWidget
, QVBoxLayout
Expand Down Expand Up @@ -231,8 +231,8 @@ def setupUI(self):
self.setCentralWidget(self.mainWindowsWidget)

# TODO: 创建一个空对象 用于改善布局顶部
# self.spacer_main = QSpacerItem(0,25)
# self.vBoxLayout.addItem(self.spacer_main)
self.spacer_main = QSpacerItem(0,25)
self.vBoxLayout.addItem(self.spacer_main)

# 设置显示图层到最后避免遮挡窗体按钮
self.mainWindowsWidget.lower()
Expand Down
5 changes: 4 additions & 1 deletion faster_whisper_GUI/fileNameListViewInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def testFileWithAudioTrackOrNot(self, fileNames:list[str]) -> list[str]:
for fileName in fileNames:
cont = None
try:
cont = av.open(fileName)
cont = av.open(fileName, metadata_errors="ignore")
except Exception as e:
print(f"InvalidDataError : {fileName} \nerror:{str(e)}")
ignoreFile.append(fileName)
Expand All @@ -147,6 +147,9 @@ def testFileWithAudioTrackOrNot(self, fileNames:list[str]) -> list[str]:

else:
fileNames_.append(fileName)

cont.close()

if len(ignoreFile) > 0:
ifi = ignore_files_info(ignore_files=ignoreFile, ignore_reason=self.__tr("不包含音频流的文件将被忽略:"))
self.ignore_files_signal.emit(ifi)
Expand Down
27 changes: 24 additions & 3 deletions faster_whisper_GUI/mainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def getDownloadCacheDir(self):
, self.tr("选择缓存文件夹")
, self.page_model.LineEdit_download_root.text()
):

self.page_model.LineEdit_download_root.setText(path)
self.download_cache_path = path

Expand Down Expand Up @@ -425,7 +426,7 @@ def transcribeProcess(self):
num_worker = 1

# 创建进程
self.log.write(f"create transcribe process with {num_worker} workers")
self.log.write(f"create transcribe process with {num_worker} workers\n")
self.transcribe_thread = TranscribeWorker(model = self.FasterWhisperModel
, parameters = Transcribe_params
, vad_filter = vad_filter
Expand All @@ -440,7 +441,7 @@ def transcribeProcess(self):
self.page_process.button_process.setIcon(r":/resource/Image/Cancel_red.svg")

# 启动进程
self.log.write(f"start transcribe process")
self.log.write(f"start transcribe process\n")
# self.transcribe_thread.is_running == True
self.transcribe_thread.start()
self.setStateTool(self.tr("音频处理"), self.tr("正在处理中"), False)
Expand Down Expand Up @@ -1278,7 +1279,26 @@ def unloadWhisperModel(self):
print("unload model failed")
print(str(e))
self.raiseErrorInfoBar(self.tr("卸载模型失败"), self.tr("卸载模型失败,请在转写之前禁用温度回退配置"))


def outputAudioPartWithSpeaker(self):
    """
    Inspect the audio layout of every file in the current result set.

    Slot connected to the "output audio segments" button. For each entry of
    ``self.current_result`` (unpacked as ``(segments, file_path, info)``)
    the media file is opened, its first audio stream is located and the
    channel layout (mono vs. two or more channels) is printed.

    NOTE(review): the actual per-speaker export is not implemented yet;
    this method currently only reports the channel layout.
    """
    outputWithDateTime("SegmentAudioFileWithSpeaker")

    for _segments, file_path, _info in self.current_result:
        # Tolerate undecodable metadata, consistent with the other
        # av.open() call sites in this project.
        with av.open(file_path, metadata_errors="ignore") as av_file:
            audio_stream = next(
                (s for s in av_file.streams if s.codec_context.type == 'audio'),
                None,
            )

            # A container without any audio stream must not abort the
            # whole loop (a bare next() would raise StopIteration here).
            if audio_stream is None:
                print(f"no audio stream found, skip file : {file_path}")
                continue

            if audio_stream.channels < 2:
                print("单声道音频")
            else:
                print("双声道音频")
            # TODO: split the audio by speaker using the segments and
            # write the parts to disk.



def singleAndSlotProcess(self):
"""
process single connect and others
Expand Down Expand Up @@ -1313,6 +1333,7 @@ def singleAndSlotProcess(self):

self.page_output.tableTab.signal_delete_table.connect(self.deleteResultTableEvent)
self.page_output.unloadWhisperModelPushbutton.clicked.connect(self.unloadWhisperModel)
self.page_output.outputAudioPartWithSpeakerButton.clicked.connect(self.outputAudioPartWithSpeaker)

self.page_demucs.fileListView.ignore_files_signal.connect(lambda ignore_files_info: self.raiseInfoBar(self.tr("忽略文件"), ignore_files_info["ignore_reason"]+"\n"+"\n".join(ignore_files_info["ignore_files"])))
self.page_process.fileNameListView.ignore_files_signal.connect(lambda ignore_files_info: self.raiseInfoBar(self.tr("忽略文件"), ignore_files_info["ignore_reason"]+"\n"+"\n".join(ignore_files_info["ignore_files"])))
Expand Down
2 changes: 1 addition & 1 deletion faster_whisper_GUI/modelLoad.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from typing import (List, Optional, TypedDict, Union)
from PySide6.QtCore import QThread, Signal
from faster_whisper import WhisperModel
from concurrent import futures


class modelParamDict(TypedDict):
model_size_or_path: str
Expand Down
6 changes: 6 additions & 0 deletions faster_whisper_GUI/outputPageNavigationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,6 +54,7 @@ def getparam(self) -> dict:
param["outputEncoding"] = self.combox_output_code.currentIndex()

return param

def setupUI(self):
# -----------------------------------------------------------------------------------------

Expand All @@ -68,9 +69,14 @@ def setupUI(self):
self.WhisperXSpeakerDiarizeButton.setText(self.tr("WhisperX 说话人分离"))
self.WhisperXSpeakerDiarizeButton.setToolTip(self.tr("speachBrain 模型声纹聚类分析,将不同语音段的不同说话人进行分离"))

self.outputAudioPartWithSpeakerButton = PushButton()
self.outputAudioPartWithSpeakerButton.setText(self.tr("输出音频分段"))
self.outputAudioPartWithSpeakerButton.setToolTip(self.tr("将音频按照说话人进行分段,并输出"))

self.WhisperXHBoxLayout = QHBoxLayout()
self.WhisperXHBoxLayout.addWidget(self.WhisperXAligmentTimeStampleButton, 0, Qt.AlignmentFlag.AlignLeft)
self.WhisperXHBoxLayout.addWidget(self.WhisperXSpeakerDiarizeButton, 0, Qt.AlignmentFlag.AlignLeft)
self.WhisperXHBoxLayout.addWidget(self.outputAudioPartWithSpeakerButton, 0, Qt.AlignmentFlag.AlignLeft)
self.WhisperXHBoxLayout.addSpacing(10)

self.outputSubtitleFileButton = PushButton()
Expand Down
20 changes: 10 additions & 10 deletions faster_whisper_GUI/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -211,15 +211,15 @@ def __init__(self


def transcribe_file(self, file) -> (TranscriptionInfo, List):
try:
is_av_file = self.try_decode_avFile(file)
if not is_av_file:
return (None,None)
except Exception as e: # 捕获异常
print(f' {file.split("/")[-1]} 不是一个有效的音视频文件\n')
print(f" error:{str(e)}")
print(f" ignore File : {file} \n")
return (None, None)
# try:
# is_av_file = self.try_decode_avFile(file)
# if not is_av_file:
# return (None,None)
# except Exception as e: # 捕获异常
# print(f' {file.split("/")[-1]} 不是一个有效的音视频文件\n')
# print(f" error:{str(e)}")
# print(f" ignore File : {file} \n")
# return (None, None)

print("开始处理音频...")
segments, info = self.model.transcribe(
Expand Down Expand Up @@ -303,7 +303,7 @@ def try_decode_avFile(self, file) -> bool:
print("\n")
print(f"current task: {file}")
print(" 尝试解析文件")
container = av.open(file) # 尝试打开文件
container = av.open(file, metadata_errors="ignore") # 尝试打开文件
av_cont = container.streams
for stream in av_cont:
if stream.codec_context.type == "audio":
Expand Down

0 comments on commit 6fac605

Please sign in to comment.