Skip to content

Commit

Permalink
finish function split Audio #54
Browse files Browse the repository at this point in the history
  • Loading branch information
CheshireCC committed Dec 30, 2023
1 parent 8eac17c commit a38c5c0
Show file tree
Hide file tree
Showing 12 changed files with 154 additions and 42 deletions.
6 changes: 0 additions & 6 deletions config/config.json

This file was deleted.

20 changes: 10 additions & 10 deletions fasterWhisperGUIConfig.json
Original file line number Diff line number Diff line change
Expand Up @@ -3,29 +3,29 @@
"demucs": {
"overlap": 0.1,
"segment": 7.8,
"tracks": 0
"tracks": 1
},
"model_param": {
"localModel": true,
"onlineModel": false,
"model_path": "",
"model_path": "F:/WhisperModels/faster-whisper/large-v3-float32",
"modelName": 0,
"use_v3_model": true,
"device": 1,
"deviceIndex": "0",
"preciese": 0,
"thread_num": "4",
"num_worker": "1",
"download_root": "",
"download_root": "C:/Users/12059/.cache/huggingface/hub",
"local_files_only": false
},
"vad_param": {
"use_VAD": true,
"threshold": 0.3,
"threshold": 0.5,
"minSpeechDuration": "250",
"minSilenceDuration": "2000",
"maxSpeechDuration": "inf",
"windowSize": 2,
"windowSize": 1,
"speechPad": "400"
},
"setting": {
Expand All @@ -37,9 +37,9 @@
"autoClearTempFiles": true
},
"Transcription_param": {
"language": 8,
"language": 2,
"task": false,
"beam_size": "5",
"beam_size": "2",
"best_of": "1",
"patience": "1.0",
"length_penalty": "1.0",
Expand All @@ -65,10 +65,10 @@
"tabMovable": false,
"tabScrollable": true,
"tabShadowEnabled": false,
"tabMaxWidth": 326,
"tabMaxWidth": 300,
"closeDisplayMode": 0,
"whisperXMinSpeaker": 3,
"whisperXMaxSpeaker": 3,
"whisperXMinSpeaker": 2,
"whisperXMaxSpeaker": 2,
"outputFormat": 0,
"outputEncoding": 1
}
Expand Down
7 changes: 7 additions & 0 deletions faster_whisper_GUI/UI_MainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,6 +190,13 @@ def __init__(self, translator:QTranslator=None):
# 设置配置
self.setConfig()

# 根据读取的配置设置完控件状态之后,根据控件状态设置相关属性
self.page_output.tableTab.onDisplayModeChanged(self.page_output.tableTab.closeDisplayModeComboBox.currentIndex())
self.page_output.tableTab.tabBar.setMovable(self.page_output.tableTab.movableCheckBox.isChecked())
self.page_output.tableTab.tabBar.setScrollable(self.page_output.tableTab.scrollableCheckBox.isChecked())
self.page_output.tableTab.tabBar.setTabShadowEnabled(self.page_output.tableTab.shadowEnabledCheckBox.isChecked())
self.page_output.tableTab.tabBar.setTabMaximumWidth(self.page_output.tableTab.tabMaxWidthSpinBox.value())

def initWin(self):

self.setObjectName("FramlessMainWin")
Expand Down
39 changes: 27 additions & 12 deletions faster_whisper_GUI/mainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -882,20 +882,23 @@ def aligmentOver(self, segments_path_info:list):
self.whisperXWorker = None

def whisperXAligmentTimeStample(self):
if self.result_faster_whisper is None :
if self.result_faster_whisper is None and self.current_result is None:
self.raiseErrorInfoBar(
self.tr("错误"),
self.tr("没有有效的 音频-字幕 转写结果,无法进行对齐")
)
return

elif self.current_result is None :
self.current_result = self.result_faster_whisper

self.setPageOutButtonStatus()
self.outputWithDateTime("TimeStample_Alignment")

self.setStateTool(title=self.tr("WhisperX"), text=self.tr("时间戳对齐"), status=False)

if self.whisperXWorker is None:
self.whisperXWorker = WhisperXWorker(self.result_faster_whisper, alignment=True,speaker_diarize=False, parent=self)
self.whisperXWorker = WhisperXWorker(self.current_result, alignment=True,speaker_diarize=False, parent=self)
else:
self.whisperXWorker.result_segments_path_info = self.result_faster_whisper
self.whisperXWorker.alignment = True
Expand All @@ -909,7 +912,7 @@ def whisperXDiarizeSpeakers(self):

whisperParams = self.getParamWhisperX()

result_needed = self.result_whisperx_aligment or self.result_faster_whisper
result_needed = self.current_result or self.result_whisperx_aligment or self.result_faster_whisper
# print(f"result_useing: {result_needed}")
# try:
# print(len(result_needed))
Expand Down Expand Up @@ -956,6 +959,7 @@ def setPageOutButtonStatus(self):
self.page_output.WhisperXAligmentTimeStampleButton.setEnabled(not self.page_output.WhisperXAligmentTimeStampleButton.isEnabled())
self.page_output.outputSubtitleFileButton.setEnabled(not self.page_output.outputSubtitleFileButton.isEnabled())
self.page_output.WhisperXSpeakerDiarizeButton.setEnabled(not self.page_output.WhisperXSpeakerDiarizeButton.isEnabled())
self.page_output.outputAudioPartWithSpeakerButton.setEnabled(not self.page_output.outputAudioPartWithSpeakerButton.isEnabled())

def speakerDiarizeOver(self, segments_path_info:list):
self.setPageOutButtonStatus()
Expand Down Expand Up @@ -1004,7 +1008,7 @@ def is_audio_or_video(self, file_path:str) -> bool:

# 判定打开的文件是否音视频文件
try:
av_cont = av.open(file_path)
av_cont = av.open(file_path, metadata_errors = "ignore")

# 获取文件的全部 流数据
av_streams = av_cont.streams
Expand Down Expand Up @@ -1105,12 +1109,13 @@ def openExcitedFiles(self):
self.result_whisperx_aligment = None
self.result_whisperx_speaker_diarize = None

if self.result_faster_whisper is not None:
self.result_faster_whisper.append((segments, file, info))
if self.current_result is not None:
self.current_result.append((segments, file, info))
else:
self.result_faster_whisper = [(segments, file, info)]
self.current_result = [(segments, file, info)]
# self.tableModel_list[file] = file_subtitle_fileName
self.showResultInTable(self.result_faster_whisper)

self.showResultInTable(self.current_result)

def reSetButton_demucs_process(self):
self.page_demucs.process_button.setText(self.tr("提取"))
Expand Down Expand Up @@ -1287,16 +1292,26 @@ def outputAudioPartWithSpeaker(self):
"""
outputWithDateTime("SegmentAudioFileWithSpeaker")

self.page_output.outputAudioPartWithSpeakerButton.setEnabled(False)
# self.page_output.outputAudioPartWithSpeakerButton.setEnabled(False)
self.setPageOutButtonStatus()

output_path = self.page_output.outputGroupWidget.LineEdit_output_dir.text()
self.splitAudioFileWithSpeakerWorker = SplitAudioFileWithSpeakersWorker(self.current_result,output_path, self)
self.splitAudioFileWithSpeakerWorker.result_signal.connect(self.splitAudioFileWithSpeakerWorkerFinished)
self.splitAudioFileWithSpeakerWorker.current_task_signal.connect(lambda file: self.setStateTool(self.tr("分割音频"), self.tr("处理文件:") + file, False))
self.splitAudioFileWithSpeakerWorker.start()

self.setStateTool(self.tr("分割音频"), self.tr("按说话人分割音频文件"), False)

def splitAudioFileWithSpeakerWorkerFinished(self):
    """Report that the split-audio worker finished and restore the buttons.

    Connected to the worker's ``result_signal``. Updates the state tool
    and shows a success info bar, then flips the output-page buttons
    back to the state they had before the task started.
    """
    self.setStateTool(self.tr("分割音频"), self.tr("按说话人分割音频文件完成"), True)
    self.raiseSuccessInfoBar(self.tr("分割音频完成"), self.tr("按说话人分割音频文件完成"))

    # setPageOutButtonStatus() toggles each output-page button, and
    # outputAudioPartWithSpeaker() called it once before starting the
    # worker, so exactly one more call here undoes that. Do not also call
    # setEnabled(True) on the split button beforehand: the toggle would
    # then flip it back to disabled.
    self.setPageOutButtonStatus()

def singleAndSlotProcess(self):
"""
Expand Down
1 change: 1 addition & 0 deletions faster_whisper_GUI/modelPageNavigationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,7 @@ def setupUI(self):
self.label_model_path.setObjectName("LabelModelPath")
self.label_model_path.setStyleSheet("#LabelModelPath{ background : rgba(0, 128, 0, 120); }")
self.lineEdit_model_path = LineEdit()
self.lineEdit_model_path.setClearButtonEnabled(True)
self.toolPushButton_get_model_path = ToolButton()
self.toolPushButton_get_model_path.setIcon(self.style().standardPixmap(QStyle.StandardPixmap.SP_DirOpenIcon))

Expand Down
2 changes: 1 addition & 1 deletion faster_whisper_GUI/outputPageNavigationInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ def setupUI(self):

self.outputAudioPartWithSpeakerButton = PushButton()
self.outputAudioPartWithSpeakerButton.setText(self.tr("输出音频分段"))
self.outputAudioPartWithSpeakerButton.setToolTip(self.tr("将音频按照说话人进行分段,并输出"))
self.outputAudioPartWithSpeakerButton.setToolTip(self.tr("将音频按照说话人和时间戳进行分段处理,并输出"))

self.WhisperXHBoxLayout = QHBoxLayout()
self.WhisperXHBoxLayout.addWidget(self.WhisperXAligmentTimeStampleButton, 0, Qt.AlignmentFlag.AlignLeft)
Expand Down
9 changes: 8 additions & 1 deletion faster_whisper_GUI/settingPageNavigation.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,8 @@
ComboBox,
LineEdit,
MessageBox,
PushButton
PushButton,
TitleLabel
)

from .paramItemWidget import ParamWidget
Expand Down Expand Up @@ -52,6 +53,12 @@ def addLayout(self, layout):
self.mainLayout.addLayout(layout)

def setupUI(self):

self.mainLayout.addSpacing(10)

self.titleLabel_title = TitleLabel(self.__tr("软件设置"))
self.addWidget(self.titleLabel_title)

# --------------------------------------------------------------------------------------------------------------------------------------------------------------
self.switchButton_saveConfig = SwitchButton()
self.switchButton_saveConfig.setChecked(True)
Expand Down
14 changes: 12 additions & 2 deletions faster_whisper_GUI/split_audio.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
class SplitAudioFileWithSpeakersWorker(QThread):
# 定义一个信号,用于在处理完成后发送结果
result_signal = Signal(str)
current_task_signal = Signal(str)

def __init__(self, segments_path_info_list:list, output_path, parent=None):
super().__init__(parent)
Expand All @@ -32,7 +33,13 @@ def creatCommandLine(self, start_time, end_time, fileName, output_path, speaker)
return commandLine

def getOutPutFileName(self, output_path:str, start_time:str, end_time:str, speaker:str):
    """Build the output ``.wav`` path for one audio segment.

    Args:
        output_path: Directory the segment file is written to.
        start_time: Segment start as text; every ``:`` becomes ``_``.
        end_time: Segment end as text; every ``:`` becomes ``_``.
        speaker: Speaker label used as the file-name prefix. ``None`` or
            an empty string falls back to ``"UnClassedSpeaker"``.

    Returns:
        ``<output_path>/<prefix>_<start>_<end>.wav`` joined with
        ``os.path.join``.
    """
    # Segments without a speaker label are still exported (run() no longer
    # skips them), so they need a stable placeholder prefix.
    if speaker is not None and speaker != "":
        prefix = speaker
    else:
        prefix = "UnClassedSpeaker"

    # NOTE(review): ":" is presumably replaced because it is not valid in
    # file names on some platforms (e.g. Windows) — confirm.
    start_tag = start_time.replace(":", "_")
    end_tag = end_time.replace(":", "_")
    return os.path.join(output_path, f"{prefix}_{start_tag}_{end_tag}.wav")


def run(self):
self.is_running = True
Expand All @@ -42,6 +49,8 @@ def run(self):
base_path,file = os.path.split(path)
print(f" current task: {file}")

self.current_task_signal.emit(file)

if not self.output_path:
output_path = base_path
else:
Expand All @@ -54,7 +63,8 @@ def run(self):
os.makedirs(output_path)

for segment in segments:
if not segment.speaker: continue
# if not segment.speaker : continue

start_time = secondsToHMS(segment.start)
end_time = secondsToHMS(segment.end)
speaker = segment.speaker
Expand Down
46 changes: 40 additions & 6 deletions faster_whisper_GUI/tableModel_segments_path_info.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@
from PySide6.QtCore import QAbstractTableModel, Qt
# from PySide6.QtWidgets import QStyledItemDelegate, QLineEdit
from typing import List
from faster_whisper import Word

from .seg_ment import segment_Transcribe
from .transcribe import secondsToHMS
from .util import HMSToSeconds

# 自定义数据模型,用于在表格中显示数据
class TableModel(QAbstractTableModel):
Expand All @@ -22,18 +25,19 @@ def data(self, index, role):
row, column = index.row(), index.column()
data:segment_Transcribe = self._data[row]
if column == 0:
value = data.start
value = secondsToHMS(data.start)
elif column == 1:
value = data.end
value = secondsToHMS(data.end)
elif column == 2:
try:
value = f"{data.speaker}:{data.text}" if data.speaker is not None else data.text
except AttributeError:
value = data.text

elif column == 3:
elif column == 3:
value = ";".join([f"<{word.start}>{word.word}<{word.end}>" for word in data.words]) if len(data.words) > 0 else ""
# print(type(value))
value.encode('utf-8').decode('utf-8')
return value

# 使数据可编辑
Expand All @@ -44,21 +48,51 @@ def setData(self, index, value, role):

try:
if column == 0:
self._data[row].start = float(value)
self._data[row].start = HMSToSeconds(value)
elif column == 1:
self._data[row].end = float(value)
self._data[row].end = HMSToSeconds(value)
elif column == 2:
if value != "":
retxt = value.split(":")
if len(retxt) > 1:
if self._data[row].speaker != retxt[0]:
temp_data_speaker = self._data[row].speaker
for data in self._data:
if data.speaker is not None and data.speaker != "" and data.speaker == temp_data_speaker:
data.speaker = retxt[0]
self._data[row].speaker = retxt[0]
self._data[row].text = ":".join(retxt[1:])
else:
self._data[row].text = value
else:
return False

elif column == 3:
return False
if value != "":
words_list = []
text = ""
try:
words = value.split(";")
print(words)
for word in words:
temp = word.split("<")
print(temp)
end_time = float(temp[-1].split(">")[0])
start_time = float(temp[1].split(">")[0])
word_text = temp[1].split(">")[-1]
text += word_text
word_ = Word(start=start_time, end=end_time, word=word_text, probability=1.0)
words_list.append(word_)

print(text)
self._data[row].words = words_list
self._data[row].text = text
except Exception as e:
print(f"edit words-level timestample error:{e}")
return False
else:
return False

self.dataChanged.emit(index, index)
return True
except ValueError:
Expand Down
4 changes: 2 additions & 2 deletions faster_whisper_GUI/tableViewInterface.py
Original file line number Diff line number Diff line change
Expand Up @@ -190,8 +190,8 @@ def addSubInterface(self
widget.resizeColumnToContents(2)

# 设置列宽
widget.setColumnWidth(0,100)
widget.setColumnWidth(1,100)
widget.setColumnWidth(0,110)
widget.setColumnWidth(1,110)
# widget.setColumnWidth(3,500)
# widget.setColumnWidth(4,50)

Expand Down
Loading

0 comments on commit a38c5c0

Please sign in to comment.