Skip to content

Commit

Permalink
0.4.1
Browse files Browse the repository at this point in the history
  • Loading branch information
CheshireCC committed Nov 3, 2023
1 parent 58fe3a2 commit 7f0bb3f
Show file tree
Hide file tree
Showing 18 changed files with 2,917 additions and 2,588 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ models/
dist/
python39/
python310/
python*/

requirements.txt
requirements_310.txt
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,7 @@
[whisperX](https://github.com/m-bain/whisperX)

[HuggingFace model download](https://huggingface.co/models)

[Demucs](https://github.com/facebookresearch/demucs)


514 changes: 288 additions & 226 deletions en.ts

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion fasterWhisperGUIConfig.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"use_auth_token": "hf_<REDACTED — leaked HuggingFace access token; revoke and rotate it immediately>", "overlap": 0.2, "segment": 7.800000000000001}
{"use_auth_token": "hf_<REDACTED — leaked HuggingFace access token; revoke and rotate it immediately>", "overlap": 0.1, "segment": 10.0}
31 changes: 18 additions & 13 deletions faster_whisper_GUI/UI_MainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
, QVBoxLayout
, QHBoxLayout
, QGridLayout
, QMainWindow
)

from PySide6.QtGui import QIcon
Expand Down Expand Up @@ -62,10 +63,10 @@
# =======================================================================================
# UI
# =======================================================================================
class UIMainWin(FramelessMainWindow):
class UIMainWin(QMainWindow):
"""V"""

def __tr(self, text):
def tr(self, text):
return QCoreApplication.translate(self.__class__.__name__, text)

def readConfigJson(self):
Expand All @@ -79,6 +80,9 @@ def readConfigJson(self):
def __init__(self):
super().__init__()

# self.setWindowFlags(Qt.FramelessWindowHint)
# self.setAttribute(Qt.WA_TranslucentBackground)

self.model_path = ""
self.model_names = Model_names

Expand Down Expand Up @@ -119,12 +123,13 @@ def initWin(self):
self.setGeometry(500, 200, 1147, 825)

# 添加标题栏
self.setTitleBar(StandardTitleBar(self))
# self.setTitleBar(StandardTitleBar(self))
# self.titleBar.setAttribute(Qt.WA_StyledBackground)

self.setWindowTitle(f"FasterWhisperGUI-{__version__}--fw-{__FasterWhisper_version__}--WhisperX-{__WhisperX_version__}")

self.setWindowIcon(QIcon(":/resource/Image/microphone.png"))
self.titleBar.setAttribute(Qt.WA_StyledBackground)


def setupUI(self):

Expand All @@ -149,8 +154,8 @@ def setupUI(self):
self.setCentralWidget(self.mainWindowsWidget)

# 创建一个空对象 用于改善布局顶部
self.spacer_main = QSpacerItem(0,25)
self.vBoxLayout.addItem(self.spacer_main)
# self.spacer_main = QSpacerItem(0,25)
# self.vBoxLayout.addItem(self.spacer_main)

# 设置显示图层到最后避免遮挡窗体按钮
self.mainWindowsWidget.lower()
Expand All @@ -175,31 +180,31 @@ def setupUI(self):

# 添加子界面
self.page_home = HomePageNavigationinterface(self)
self.addSubInterface(self.page_home, "pageHome", self.__tr("Home"), icon=FluentIcon.HOME)
self.addSubInterface(self.page_home, "pageHome", self.tr("Home"), icon=FluentIcon.HOME)
self.pages.append(self.page_home)

self.page_demucs = DemucsPageNavigation(self)
self.addSubInterface(self.page_demucs, "pageDecums", self.__tr("声乐移除"), icon=FasterWhisperGUIIcon.DEMUCS)
self.addSubInterface(self.page_demucs, "pageDecums", self.tr("声乐移除"), icon=FasterWhisperGUIIcon.DEMUCS)
self.pages.append(self.page_demucs)

self.page_model = ModelNavigationInterface(self)
self.addSubInterface(self.page_model, "pageModelParameter", self.__tr("模型参数"), icon=FluentIcon.BOOK_SHELF)
self.addSubInterface(self.page_model, "pageModelParameter", self.tr("模型参数"), icon=FluentIcon.BOOK_SHELF)
self.pages.append(self.page_model)

self.page_VAD = VADNavigationInterface(self)
self.addSubInterface(self.page_VAD, "pageVADParameter", self.__tr("VAD及WhisperX"), icon=FasterWhisperGUIIcon.VAD_PAGE)
self.addSubInterface(self.page_VAD, "pageVADParameter", self.tr("VAD及WhisperX"), icon=FasterWhisperGUIIcon.VAD_PAGE)
self.pages.append(self.page_VAD)

self.page_transcribes = TranscribeNavigationInterface(self)
self.addSubInterface(self.page_transcribes, "pageTranscribesParameter", self.__tr("转写参数"), icon=FasterWhisperGUIIcon.TRANSCRIPTION_PAGE)
self.addSubInterface(self.page_transcribes, "pageTranscribesParameter", self.tr("转写参数"), icon=FasterWhisperGUIIcon.TRANSCRIPTION_PAGE)
self.pages.append(self.page_transcribes)

self.page_process = ProcessPageNavigationInterface(self)
self.addSubInterface(self.page_process, "pageProcess", self.__tr("执行转写"), icon=FasterWhisperGUIIcon.HEAD_PHONE)
self.addSubInterface(self.page_process, "pageProcess", self.tr("执行转写"), icon=FasterWhisperGUIIcon.HEAD_PHONE)
self.pages.append(self.page_process)

self.page_output = OutputPageNavigationInterface(self)
self.addSubInterface(self.page_output, "pageOutput", self.__tr("后处理及输出"), icon=FluentIcon.SAVE_AS)
self.addSubInterface(self.page_output, "pageOutput", self.tr("后处理及输出"), icon=FluentIcon.SAVE_AS)
self.pages.append(self.page_output)

self.stackedWidget.currentChanged.connect(self.onCurrentIndexChanged)
Expand Down
6 changes: 6 additions & 0 deletions faster_whisper_GUI/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,9 @@
]

STEMS = ["All Stems","Vocals", "Other","Bass", "Drums"]
ENCODING_DICT = {"UTF-8":"utf8",
"UTF-8 BOM":"utf_8_sig",
"GBK":"gbk",
"GB2312":"gb18030",
"ANSI":"ansi"
}
52 changes: 41 additions & 11 deletions faster_whisper_GUI/de_mucs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import av
import soundfile
import numpy as np
import gc

from faster_whisper import decode_audio

Expand Down Expand Up @@ -45,7 +46,7 @@ def __init__(
def run(self) -> None:
self.is_running = True

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device: {device}")

if not self.is_running:
Expand All @@ -72,6 +73,7 @@ def run(self) -> None:

try:
samples = self.reSampleAudio(audio, 44100, device=device)
samples = torch.tensor(samples,dtype=torch.float32).to(device)
except Exception as e:
print(f"resample audio error:\n{str(e)}")
self.signal_vr_over.emit(False)
Expand Down Expand Up @@ -123,13 +125,31 @@ def run(self) -> None:
return

self.file_process_status.emit({"file":audio, "status":False, "task": "file over"})
del samples
del sources
del audio
if torch.cuda.is_available():
torch.cuda.empty_cache()

self.signal_vr_over.emit(True)
print("over!")

# self.model.to("cpu")
del self.model
self.model = None

if torch.cuda.is_available():
torch.cuda.empty_cache()

# print(torch.cuda.memory_allocated(device=device))
# print(torch.cuda.memory_reserved())
# print(torch.cuda.memory_stats(device=device))

gc.collect()

self.requestInterruption()
self.stop()


def stop(self):
self.is_running = False
Expand All @@ -155,7 +175,7 @@ def separate_sources(self,
if device is None:
device = mix.device
else:
device = torch.device(device)
device = device

batch, channels, length = mix.shape

Expand All @@ -174,7 +194,7 @@ def separate_sources(self,
chunk = mix[:, :, start:end]
with torch.no_grad():
out = model.forward(chunk)

if not self.is_running:
return None

Expand All @@ -188,6 +208,16 @@ def separate_sources(self,
end += chunk_len
if end >= length:
fade.fade_out_len = 0

del out
del chunk
if torch.cuda.is_available():
torch.cuda.empty_cache()

# del model

del fade
del mix
return final

def loadModel(self, model_path:str, device=None):
Expand All @@ -210,16 +240,15 @@ def loadModel(self, model_path:str, device=None):
if not self.is_running:
return

model = bundle.get_model()
self.model = bundle.get_model()

if not self.is_running:
return

model.to(device)

self.model = model
del bundle
self.model.to(device)

def reSampleAudio(self, audio, sample_rate, device) -> torch.Tensor:
def reSampleAudio(self, audio, sample_rate, device) -> np.ndarray:
file_path = os.path.abspath(audio)

split_setore = True
Expand All @@ -234,10 +263,11 @@ def reSampleAudio(self, audio, sample_rate, device) -> torch.Tensor:

print("resample audio data")
samples = decode_audio(file_path, sample_rate, split_setore)
samples = np.array(samples)
# samples = np.array(samples)

samples_t = torch.tensor(samples,dtype=torch.float32).to(device)
return samples_t
# samples_t = torch.tensor(samples,dtype=torch.float32).to(device)
# del samples
return samples

def saveResult(self, model, file_path:str, sources:torch.Tensor, stems:str, output_path:str, sample_rate=44100):

Expand Down
Loading

0 comments on commit 7f0bb3f

Please sign in to comment.