Skip to content

Commit

Permalink
0.4.1
Browse files Browse the repository at this point in the history
  • Loading branch information
CheshireCC committed Nov 3, 2023
1 parent 58fe3a2 commit 7f0bb3f
Show file tree
Hide file tree
Showing 18 changed files with 2,917 additions and 2,588 deletions.
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ models/
dist/
python39/
python310/
python*/

requirements.txt
requirements_310.txt
Expand Down
4 changes: 4 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -67,3 +67,7 @@
[whisperX](https://github.com/m-bain/whisperX)

[HuggingFace model download](https://huggingface.co/models)

[Demucs](https://github.com/facebookresearch/demucs)


514 changes: 288 additions & 226 deletions en.ts

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion fasterWhisperGUIConfig.json
Original file line number Diff line number Diff line change
@@ -1 +1 @@
{"use_auth_token": "hf_<REDACTED — leaked HuggingFace access token; revoke and rotate it immediately>", "overlap": 0.2, "segment": 7.800000000000001}
{"use_auth_token": "hf_<REDACTED — leaked HuggingFace access token; revoke and rotate it immediately>", "overlap": 0.1, "segment": 10.0}
31 changes: 18 additions & 13 deletions faster_whisper_GUI/UI_MainWindows.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
, QVBoxLayout
, QHBoxLayout
, QGridLayout
, QMainWindow
)

from PySide6.QtGui import QIcon
Expand Down Expand Up @@ -62,10 +63,10 @@
# =======================================================================================
# UI
# =======================================================================================
class UIMainWin(FramelessMainWindow):
class UIMainWin(QMainWindow):
"""V"""

def __tr(self, text):
def tr(self, text):
return QCoreApplication.translate(self.__class__.__name__, text)

def readConfigJson(self):
Expand All @@ -79,6 +80,9 @@ def readConfigJson(self):
def __init__(self):
super().__init__()

# self.setWindowFlags(Qt.FramelessWindowHint)
# self.setAttribute(Qt.WA_TranslucentBackground)

self.model_path = ""
self.model_names = Model_names

Expand Down Expand Up @@ -119,12 +123,13 @@ def initWin(self):
self.setGeometry(500, 200, 1147, 825)

# 添加标题栏
self.setTitleBar(StandardTitleBar(self))
# self.setTitleBar(StandardTitleBar(self))
# self.titleBar.setAttribute(Qt.WA_StyledBackground)

self.setWindowTitle(f"FasterWhisperGUI-{__version__}--fw-{__FasterWhisper_version__}--WhisperX-{__WhisperX_version__}")

self.setWindowIcon(QIcon(":/resource/Image/microphone.png"))
self.titleBar.setAttribute(Qt.WA_StyledBackground)


def setupUI(self):

Expand All @@ -149,8 +154,8 @@ def setupUI(self):
self.setCentralWidget(self.mainWindowsWidget)

# 创建一个空对象 用于改善布局顶部
self.spacer_main = QSpacerItem(0,25)
self.vBoxLayout.addItem(self.spacer_main)
# self.spacer_main = QSpacerItem(0,25)
# self.vBoxLayout.addItem(self.spacer_main)

# 设置显示图层到最后避免遮挡窗体按钮
self.mainWindowsWidget.lower()
Expand All @@ -175,31 +180,31 @@ def setupUI(self):

# 添加子界面
self.page_home = HomePageNavigationinterface(self)
self.addSubInterface(self.page_home, "pageHome", self.__tr("Home"), icon=FluentIcon.HOME)
self.addSubInterface(self.page_home, "pageHome", self.tr("Home"), icon=FluentIcon.HOME)
self.pages.append(self.page_home)

self.page_demucs = DemucsPageNavigation(self)
self.addSubInterface(self.page_demucs, "pageDecums", self.__tr("声乐移除"), icon=FasterWhisperGUIIcon.DEMUCS)
self.addSubInterface(self.page_demucs, "pageDecums", self.tr("声乐移除"), icon=FasterWhisperGUIIcon.DEMUCS)
self.pages.append(self.page_demucs)

self.page_model = ModelNavigationInterface(self)
self.addSubInterface(self.page_model, "pageModelParameter", self.__tr("模型参数"), icon=FluentIcon.BOOK_SHELF)
self.addSubInterface(self.page_model, "pageModelParameter", self.tr("模型参数"), icon=FluentIcon.BOOK_SHELF)
self.pages.append(self.page_model)

self.page_VAD = VADNavigationInterface(self)
self.addSubInterface(self.page_VAD, "pageVADParameter", self.__tr("VAD及WhisperX"), icon=FasterWhisperGUIIcon.VAD_PAGE)
self.addSubInterface(self.page_VAD, "pageVADParameter", self.tr("VAD及WhisperX"), icon=FasterWhisperGUIIcon.VAD_PAGE)
self.pages.append(self.page_VAD)

self.page_transcribes = TranscribeNavigationInterface(self)
self.addSubInterface(self.page_transcribes, "pageTranscribesParameter", self.__tr("转写参数"), icon=FasterWhisperGUIIcon.TRANSCRIPTION_PAGE)
self.addSubInterface(self.page_transcribes, "pageTranscribesParameter", self.tr("转写参数"), icon=FasterWhisperGUIIcon.TRANSCRIPTION_PAGE)
self.pages.append(self.page_transcribes)

self.page_process = ProcessPageNavigationInterface(self)
self.addSubInterface(self.page_process, "pageProcess", self.__tr("执行转写"), icon=FasterWhisperGUIIcon.HEAD_PHONE)
self.addSubInterface(self.page_process, "pageProcess", self.tr("执行转写"), icon=FasterWhisperGUIIcon.HEAD_PHONE)
self.pages.append(self.page_process)

self.page_output = OutputPageNavigationInterface(self)
self.addSubInterface(self.page_output, "pageOutput", self.__tr("后处理及输出"), icon=FluentIcon.SAVE_AS)
self.addSubInterface(self.page_output, "pageOutput", self.tr("后处理及输出"), icon=FluentIcon.SAVE_AS)
self.pages.append(self.page_output)

self.stackedWidget.currentChanged.connect(self.onCurrentIndexChanged)
Expand Down
6 changes: 6 additions & 0 deletions faster_whisper_GUI/config.py
Original file line number Diff line number Diff line change
Expand Up @@ -144,3 +144,9 @@
]

STEMS = ["All Stems","Vocals", "Other","Bass", "Drums"]
ENCODING_DICT = {"UTF-8":"utf8",
"UTF-8 BOM":"utf_8_sig",
"GBK":"gbk",
"GB2312":"gb18030",
"ANSI":"ansi"
}
52 changes: 41 additions & 11 deletions faster_whisper_GUI/de_mucs.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import av
import soundfile
import numpy as np
import gc

from faster_whisper import decode_audio

Expand Down Expand Up @@ -45,7 +46,7 @@ def __init__(
def run(self) -> None:
self.is_running = True

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"device: {device}")

if not self.is_running:
Expand All @@ -72,6 +73,7 @@ def run(self) -> None:

try:
samples = self.reSampleAudio(audio, 44100, device=device)
samples = torch.tensor(samples,dtype=torch.float32).to(device)
except Exception as e:
print(f"resample audio error:\n{str(e)}")
self.signal_vr_over.emit(False)
Expand Down Expand Up @@ -123,13 +125,31 @@ def run(self) -> None:
return

self.file_process_status.emit({"file":audio, "status":False, "task": "file over"})
del samples
del sources
del audio
if torch.cuda.is_available():
torch.cuda.empty_cache()

self.signal_vr_over.emit(True)
print("over!")

# self.model.to("cpu")
del self.model
self.model = None

if torch.cuda.is_available():
torch.cuda.empty_cache()

# print(torch.cuda.memory_allocated(device=device))
# print(torch.cuda.memory_reserved())
# print(torch.cuda.memory_stats(device=device))

gc.collect()

self.requestInterruption()
self.stop()


def stop(self):
self.is_running = False
Expand All @@ -155,7 +175,7 @@ def separate_sources(self,
if device is None:
device = mix.device
else:
device = torch.device(device)
device = device

batch, channels, length = mix.shape

Expand All @@ -174,7 +194,7 @@ def separate_sources(self,
chunk = mix[:, :, start:end]
with torch.no_grad():
out = model.forward(chunk)

if not self.is_running:
return None

Expand All @@ -188,6 +208,16 @@ def separate_sources(self,
end += chunk_len
if end >= length:
fade.fade_out_len = 0

del out
del chunk
if torch.cuda.is_available():
torch.cuda.empty_cache()

# del model

del fade
del mix
return final

def loadModel(self, model_path:str, device=None):
Expand All @@ -210,16 +240,15 @@ def loadModel(self, model_path:str, device=None):
if not self.is_running:
return

model = bundle.get_model()
self.model = bundle.get_model()

if not self.is_running:
return

model.to(device)

self.model = model
del bundle
self.model.to(device)

def reSampleAudio(self, audio, sample_rate, device) -> torch.Tensor:
def reSampleAudio(self, audio, sample_rate, device) -> np.ndarray:
file_path = os.path.abspath(audio)

split_setore = True
Expand All @@ -234,10 +263,11 @@ def reSampleAudio(self, audio, sample_rate, device) -> torch.Tensor:

print("resample audio data")
samples = decode_audio(file_path, sample_rate, split_setore)
samples = np.array(samples)
# samples = np.array(samples)

samples_t = torch.tensor(samples,dtype=torch.float32).to(device)
return samples_t
# samples_t = torch.tensor(samples,dtype=torch.float32).to(device)
# del samples
return samples

def saveResult(self, model, file_path:str, sources:torch.Tensor, stems:str, output_path:str, sample_rate=44100):

Expand Down
Loading

0 comments on commit 7f0bb3f

Please sign in to comment.