Skip to content

Commit

Permalink
Re-organize code to load model only once to work around Windows issu…
Browse files Browse the repository at this point in the history
…e with CTranslate2 #11
  • Loading branch information
jordimas committed Jun 6, 2023
1 parent b1583e7 commit aa271aa
Show file tree
Hide file tree
Showing 4 changed files with 40 additions and 29 deletions.
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@ run:
pip3 install --force-reinstall .

run-e2e-tests:
pip install --force-reinstall ctranslate2==3.13.0
pip install --force-reinstall ctranslate2==3.14.0
pip install --force-reinstall faster-whisper==0.6.0
CT2_USE_MKL="False" CT2_FORCE_CPU_ISA='GENERIC' nose2 -s e2e-tests

Expand Down
21 changes: 13 additions & 8 deletions src/whisper_ctranslate2/live.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,7 @@ def __init__(
self.speaking = False
self.blocks_speaking = 0
self.buffers_to_process = []
self.transcribe = None

@staticmethod
def is_available():
Expand Down Expand Up @@ -119,17 +120,21 @@ def process(self):
if self.verbose:
print("\n\033[90mTranscribing..\033[0m")

result = Transcribe().inference(
if not self.transcribe:
self.transcribe = Transcribe(
self.model_path,
self.device,
self.device_index,
self.compute_type,
self.threads,
self.cache_directory,
self.local_files_only,
)

result = self.transcribe.inference(
audio=_buffer.flatten(),
model_path=self.model_path,
cache_directory=self.cache_directory,
local_files_only=self.local_files_only,
task=self.task,
language=self.language,
threads=self.threads,
device=self.device,
device_index=self.device_index,
compute_type=self.compute_type,
verbose=self.verbose,
live=True,
options=self.options,
Expand Down
27 changes: 15 additions & 12 deletions src/whisper_ctranslate2/transcribe.py
Original file line number Diff line number Diff line change
Expand Up @@ -94,23 +94,17 @@ def _get_vad_parameters_dictionary(self, options):

return vad_parameters

def inference(
def __init__(
self,
audio: Union[str, BinaryIO, np.ndarray],
model_path: str,
cache_directory: str,
local_files_only: bool,
task: str,
language: str,
threads: int,
device: str,
device_index: Union[int, List[int]],
compute_type: str,
verbose: bool,
live: bool,
options: TranscriptionOptions,
threads: int,
cache_directory: str,
local_files_only: bool,
):
model = WhisperModel(
self.model = WhisperModel(
model_path,
device=device,
device_index=device_index,
Expand All @@ -120,9 +114,18 @@ def inference(
local_files_only=local_files_only,
)

def inference(
self,
audio: Union[str, BinaryIO, np.ndarray],
task: str,
language: str,
verbose: bool,
live: bool,
options: TranscriptionOptions,
):
vad_parameters = self._get_vad_parameters_dictionary(options)

segments, info = model.transcribe(
segments, info = self.model.transcribe(
audio=audio,
language=language,
task=task,
Expand Down
19 changes: 11 additions & 8 deletions src/whisper_ctranslate2/whisper_ctranslate2.py
Original file line number Diff line number Diff line change
Expand Up @@ -513,18 +513,21 @@ def main():

return

transcribe = Transcribe(
model_dir,
device,
device_index,
compute_type,
threads,
cache_directory,
local_files_only,
)

for audio_path in audio:
result = Transcribe().inference(
result = transcribe.inference(
audio_path,
model_dir,
cache_directory,
local_files_only,
task,
language,
threads,
device,
device_index,
compute_type,
verbose,
False,
options,
Expand Down

0 comments on commit aa271aa

Please sign in to comment.