Skip to content

Commit

Permalink
Reduce VAD memory usage (#1198)
Browse files: browse the repository at this point in the history
Co-authored-by: Mahmoud Ashraf <[email protected]>
  • Loading branch information
Purfview and MahmoudAshraf97 authored Dec 12, 2024
1 parent b568fae commit 1b24f28
Showing 1 changed file with 13 additions and 3 deletions.
16 changes: 13 additions & 3 deletions faster_whisper/vad.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,9 @@ def __init__(self, encoder_path, decoder_path):
) from e

opts = onnxruntime.SessionOptions()
- opts.inter_op_num_threads = 0
- opts.intra_op_num_threads = 0
+ opts.inter_op_num_threads = 1
+ opts.intra_op_num_threads = 1
+ opts.enable_cpu_mem_arena = False
opts.log_severity_level = 4

self.encoder_session = onnxruntime.InferenceSession(
Expand Down Expand Up @@ -301,7 +302,16 @@ def __call__(

batched_audio = batched_audio.reshape(-1, num_samples + context_size_samples)

- encoder_output = self.encoder_session.run(None, {"input": batched_audio})[0]
+ encoder_batch_size = 10000
+ num_segments = batched_audio.shape[0]
+ encoder_outputs = []
+ for i in range(0, num_segments, encoder_batch_size):
+ encoder_output = self.encoder_session.run(
+ None, {"input": batched_audio[i : i + encoder_batch_size]}
+ )[0]
+ encoder_outputs.append(encoder_output)
+
+ encoder_output = np.concatenate(encoder_outputs, axis=0)
encoder_output = encoder_output.reshape(batch_size, -1, 128)

decoder_outputs = []
Expand Down

0 comments on commit 1b24f28

Please sign in to comment.