expose speculative decoding from spear tts

lucidrains · Sep 30, 2023 · 74872f2
1 parent 22d257d
commit 74872f2
Show file tree

Hide file tree

Showing 2 changed files with 7 additions and 3 deletions.
diff --git a/setup.py b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'soundstorm-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.1.4',
+  version = '0.2.0',
   license='MIT',
   description = 'SoundStorm - Efficient Parallel Audio Generation from Google Deepmind, in Pytorch',
   author = 'Phil Wang',
@@ -23,10 +23,10 @@
     'beartype',
     'classifier-free-guidance-pytorch>=0.1.5',
     'einops>=0.6.1',
-    'spear-tts-pytorch>=0.0.15',
+    'spear-tts-pytorch>=0.4.0',
     'torch>=1.6',
   ],
-  classifiers=[
+  classifiers = [
     'Development Status :: 4 - Beta',
     'Intended Audience :: Developers',
     'Topic :: Scientific/Engineering :: Artificial Intelligence',

diff --git a/soundstorm_pytorch/soundstorm.py b/soundstorm_pytorch/soundstorm.py
@@ -762,6 +762,8 @@ def generate(
         noise_level_scale = 1.,
         num_full_sampling_levels = 1,
         text_to_semantic_generate_kwargs: dict = {},
+        spec_decode = False,
+        spec_decode_gamma = 5,
         **kwargs
     ):
         if self.should_condition and not exists(cond_semantic_token_ids):
@@ -776,6 +778,8 @@ def generate(
                 texts,
                 source_type = 'text',
                 target_type = 'speech',
+                spec_decode = spec_decode,
+                spec_decode_gamma = spec_decode_gamma,
                 **text_to_semantic_generate_kwargs
             )