Allow `sample` to still return the raw mel output (new `return_raw_output` flag)

lucidrains · Sep 10, 2024 · d5cf973
1 parent aecf03d
commit d5cf973
Show file tree

Hide file tree

Showing 2 changed files with 9 additions and 3 deletions.
diff --git a/e2_tts_pytorch/e2_tts.py b/e2_tts_pytorch/e2_tts.py
@@ -875,10 +875,11 @@ def sample(
         steps = 32,
         cfg_strength = 1.,   # they used a classifier free guidance strength of 1.
         max_duration = 4096, # in case the duration predictor goes haywire
-        vocoder: Callable[[Float['b d n']], list[Float['_']]] | None = None
+        vocoder: Callable[[Float['b d n']], list[Float['_']]] | None = None,
+        return_raw_output: bool | None = None
     ) -> (
         Float['b n d'],
-        list[Float['nw']]
+        list[Float['_']]
     ):
         self.eval()
 
@@ -956,6 +957,11 @@ def fn(t, x):
 
         out = torch.where(cond_mask, cond, out)
 
+        # able to return raw untransformed output, if not using mel rep
+
+        if exists(return_raw_output) and return_raw_output:
+            return out
+
         # take care of transforming mel to audio if `vocoder` is passed in, or if `use_vocos` is turned on
 
         if exists(vocoder):

diff --git a/pyproject.toml b/pyproject.toml
@@ -1,6 +1,6 @@
 [project]
 name = "e2-tts-pytorch"
-version = "1.0.1"
+version = "1.0.2"
 description = "E2-TTS in Pytorch"
 authors = [
     { name = "Phil Wang", email = "lucidrains@gmail.com" }