diff --git a/setup.py b/setup.py
index 4ed0c22..ec57139 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'soundstorm-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.4.3',
+  version = '0.4.4',
   license='MIT',
   description = 'SoundStorm - Efficient Parallel Audio Generation from Google Deepmind, in Pytorch',
   author = 'Phil Wang',
diff --git a/soundstorm_pytorch/soundstorm.py b/soundstorm_pytorch/soundstorm.py
index 5baa645..3d6969d 100644
--- a/soundstorm_pytorch/soundstorm.py
+++ b/soundstorm_pytorch/soundstorm.py
@@ -530,6 +530,8 @@ def __init__(
             Rearrange('b n (h d) -> b (n h) d', h = num_effective_quantizers)
         )
 
+        self.num_effective_quantizers = num_effective_quantizers
+
         # each quantizer codebook would require its own logits weight and bias matrices
         # the amazing einops makes this easy with 'EinMix'
 
@@ -579,7 +581,8 @@ def forward(
         x = self.embedding_proj(x)
 
         if exists(sum_embeds):
-            x = x + sum_embeds
+            # einops reduce needs an explicit reduction; sum each group of h = num_effective_quantizers positions so the result is 'b n d'
+            x = x + reduce(sum_embeds, 'b (n h) d -> b n d', 'sum', h = self.num_effective_quantizers)
 
         if exists(cond):
             if cond.ndim == 2: