diff --git a/setup.py b/setup.py
index 4ed0c22..ec57139 100644
--- a/setup.py
+++ b/setup.py
@@ -3,7 +3,7 @@
 setup(
   name = 'soundstorm-pytorch',
   packages = find_packages(exclude=[]),
-  version = '0.4.3',
+  version = '0.4.4',
   license='MIT',
   description = 'SoundStorm - Efficient Parallel Audio Generation from Google Deepmind, in Pytorch',
   author = 'Phil Wang',
diff --git a/soundstorm_pytorch/soundstorm.py b/soundstorm_pytorch/soundstorm.py
index 5baa645..3d6969d 100644
--- a/soundstorm_pytorch/soundstorm.py
+++ b/soundstorm_pytorch/soundstorm.py
@@ -530,6 +530,8 @@ def __init__(
             Rearrange('b n (h d) -> b (n h) d', h = num_effective_quantizers)
         )
 
+        self.num_effective_quantizers = num_effective_quantizers
+
         # each quantizer codebook would require its own logits weight and bias matrices
         # the amazing einops makes this easy with 'EinMix'
 
@@ -579,7 +581,7 @@ def forward(
         x = self.embedding_proj(x)
 
         if exists(sum_embeds):
-            x = x + sum_embeds
+            x = x + reduce(sum_embeds, 'b (n h) d -> b n d', 'sum', h = self.num_effective_quantizers)  # einops reduce requires an explicit reduction; sum over the quantizer axis
 
         if exists(cond):
             if cond.ndim == 2: