fix key_padding_mask bug in nnf_multi_head_attention_forward (#1208)
MaximilianPi authored Nov 11, 2024
1 parent e7897ae commit 6d277a8
Showing 3 changed files with 10 additions and 4 deletions.
1 change: 1 addition & 0 deletions DESCRIPTION
@@ -11,6 +11,7 @@ Authors@R: c(
     person("Krzysztof", "Joachimiak", role = c("ctb")),
     person("Hamada S.", "Badr", role = c("ctb")),
     person("Sebastian", "Fischer", role = c("ctb")),
+    person("Maximilian", "Pichler", role = c("ctb")),
     person(family = "RStudio", role = c("cph"))
     )
 Description: Provides functionality to define and train neural networks similar to
1 change: 1 addition & 0 deletions NEWS.md
@@ -3,6 +3,7 @@
 ## Bug fixes
 
 - `torch_iinfo()` now support all integer dtypes (#1190 @cregouby)
+- Fixed float key_padding_mask in `nnf_multi_head_attention_forward()` (#1205)
 
 # torch 0.13.0
 
12 changes: 8 additions & 4 deletions R/nnf-activation.R
@@ -728,10 +728,14 @@ nnf_multi_head_attention_forward <- function(query, # type: Tensor
 
   if (!is.null(key_padding_mask)) {
     attn_output_weights <- attn_output_weights$view(c(bsz, num_heads, tgt_len, src_len))
-    attn_output_weights <- attn_output_weights$masked_fill(
-      key_padding_mask$unsqueeze(2)$unsqueeze(3),
-      -Inf
-    )
+    if (key_padding_mask$dtype == torch_bool()) {
+      attn_output_weights <- attn_output_weights$masked_fill(
+        key_padding_mask$unsqueeze(2)$unsqueeze(3),
+        -Inf
+      )
+    } else {
+      attn_output_weights <- attn_output_weights + key_padding_mask$unsqueeze(2)$unsqueeze(3)
+    }
     attn_output_weights <- attn_output_weights$view(c(
       bsz * num_heads,
       tgt_len,
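For context on the two mask conventions this patch distinguishes: a boolean key_padding_mask flags padded key positions, which masked_fill() sets to -Inf before the softmax, whereas a float mask is interpreted as an additive bias on the attention logits. The stand-alone sketch below is not part of the commit; shapes, the padding pattern, and variable names are invented for illustration, and it only mirrors the branching introduced above.

library(torch)

# Invented shapes for illustration: 2 sequences, 4 heads, 3 queries, 5 keys.
bsz <- 2; num_heads <- 4; tgt_len <- 3; src_len <- 5
attn_output_weights <- torch_randn(bsz, num_heads, tgt_len, src_len)

# Boolean convention: TRUE marks padded key positions that must be ignored.
pad <- matrix(c(FALSE, FALSE, FALSE, TRUE, TRUE,
                FALSE, FALSE, FALSE, FALSE, TRUE),
              nrow = bsz, byrow = TRUE)
bool_mask <- torch_tensor(pad)                                  # dtype: bool
# Float convention: an additive bias (0 for visible keys, -Inf for padding).
float_mask <- torch_zeros(bsz, src_len)$masked_fill(bool_mask, -Inf)

# Boolean branch of the patch: fill masked positions with -Inf.
weights_bool <- attn_output_weights$masked_fill(
  bool_mask$unsqueeze(2)$unsqueeze(3), -Inf
)
# Float branch of the patch: add the mask as a bias.
weights_float <- attn_output_weights + float_mask$unsqueeze(2)$unsqueeze(3)

# Under either convention, padded keys receive zero weight after the softmax.
probs_bool  <- nnf_softmax(weights_bool, dim = -1)
probs_float <- nnf_softmax(weights_float, dim = -1)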
