Commit: test fixes
mattdangerw committed Feb 14, 2024
1 parent a2a412a commit f274d1e
Showing 6 changed files with 16 additions and 12 deletions.
8 changes: 5 additions & 3 deletions keras_nlp/layers/modeling/transformer_layer_utils.py
@@ -55,9 +55,11 @@ def compute_causal_mask(batch_size, input_length, output_length, cache_index=0):
     `(batch_size, output_length, input_length)` that can be passed to an
     attention layer.
     """
-    i = ops.expand_dims(ops.arange(output_length), axis=1) + cache_index
-    j = ops.arange(input_length)
-    mask = ops.expand_dims(ops.cast(i >= j, dtype="int32"), axis=0)
+    i = ops.arange(output_length, dtype="float32")
+    i = i + ops.cast(cache_index, "float32")
+    i = ops.expand_dims(i, axis=1)
+    j = ops.arange(input_length, dtype="float32")
+    mask = ops.expand_dims(i >= j, axis=0)
     return ops.broadcast_to(mask, (batch_size, output_length, input_length))


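For intuition, a minimal sketch of what the rewritten mask computation produces (assuming `compute_causal_mask` is imported from the module above; the toy sizes are hypothetical):

from keras_nlp.layers.modeling.transformer_layer_utils import compute_causal_mask

# One batch, a 5-token context, generating 2 positions starting at cache index 3.
mask = compute_causal_mask(
    batch_size=1, input_length=5, output_length=2, cache_index=3
)
# Row k may attend to keys j <= cache_index + k; the mask is now boolean
# rather than the previous int32:
# [[[ True,  True,  True,  True, False],
#   [ True,  True,  True,  True,  True]]]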
2 changes: 1 addition & 1 deletion keras_nlp/models/bart/bart_seq_2_seq_lm.py
@@ -257,7 +257,7 @@ def call_with_cache(
     ):
         tokens = self.backbone.token_embedding(token_ids)
         positions = self.backbone.decoder_position_embedding(
-            tokens, start_index=index,
+            tokens, start_index=index
         )
         # Sum, normalize and apply dropout to embeddings.
         x = self.backbone.decoder_embeddings_add((tokens, positions))
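The `start_index=index` argument is what keeps cached decoding positionally correct: each step feeds the model a single new token, so the position lookup must begin at the current step rather than at 0. A minimal sketch with keras_nlp's public `PositionEmbedding` layer (toy shapes are hypothetical):

import numpy as np
from keras_nlp.layers import PositionEmbedding

embedding = PositionEmbedding(sequence_length=8)
step_input = np.zeros((1, 1, 4), dtype="float32")  # (batch, 1, hidden_dim)
# Returns the embedding for position 3, not position 0.
pos_at_step_3 = embedding(step_input, start_index=3)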
4 changes: 1 addition & 3 deletions keras_nlp/models/bart/bart_seq_2_seq_lm_test.py
@@ -119,9 +119,7 @@ def wrapper(*args, **kwargs):
                 cache,
             )

-        with patch.object(
-            seq_2_seq_lm, "call_with_cache", wraps=wrapper
-        ):
+        with patch.object(seq_2_seq_lm, "call_with_cache", wraps=wrapper):
             inputs = {
                 "encoder_text": [
                     " airplane at airport",
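The test relies on `unittest.mock.patch.object` with `wraps=` to spy on `call_with_cache` while preserving its behavior. A self-contained illustration of the pattern (the `Greeter` class is hypothetical):

from unittest.mock import patch

class Greeter:
    def greet(self, name):
        return f"hello {name}"

greeter = Greeter()
calls = []

def wrapper(*args, **kwargs):
    # Record the arguments, then delegate to the real method.
    calls.append((args, kwargs))
    return Greeter.greet(greeter, *args, **kwargs)

# `wraps=wrapper` keeps real behavior while letting the test observe calls.
with patch.object(greeter, "greet", wraps=wrapper):
    assert greeter.greet("world") == "hello world"
assert calls == [(("world",), {})]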
@@ -141,6 +141,10 @@ def generate_postprocess(
             token_ids = ops.convert_to_numpy(token_ids)
         if not isinstance(padding_mask, tf.Tensor):
             padding_mask = ops.convert_to_numpy(padding_mask)
+        # Make sure the numpy array has type `int32` since
+        # `SentencePieceProcessor.detokenize` only accepts `int32` arrays.
+        token_ids = tf.cast(token_ids, "int32")
+        padding_mask = tf.cast(padding_mask, "bool")
         # Strip any special tokens during detokenization (e.g. the start and
         # end markers). In the future we could make this configurable.
         padding_mask = padding_mask & (token_ids != self.tokenizer.end_token_id)
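The motivation for the new casts: samplers can hand back ids in other dtypes (e.g. int64), and per the commit's own comment `SentencePieceProcessor.detokenize` only accepts `int32` arrays. A sketch of the failure mode and the fix (the ids and the end-token id of 1 are hypothetical):

import numpy as np
import tensorflow as tf

token_ids = np.array([[2, 15, 37, 1]], dtype="int64")  # e.g. a sampler's output
token_ids = tf.cast(token_ids, "int32")  # required for SentencePiece detokenization
padding_mask = tf.cast(np.array([[1, 1, 1, 0]]), "bool")
# Mask out special tokens (the end marker, id 1 here) before detokenizing.
padding_mask = padding_mask & (token_ids != 1)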
8 changes: 4 additions & 4 deletions keras_nlp/models/mistral/mistral_causal_lm_preprocessor.py
@@ -155,12 +155,12 @@ def generate_postprocess(
         # Convert the inputs to numpy arrays if they aren't a tensor already.
         if not isinstance(token_ids, tf.Tensor):
             token_ids = ops.convert_to_numpy(token_ids)
-            # Make sure the numpy array has type `int32` since
-            # `SentencePieceProcessor.detokenize` only accepts `int32` arrays.
-            token_ids = token_ids.astype("int32")
         if not isinstance(padding_mask, tf.Tensor):
             padding_mask = ops.convert_to_numpy(padding_mask)
-            padding_mask = padding_mask.astype("bool")
+        # Make sure the numpy array has type `int32` since
+        # `SentencePieceProcessor.detokenize` only accepts `int32` arrays.
+        token_ids = tf.cast(token_ids, "int32")
+        padding_mask = tf.cast(padding_mask, "bool")
         # Strip any special tokens during detokenization (e.g. the start and
         # end markers). In the future we could make this configurable.
         padding_mask = padding_mask & (token_ids != self.tokenizer.end_token_id)
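Note what moved, not just what changed: in the old code the casts lived inside the `isinstance` branches, so inputs that already were `tf.Tensor`s skipped them entirely (and numpy's `.astype` cannot be applied to tensors anyway). Hoisting `tf.cast` out of both branches makes the dtype guarantee unconditional:

import tensorflow as tf

token_ids = tf.constant([[2, 15, 37]], dtype="int64")
# Old: `isinstance(token_ids, tf.Tensor)` is True, so no conversion ran and
# int64 ids reached the detokenizer. New: the cast always runs.
token_ids = tf.cast(token_ids, "int32")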
2 changes: 1 addition & 1 deletion keras_nlp/samplers/serialization.py
@@ -14,11 +14,11 @@

 from keras_nlp.api_export import keras_nlp_export
 from keras_nlp.backend import keras
-from keras_nlp.samplers.sampler import Sampler
 from keras_nlp.samplers.beam_sampler import BeamSampler
 from keras_nlp.samplers.contrastive_sampler import ContrastiveSampler
 from keras_nlp.samplers.greedy_sampler import GreedySampler
 from keras_nlp.samplers.random_sampler import RandomSampler
+from keras_nlp.samplers.sampler import Sampler
 from keras_nlp.samplers.top_k_sampler import TopKSampler
 from keras_nlp.samplers.top_p_sampler import TopPSampler

