From 9a18a21c50d1b94c6bfedee49d39ba5117dc6174 Mon Sep 17 00:00:00 2001
From: Michael Goin <michael@neuralmagic.com>
Date: Wed, 27 Dec 2023 10:22:44 -0500
Subject: [PATCH 1/4] Enable CLIP tests using a Hugging Face Hub model

---
 tests/deepsparse/pipelines/test_clip.py | 27 +++++++++++++++++++------
 1 file changed, 21 insertions(+), 6 deletions(-)

diff --git a/tests/deepsparse/pipelines/test_clip.py b/tests/deepsparse/pipelines/test_clip.py
index cb8bfeb97b..211d1c0434 100644
--- a/tests/deepsparse/pipelines/test_clip.py
+++ b/tests/deepsparse/pipelines/test_clip.py
@@ -13,6 +13,7 @@
 # limitations under the License.
 
 import pytest
+import numpy as np
 from deepsparse.clip import (
     CLIPCaptionInput,
     CLIPCaptionPipeline,
@@ -29,22 +30,39 @@
 from tests.deepsparse.pipelines.data_helpers import computer_vision
 from tests.utils import mock_engine
 
+def custom_process_inputs(self, inputs):
+    if not isinstance(inputs.text, list):
+        inputs.text = [inputs.text]
+    if not isinstance(inputs.text[0], str):
+        return inputs.text
+    tokens = [np.array(t).astype(np.int32) for t in self.tokenizer(inputs.text)]
+    tokens = np.stack(tokens, axis=0)
+    tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])
+    return [tokens, tokens_lengths]
+
+# This overrides the process_inputs function globally for all CLIPTextPipeline classes
+# This is needed for CLIP-ViT-B-32-256x256-DataComp-s34B-b86K
+CLIPTextPipeline.process_inputs = custom_process_inputs
+
+@pytest.fixture
+def model_folder():
+    from huggingface_hub import snapshot_download
+    return snapshot_download(repo_id="neuralmagic/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K-quant-ds")
 
 @pytest.fixture
 def visual_input():
+    model_path = model_folder + "/visual.onnx"
     images = computer_vision(batch_size=2)
-    model_path = None
     return CLIPVisualInput(images=images.get("images")), model_path
 
 
 @pytest.fixture
 def text_input():
-    model_path = None
+    model_path = model_folder + "/textual.onnx"
     text = ["a building", "a dog", "a cat"]
     return CLIPTextInput(text=text), model_path
 
 
-@pytest.mark.skip(reason="No CLIP models currently available to run tests")
 @mock_engine(rng_seed=0)
 def test_visual_clip(engine, visual_input):
     from deepsparse import Pipeline
@@ -57,7 +75,6 @@ def test_visual_clip(engine, visual_input):
     assert len(output.image_embeddings) == 1
 
 
-@pytest.mark.skip(reason="No CLIP models curently available to run tests")
 @mock_engine(rng_seed=0)
 def test_text_clip(engine, text_input):
     from deepsparse import Pipeline
@@ -70,7 +87,6 @@ def test_text_clip(engine, text_input):
     assert len(output.text_embeddings) == 1
 
 
-@pytest.mark.skip(reason="No CLIP models currently available to run tests")
 @mock_engine(rng_seed=0)
 def test_zero_shot(engine, visual_input, text_input):
     from deepsparse.legacy import BasePipeline
@@ -90,7 +106,6 @@ def test_zero_shot(engine, visual_input, text_input):
     assert isinstance(output, CLIPZeroShotOutput)
 
 
-@pytest.mark.skip(reason="No CLIP models currently available to run tests")
 @mock_engine(rng_seed=0)
 def test_caption(engine, visual_input, text_input):
     from deepsparse.legacy import BasePipeline

From ad5c775b39fd0508aefbdf65f560484f7dabc463 Mon Sep 17 00:00:00 2001
From: mgoin <michael@neuralmagic.com>
Date: Wed, 27 Dec 2023 17:06:17 +0000
Subject: [PATCH 2/4] Fix CLIP test fixtures and widen mock engine dtypes

---
 tests/deepsparse/pipelines/test_clip.py | 30 ++++++++++++++++---------
 tests/utils/engine_mocking.py           | 11 +++++++--
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/tests/deepsparse/pipelines/test_clip.py b/tests/deepsparse/pipelines/test_clip.py
index 211d1c0434..89aee2fdc5 100644
--- a/tests/deepsparse/pipelines/test_clip.py
+++ b/tests/deepsparse/pipelines/test_clip.py
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-import pytest
 import numpy as np
+
+import pytest
 from deepsparse.clip import (
     CLIPCaptionInput,
     CLIPCaptionPipeline,
@@ -30,6 +31,7 @@
 from tests.deepsparse.pipelines.data_helpers import computer_vision
 from tests.utils import mock_engine
 
+
 def custom_process_inputs(self, inputs):
     if not isinstance(inputs.text, list):
         inputs.text = [inputs.text]
@@ -40,24 +42,29 @@ def custom_process_inputs(self, inputs):
     tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])
     return [tokens, tokens_lengths]
 
+
 # This overrides the process_inputs function globally for all CLIPTextPipeline classes
 # This is needed for CLIP-ViT-B-32-256x256-DataComp-s34B-b86K
 CLIPTextPipeline.process_inputs = custom_process_inputs
 
+
 @pytest.fixture
 def model_folder():
     from huggingface_hub import snapshot_download
-    return snapshot_download(repo_id="neuralmagic/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K-quant-ds")
+
+    model_id = "neuralmagic/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K-quant-ds"
+    return snapshot_download(repo_id=model_id)
+
 
 @pytest.fixture
-def visual_input():
+def visual_input(model_folder):
     model_path = model_folder + "/visual.onnx"
     images = computer_vision(batch_size=2)
     return CLIPVisualInput(images=images.get("images")), model_path
 
 
 @pytest.fixture
-def text_input():
+def text_input(model_folder):
     model_path = model_folder + "/textual.onnx"
     text = ["a building", "a dog", "a cat"]
     return CLIPTextInput(text=text), model_path
@@ -65,7 +72,7 @@ def text_input():
 
 @mock_engine(rng_seed=0)
 def test_visual_clip(engine, visual_input):
-    from deepsparse import Pipeline
+    from deepsparse.legacy import Pipeline
 
     model_path = visual_input[-1]
     pipeline = Pipeline.create(task="clip_visual", model_path=model_path)
@@ -77,7 +84,7 @@ def test_visual_clip(engine, visual_input):
 
 @mock_engine(rng_seed=0)
 def test_text_clip(engine, text_input):
-    from deepsparse import Pipeline
+    from deepsparse.legacy import Pipeline
 
     model_path = text_input[-1]
     pipeline = Pipeline.create(task="clip_text", model_path=model_path)
@@ -89,7 +96,7 @@ def test_text_clip(engine, text_input):
 
 @mock_engine(rng_seed=0)
 def test_zero_shot(engine, visual_input, text_input):
-    from deepsparse.legacy import BasePipeline
+    from deepsparse.legacy import Pipeline
 
     model_path_text = text_input[-1]
     model_path_visual = visual_input[-1]
@@ -97,7 +104,7 @@ def test_zero_shot(engine, visual_input, text_input):
         "visual_model_path": model_path_visual,
         "text_model_path": model_path_text,
     }
-    pipeline = BasePipeline.create(task="clip_zeroshot", **kwargs)
+    pipeline = Pipeline.create(task="clip_zeroshot", **kwargs)
     assert isinstance(pipeline, CLIPZeroShotPipeline)
     pipeline_input = CLIPZeroShotInput(
         image=CLIPVisualInput(images=visual_input[0].images[-1]), text=text_input[0]
@@ -106,11 +113,12 @@ def test_zero_shot(engine, visual_input, text_input):
     assert isinstance(output, CLIPZeroShotOutput)
 
 
+@pytest.mark.skip(reason="No CLIP decoder models currently available to run tests")
 @mock_engine(rng_seed=0)
 def test_caption(engine, visual_input, text_input):
-    from deepsparse.legacy import BasePipeline
+    from deepsparse.legacy import Pipeline
 
-    model_path_visual = text_input[-1]
+    model_path_visual = visual_input[-1]
     model_path_text = text_input[-1]
     model_path_decoder = None
     pipeline_input = CLIPCaptionInput(
@@ -121,6 +129,6 @@ def test_caption(engine, visual_input, text_input):
         "text_model_path": model_path_text,
         "decoder_model_path": model_path_decoder,
     }
-    pipeline = BasePipeline.create(task="clip_caption", **kwargs)
+    pipeline = Pipeline.create(task="clip_caption", **kwargs)
     assert isinstance(pipeline, CLIPCaptionPipeline)
     assert isinstance(pipeline_input, CLIPCaptionInput)
diff --git a/tests/utils/engine_mocking.py b/tests/utils/engine_mocking.py
index cef0b60164..cfdcbd76ae 100644
--- a/tests/utils/engine_mocking.py
+++ b/tests/utils/engine_mocking.py
@@ -135,10 +135,17 @@ def execute_list_out(self, inputs: List[numpy.ndarray]) -> List[numpy.ndarray]:
 
 def _to_descriptor(node: ort.NodeArg) -> "_NumpyDescriptor":
     to_numpy_dtype = {
-        "tensor(float)": numpy.float32,
         "tensor(double)": numpy.float64,
-        "tensor(uint8)": numpy.uint8,
+        "tensor(float)": numpy.float32,
+        "tensor(float16)": numpy.float16,
         "tensor(int64)": numpy.int64,
+        "tensor(int32)": numpy.int32,
+        "tensor(int16)": numpy.int16,
+        "tensor(int8)": numpy.int8,
+        "tensor(uint64)": numpy.uint64,
+        "tensor(uint32)": numpy.uint32,
+        "tensor(uint16)": numpy.uint16,
+        "tensor(uint8)": numpy.uint8,
     }
     return _NumpyDescriptor(shape=node.shape, dtype=to_numpy_dtype[node.type])
 

From 7ffb2ae265e378ee8701a20737148bf44b80e1b2 Mon Sep 17 00:00:00 2001
From: Michael Goin <michael@neuralmagic.com>
Date: Wed, 27 Dec 2023 12:07:26 -0500
Subject: [PATCH 3/4] Explain process_inputs override in test_clip.py

---
 tests/deepsparse/pipelines/test_clip.py | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/tests/deepsparse/pipelines/test_clip.py b/tests/deepsparse/pipelines/test_clip.py
index 89aee2fdc5..0fdb11b980 100644
--- a/tests/deepsparse/pipelines/test_clip.py
+++ b/tests/deepsparse/pipelines/test_clip.py
@@ -43,8 +43,9 @@ def custom_process_inputs(self, inputs):
     return [tokens, tokens_lengths]
 
 
-# This overrides the process_inputs function globally for all CLIPTextPipeline classes
-# This is needed for CLIP-ViT-B-32-256x256-DataComp-s34B-b86K
+# This overrides the process_inputs function globally for all CLIPTextPipeline classes.
+# This is needed for CLIP-ViT-B-32-256x256-DataComp-s34B-b86K since it has a second input
+# that specifies how many tokens are present.
 CLIPTextPipeline.process_inputs = custom_process_inputs
 
 

From 87c8ba595dbd0522ae922c7f810c9aaf1f142581 Mon Sep 17 00:00:00 2001
From: mgoin <michael@neuralmagic.com>
Date: Wed, 27 Dec 2023 17:56:27 +0000
Subject: [PATCH 4/4] Shorten comment line in test_clip.py

---
 tests/deepsparse/pipelines/test_clip.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/deepsparse/pipelines/test_clip.py b/tests/deepsparse/pipelines/test_clip.py
index 0fdb11b980..2858be7dca 100644
--- a/tests/deepsparse/pipelines/test_clip.py
+++ b/tests/deepsparse/pipelines/test_clip.py
@@ -44,7 +44,7 @@ def custom_process_inputs(self, inputs):
 
 
 # This overrides the process_inputs function globally for all CLIPTextPipeline classes.
-# This is needed for CLIP-ViT-B-32-256x256-DataComp-s34B-b86K since it has a second input
+# This is needed for CLIP-ViT-B-32-256x256-DataComp-s34B-b86K as it has a second input
 # that specifies how many tokens are present.
 CLIPTextPipeline.process_inputs = custom_process_inputs