Add CLIP model to enable test_clip.py #1500

Open · wants to merge 5 commits into main
54 changes: 39 additions & 15 deletions tests/deepsparse/pipelines/test_clip.py
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import numpy as np

import pytest
from deepsparse.clip import (
    CLIPCaptionInput,
@@ -30,24 +32,48 @@
from tests.utils import mock_engine


def custom_process_inputs(self, inputs):
    if not isinstance(inputs.text, list):
        inputs.text = [inputs.text]
    if not isinstance(inputs.text[0], str):
        return inputs.text
    tokens = [np.array(t).astype(np.int32) for t in self.tokenizer(inputs.text)]
    tokens = np.stack(tokens, axis=0)
    tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])
    return [tokens, tokens_lengths]


# This overrides the process_inputs function globally for all CLIPTextPipeline classes.
# This is needed for CLIP-ViT-B-32-256x256-DataComp-s34B-b86K as it has a second input
# that specifies how many tokens are present.
CLIPTextPipeline.process_inputs = custom_process_inputs
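For reference, a minimal sketch (editor's illustration, not part of the diff) of the two-array contract the override returns; the 77-token context length is an assumption about the tokenizer, not something stated in this PR:

# Editor's sketch: shape contract of the override's return value for a batch of
# 3 captions, assuming a 77-token context length.
import numpy as np

tokens = np.zeros((3, 77), dtype=np.int32)  # stacked int32 token ids, one row per caption
tokens_lengths = np.array(tokens.shape[0] * [tokens.shape[1] - 1])  # -> array([76, 76, 76])
assert tokens_lengths.shape == (3,)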


@pytest.fixture
def visual_input():
def model_folder():
    from huggingface_hub import snapshot_download

    model_id = "neuralmagic/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K-quant-ds"
@dsikka (Contributor) commented on Jan 2, 2024:

Could we add a quick comment/note indicating that this model is not from OpenCLIP and only used for zero-shot classification?

    return snapshot_download(repo_id=model_id)
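Regarding the review comment above, one possible shape for the requested note (a sketch only; the wording, and the claim that the checkpoint is not from OpenCLIP, come from the reviewer and should be confirmed by the author):

# Sketch of the fixture with the suggested note added:
@pytest.fixture
def model_folder():
    from huggingface_hub import snapshot_download

    # NOTE: not an OpenCLIP checkpoint; used here only for zero-shot classification tests.
    model_id = "neuralmagic/CLIP-ViT-B-32-256x256-DataComp-s34B-b86K-quant-ds"
    return snapshot_download(repo_id=model_id)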


@pytest.fixture
def visual_input(model_folder):
    model_path = model_folder + "/visual.onnx"
    images = computer_vision(batch_size=2)
    model_path = None
    return CLIPVisualInput(images=images.get("images")), model_path


@pytest.fixture
def text_input():
    model_path = None
def text_input(model_folder):
    model_path = model_folder + "/textual.onnx"
    text = ["a building", "a dog", "a cat"]
    return CLIPTextInput(text=text), model_path


@pytest.mark.skip(reason="No CLIP models currently available to run tests")
@mock_engine(rng_seed=0)
def test_visual_clip(engine, visual_input):
    from deepsparse import Pipeline
    from deepsparse.legacy import Pipeline
Contributor comment:
shouldn't be needed?


    model_path = visual_input[-1]
    pipeline = Pipeline.create(task="clip_visual", model_path=model_path)
@@ -57,10 +83,9 @@ def test_visual_clip(engine, visual_input):
    assert len(output.image_embeddings) == 1


@pytest.mark.skip(reason="No CLIP models curently available to run tests")
@mock_engine(rng_seed=0)
def test_text_clip(engine, text_input):
    from deepsparse import Pipeline
    from deepsparse.legacy import Pipeline
Contributor comment:
same comment as above.


    model_path = text_input[-1]
    pipeline = Pipeline.create(task="clip_text", model_path=model_path)
@@ -70,18 +95,17 @@ def test_text_clip(engine, text_input):
    assert len(output.text_embeddings) == 1


@pytest.mark.skip(reason="No CLIP models currently available to run tests")
@mock_engine(rng_seed=0)
def test_zero_shot(engine, visual_input, text_input):
    from deepsparse.legacy import BasePipeline
    from deepsparse.legacy import Pipeline

    model_path_text = text_input[-1]
    model_path_visual = visual_input[-1]
    kwargs = {
        "visual_model_path": model_path_visual,
        "text_model_path": model_path_text,
    }
    pipeline = BasePipeline.create(task="clip_zeroshot", **kwargs)
    pipeline = Pipeline.create(task="clip_zeroshot", **kwargs)
    assert isinstance(pipeline, CLIPZeroShotPipeline)
    pipeline_input = CLIPZeroShotInput(
        image=CLIPVisualInput(images=visual_input[0].images[-1]), text=text_input[0]
@@ -90,12 +114,12 @@ def test_caption(engine, visual_input, text_input):
    assert isinstance(output, CLIPZeroShotOutput)


@pytest.mark.skip(reason="No CLIP models currently available to run tests")
@pytest.mark.skip(reason="No CLIP decoder models currently available to run tests")
@mock_engine(rng_seed=0)
def test_caption(engine, visual_input, text_input):
    from deepsparse.legacy import BasePipeline
    from deepsparse.legacy import Pipeline

    model_path_visual = text_input[-1]
    model_path_visual = visual_input[-1]
    model_path_text = text_input[-1]
    model_path_decoder = None
    pipeline_input = CLIPCaptionInput(
@@ -106,6 +130,6 @@ def test_caption(engine, visual_input, text_input):
        "text_model_path": model_path_text,
        "decoder_model_path": model_path_decoder,
    }
    pipeline = BasePipeline.create(task="clip_caption", **kwargs)
    pipeline = Pipeline.create(task="clip_caption", **kwargs)
    assert isinstance(pipeline, CLIPCaptionPipeline)
    assert isinstance(pipeline_input, CLIPCaptionInput)
11 changes: 9 additions & 2 deletions tests/utils/engine_mocking.py
@@ -135,10 +135,17 @@ def execute_list_out(self, inputs: List[numpy.ndarray]) -> List[numpy.ndarray]:

def _to_descriptor(node: ort.NodeArg) -> "_NumpyDescriptor":
    to_numpy_dtype = {
        "tensor(float)": numpy.float32,
        "tensor(double)": numpy.float64,
        "tensor(uint8)": numpy.uint8,
        "tensor(float)": numpy.float32,
        "tensor(float16)": numpy.float16,
        "tensor(int64)": numpy.int64,
        "tensor(int32)": numpy.int32,
        "tensor(int16)": numpy.int16,
        "tensor(int8)": numpy.int8,
        "tensor(uint64)": numpy.uint64,
        "tensor(uint32)": numpy.uint32,
        "tensor(uint16)": numpy.uint16,
        "tensor(uint8)": numpy.uint8,
    }
    return _NumpyDescriptor(shape=node.shape, dtype=to_numpy_dtype[node.type])
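These integer and half-precision entries presumably let the mock engine describe inputs such as the int32 token ids built in test_clip.py. A minimal sketch (editor's illustration; only numpy is required) of the lookup this helper performs:

# Editor's sketch: mapping an ONNX type string to the numpy dtype used when
# fabricating mock outputs, using a subset of the table above.
import numpy

to_numpy_dtype = {
    "tensor(float)": numpy.float32,
    "tensor(int32)": numpy.int32,
    "tensor(int64)": numpy.int64,
}
assert to_numpy_dtype["tensor(int32)"] is numpy.int32  # matches the token ids from custom_process_inputs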
