diff --git a/examples/notebooks/openclip-mojo-onnx-client.ipynb b/examples/notebooks/openclip-mojo-onnx-client.ipynb
deleted file mode 100644
index a38d7877..00000000
--- a/examples/notebooks/openclip-mojo-onnx-client.ipynb
+++ /dev/null
@@ -1,195 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "*Copyright 2024 Modular, Inc: Licensed under the Apache License v2.0 with LLVM Exceptions.*"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# MAX Serve and ONNX model client example"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from python import Python\n",
-    "from tensor import Tensor, TensorShape, TensorSpec\n",
-    "from max.engine import EngineNumpyView\n",
-    "\n",
-    "@always_inline\n",
-    "fn numpy_data_pointer[\n",
-    "    type: DType\n",
-    "](numpy_array: PythonObject) raises -> DTypePointer[type]:\n",
-    "    var data_ptr = numpy_array.__array_interface__[\"data\"][0].__index__()\n",
-    "    return DTypePointer[type](address=data_ptr)\n",
-    "\n",
-    "@always_inline\n",
-    "fn memcpy_to_numpy[\n",
-    "    type: DType\n",
-    "](array: PythonObject, tensor: Tensor[type]) raises:\n",
-    "    var dst = numpy_data_pointer[type](array)\n",
-    "    var src = tensor._ptr\n",
-    "    var length = tensor.num_elements()\n",
-    "    memcpy(dst, src, length)\n",
-    "\n",
-    "\n",
-    "@always_inline\n",
-    "fn shape_to_python_list(shape: TensorShape) raises -> PythonObject:\n",
-    "    var python_list = Python.evaluate(\"list()\")\n",
-    "    for i in range(shape.rank()):\n",
-    "        _ = python_list.append(shape[i])\n",
-    "    return python_list^\n",
-    "\n",
-    "@always_inline\n",
-    "fn get_np_dtype[type: DType](np: PythonObject) raises -> PythonObject:\n",
-    "    @parameter\n",
-    "    if type.is_float32():\n",
-    "        return np.float32\n",
-    "    elif type.is_int32():\n",
-    "        return np.int32\n",
-    "    elif type.is_int64():\n",
-    "        return np.int64\n",
-    "    elif type.is_uint8():\n",
-    "        return np.uint8\n",
-    "\n",
-    "    raise \"Unknown datatype\"\n",
-    "\n",
-    "@always_inline\n",
-    "fn tensor_to_numpy[\n",
-    "    type: DType\n",
-    "](tensor: Tensor[type], np: PythonObject) raises -> PythonObject:\n",
-    "    var shape = shape_to_python_list(tensor.shape())\n",
-    "    var tensor_as_numpy = np.zeros(shape, get_np_dtype[type](np))\n",
-    "    _ = shape^\n",
-    "    memcpy_to_numpy(tensor_as_numpy, tensor)\n",
-    "    return tensor_as_numpy^\n",
-    "\n",
-    "@always_inline\n",
-    "fn numpy_to_tensor[\n",
-    "    dtype: DType\n",
-    "](inout np_array: PythonObject) raises -> Tensor[dtype]:\n",
-    "    var view = EngineNumpyView(np_array)\n",
-    "    var size = view.spec().num_elements()\n",
-    "    var ptr = DTypePointer[dtype].alloc(size)\n",
-    "    memcpy(ptr, view.unsafe_ptr().bitcast[dtype](), size)\n",
-    "    return Tensor[dtype](view.spec(), ptr)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Prepare client/inputs"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "var open_clip = Python.import_module(\"open_clip\")\n",
-    "var PIL = Python.import_module(\"PIL\")\n",
-    "var requests = Python.import_module(\"requests\")\n",
-    "var torch = Python.import_module(\"torch\")\n",
-    "\n",
-    "var tup = open_clip.create_model_and_transforms(\n",
-    "    \"ViT-B-32\", pretrained=\"laion2b_s34b_b79k\"\n",
-    ")\n",
-    "var tokenizer = open_clip.get_tokenizer(\"ViT-B-32\")\n",
-    "\n",
-    "var url = 'http://images.cocodataset.org/val2017/000000039769.jpg'\n",
-    "var labels = [\"cats\", \"dogs\", \"fish\"]\n",
-    "var raw_image = PIL.Image.open(requests.get(url, stream=True).raw)\n",
-    "var image = tup[2](raw_image).unsqueeze(0).detach().numpy()\n",
-    "var text = tokenizer(labels).detach().numpy()"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from max.engine import InferenceSession\n",
-    "from max.engine.tensor import EngineNumpyView\n",
-    "from max.serve.kserve.client import GRPCInferenceClient\n",
-    "\n",
-    "var session = InferenceSession()\n",
-    "var inputs = session.new_tensor_map()\n",
-    "var image_tensor = numpy_to_tensor[DType.float32](image)\n",
-    "var text_tensor = numpy_to_tensor[DType.int64](text)\n",
-    "inputs.borrow(\"image\", image_tensor)\n",
-    "inputs.borrow(\"text\", text_tensor)\n",
-    "print(str(inputs))"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Run an inference"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": []
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "var req_outputs = List[String](\"image_features\", \"text_features\")\n",
-    "var client = GRPCInferenceClient(\"0.0.0.0:8000\", session)\n",
-    "var response = client.infer(\"openclip\", \"0\", inputs, req_outputs)\n",
-    "var outputs = response.get_output_tensors()\n",
-    "\n",
-    "var np = Python.import_module(\"numpy\")\n",
-    "var img_feats = tensor_to_numpy(outputs.get[DType.float32](\"image_features\"), np)\n",
-    "var txt_feats = tensor_to_numpy(outputs.get[DType.float32](\"text_features\"), np)\n",
-    "fn softmax(np: PythonObject, x: PythonObject) raises -> PythonObject:\n",
-    "    var z = x - np.max(x)\n",
-    "    var num = np.exp(z)\n",
-    "    return num / np.sum(num)\n",
-    "\n",
-    "txt_feats /= np.linalg.norm(txt_feats, axis=-1, keepdims=True)\n",
-    "img_feats /= np.linalg.norm(img_feats, axis=-1, keepdims=True)\n",
-    "var similarity = softmax(np, 100.0 * np.matmul(img_feats, txt_feats.T))\n",
-    "print(\"Label probs:\\n\", similarity)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Mojo",
-   "language": "mojo",
-   "name": "mojo-jupyter-kernel"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "mojo"
-   },
-   "file_extension": ".mojo",
-   "mimetype": "text/x-mojo",
-   "name": "mojo"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/notebooks/openclip-mojo-onnx-server.ipynb b/examples/notebooks/openclip-mojo-onnx-server.ipynb
deleted file mode 100644
index 3319fc6f..00000000
--- a/examples/notebooks/openclip-mojo-onnx-server.ipynb
+++ /dev/null
@@ -1,69 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "*Copyright 2024 Modular, Inc: Licensed under the Apache License v2.0 with LLVM Exceptions.*"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# MAX Serve and ONNX model server example"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Start server"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from tensor import TensorSpec\n",
-    "\n",
-    "from max.engine import InferenceSession, InputSpec\n",
-    "from max.serve.service import FileModel\n",
-    "from max.serve.kserve.server import GRPCInferenceServer, MuxInferenceService\n",
-    "\n",
-    "var model_name = \"openclip\"\n",
-    "var model_path = \"clip.onnx\"\n",
-    "\n",
-    "# Load models during service creation:\n",
-    "var models = List(FileModel(model_name, \"0\", model_path, None))\n",
-    "var session = InferenceSession()\n",
-    "var service = MuxInferenceService(models, session)\n",
-    "\n",
-    "# Create server and start listening:\n",
-    "var server = GRPCInferenceServer.create(\"0.0.0.0:8000\", session)\n",
-    "service.init(server)\n",
-    "print(\"Listening on port 8000!\")\n",
-    "server.serve(service)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Mojo",
-   "language": "mojo",
-   "name": "mojo-jupyter-kernel"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "mojo"
-   },
-   "file_extension": ".mojo",
-   "mimetype": "text/x-mojo",
-   "name": "mojo"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}
diff --git a/examples/notebooks/roberta-mojo-pytorch-client.ipynb b/examples/notebooks/roberta-mojo-pytorch-client.ipynb
deleted file mode 100644
index 034ba04c..00000000
--- a/examples/notebooks/roberta-mojo-pytorch-client.ipynb
+++ /dev/null
@@ -1,205 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "*Copyright 2024 Modular, Inc: Licensed under the Apache License v2.0 with LLVM Exceptions.*"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# MAX Serve and PyTorch model client example"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from python import Python\n",
-    "from tensor import Tensor, TensorShape, TensorSpec\n",
-    "from max.engine import EngineNumpyView\n",
-    "\n",
-    "@always_inline\n",
-    "fn numpy_data_pointer[\n",
-    "    type: DType\n",
-    "](numpy_array: PythonObject) raises -> DTypePointer[type]:\n",
-    "    var data_ptr = numpy_array.__array_interface__[\"data\"][0].__index__()\n",
-    "    return DTypePointer[type](address=data_ptr)\n",
-    "\n",
-    "@always_inline\n",
-    "fn memcpy_to_numpy[\n",
-    "    type: DType\n",
-    "](array: PythonObject, tensor: Tensor[type]) raises:\n",
-    "    var dst = numpy_data_pointer[type](array)\n",
-    "    var src = tensor._ptr\n",
-    "    var length = tensor.num_elements()\n",
-    "    memcpy(dst, src, length)\n",
-    "\n",
-    "\n",
-    "@always_inline\n",
-    "fn shape_to_python_list(shape: TensorShape) raises -> PythonObject:\n",
-    "    var python_list = Python.evaluate(\"list()\")\n",
-    "    for i in range(shape.rank()):\n",
-    "        _ = python_list.append(shape[i])\n",
-    "    return python_list^\n",
-    "\n",
-    "@always_inline\n",
-    "fn get_np_dtype[type: DType](np: PythonObject) raises -> PythonObject:\n",
-    "    @parameter\n",
-    "    if type.is_float32():\n",
-    "        return np.float32\n",
-    "    elif type.is_int32():\n",
-    "        return np.int32\n",
-    "    elif type.is_int64():\n",
-    "        return np.int64\n",
-    "    elif type.is_uint8():\n",
-    "        return np.uint8\n",
-    "\n",
-    "    raise \"Unknown datatype\"\n",
-    "\n",
-    "@always_inline\n",
-    "fn tensor_to_numpy[\n",
-    "    type: DType\n",
-    "](tensor: Tensor[type], np: PythonObject) raises -> PythonObject:\n",
-    "    var shape = shape_to_python_list(tensor.shape())\n",
-    "    var tensor_as_numpy = np.zeros(shape, get_np_dtype[type](np))\n",
-    "    _ = shape^\n",
-    "    memcpy_to_numpy(tensor_as_numpy, tensor)\n",
-    "    return tensor_as_numpy^\n",
-    "\n",
-    "@always_inline\n",
-    "fn numpy_to_tensor[\n",
-    "    dtype: DType\n",
-    "](inout np_array: PythonObject) raises -> Tensor[dtype]:\n",
-    "    var view = EngineNumpyView(np_array)\n",
-    "    var size = view.spec().num_elements()\n",
-    "    var ptr = DTypePointer[dtype].alloc(size)\n",
-    "    memcpy(ptr, view.unsafe_ptr().bitcast[dtype](), size)\n",
-    "    return Tensor[dtype](view.spec(), ptr)"
-   ]
-  },
-  {
-   "cell_type": "markdown",
"metadata": {}, - "source": [ - "## Prepare client/inputs" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "var transformers = Python.import_module(\"transformers\")\n", - "\n", - "var model_name = \"roberta\"\n", - "var model_path = \"roberta.torchscript\"\n", - "var batch = 1\n", - "var seqlen = 128\n", - "\n", - "var HF_MODEL_NAME = \"cardiffnlp/twitter-roberta-base-emotion-multilabel-latest\"\n", - "var hf_model = transformers.AutoModelForSequenceClassification.from_pretrained(HF_MODEL_NAME)\n", - "hf_model.config.return_dict = False\n", - "\n", - "# Tokenize input into input ids and mask:\n", - "var INPUT = \"There are many exciting developments in the field of AI Infrastructure!\"\n", - "var tokenizer = transformers.AutoTokenizer.from_pretrained(HF_MODEL_NAME)\n", - "var raw_inputs = tokenizer(INPUT,\n", - " return_tensors=\"pt\", padding='max_length', truncation=True, max_length=seqlen)\n", - "print(raw_inputs)" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "from max.engine import InferenceSession\n", - "from max.engine.tensor import EngineNumpyView\n", - "from max.serve.kserve.client import GRPCInferenceClient\n", - "\n", - "var session = InferenceSession()\n", - "var inputs = session.new_tensor_map()\n", - "var a = raw_inputs[\"input_ids\"].detach().numpy()\n", - "var b = raw_inputs[\"attention_mask\"].detach().numpy()\n", - "var input_ids = numpy_to_tensor[DType.int64](a)\n", - "var attention_mask = numpy_to_tensor[DType.int64](b)\n", - "inputs.borrow(\"input_ids\", input_ids)\n", - "inputs.borrow(\"attention_mask\", attention_mask)\n", - "for key in inputs.keys():\n", - " print(key[] + \" : \" + str(inputs.get[DType.int64](key[])))\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Run an inference" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "var req_outputs = List[String](\"result0\")\n", - "var client = GRPCInferenceClient(\"0.0.0.0:8000\", session)\n", - "var response = client.infer(\"roberta\", \"0\", inputs, req_outputs)\n", - "var outputs = response.get_output_tensors()\n", - "for key in outputs.keys():\n", - " print(key[] + \" : \" + str(outputs.get[DType.float32](key[])))\n", - "\n", - "var np = Python.import_module(\"numpy\")\n", - "var arr = tensor_to_numpy(outputs.get[DType.float32](\"result0\"), np)\n", - "\n", - "# Extract class prediction from output\n", - "var predicted_class_id = arr.argmax(axis=-1)[0]\n", - "var classification = hf_model.config.id2label[predicted_class_id]\n", - "\n", - "print(\"The sentiment is: \" + str(classification))" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "* TODO: Add batch example" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Mojo", - "language": "mojo", - "name": "mojo-jupyter-kernel" - }, - "language_info": { - "codemirror_mode": { - "name": "mojo" - }, - "file_extension": ".mojo", - "mimetype": "text/x-mojo", - "name": "mojo" - } - }, - "nbformat": 4, - "nbformat_minor": 2 -} diff --git a/examples/notebooks/roberta-mojo-pytorch-server.ipynb b/examples/notebooks/roberta-mojo-pytorch-server.ipynb deleted file mode 100644 index 9fb4cd3a..00000000 --- a/examples/notebooks/roberta-mojo-pytorch-server.ipynb +++ /dev/null 
@@ -1,72 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "*Copyright 2024 Modular, Inc: Licensed under the Apache License v2.0 with LLVM Exceptions.*"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "# MAX Serve and PyTorch model server example"
-   ]
-  },
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Start server"
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": null,
-   "metadata": {},
-   "outputs": [],
-   "source": [
-    "from tensor import TensorSpec\n",
-    "\n",
-    "from max.engine import InferenceSession, InputSpec\n",
-    "from max.serve.service import FileModel\n",
-    "from max.serve.server import InferenceServer, MuxInferenceService\n",
-    "\n",
-    "var model_name = \"roberta\"\n",
-    "var model_path = \"roberta.torchscript\"\n",
-    "\n",
-    "# Load models during service creation:\n",
-    "var models = List(FileModel(model_name, \"0\", model_path, List(\n",
-    "    InputSpec(TensorSpec(DType.int64, 1, 128)),\n",
-    "    InputSpec(TensorSpec(DType.int64, 1, 128)),\n",
-    ")))\n",
-    "var session = InferenceSession()\n",
-    "var service = MuxInferenceService(models, session)\n",
-    "\n",
-    "# Create server and start listening:\n",
-    "var server = InferenceServer.create(\"0.0.0.0:8000\", session)\n",
-    "service.init(server)\n",
-    "print(\"Listening on port 8000!\")\n",
-    "server.serve(service)"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "Mojo",
-   "language": "mojo",
-   "name": "mojo-jupyter-kernel"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "mojo"
-   },
-   "file_extension": ".mojo",
-   "mimetype": "text/x-mojo",
-   "name": "mojo"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}