FEAT: vllm support for qwen2.5-vl-instruct (#2821)

xorbitsai · Feb 8, 2025 · 0129b84 · 0129b84
1 parent 37c8b89
commit 0129b84
Show file tree

Hide file tree

Showing 4 changed files with 8 additions and 5 deletions.
diff --git a/doc/source/getting_started/installation.rst b/doc/source/getting_started/installation.rst
@@ -47,7 +47,7 @@ Currently, supported models include:
 - ``deepseek``, ``deepseek-coder``, ``deepseek-chat``, ``deepseek-coder-instruct``, ``deepseek-r1-distill-qwen``, ``deepseek-v2-chat``, ``deepseek-v2-chat-0628``, ``deepseek-v2.5``, ``deepseek-r1-distill-llama``
 - ``yi-coder``, ``yi-coder-chat``
 - ``codeqwen1.5``, ``codeqwen1.5-chat``
-- ``qwen2.5``, ``qwen2.5-coder``, ``qwen2.5-instruct``, ``qwen2.5-coder-instruct``
+- ``qwen2.5``, ``qwen2.5-coder``, ``qwen2.5-instruct``, ``qwen2.5-coder-instruct``, ``qwen2.5-vl-instruct``
 - ``baichuan-2-chat``
 - ``internlm2-chat``
 - ``internlm2.5-chat``, ``internlm2.5-chat-1m``

diff --git a/doc/source/models/builtin/llm/qwen2.5-vl-instruct.rst b/doc/source/models/builtin/llm/qwen2.5-vl-instruct.rst
@@ -20,7 +20,7 @@ Model Spec 1 (pytorch, 3 Billion)
 - **Model Format:** pytorch
 - **Model Size (in billions):** 3
 - **Quantizations:** none
-- **Engines**: Transformers
+- **Engines**: vLLM, Transformers
 - **Model ID:** Qwen/Qwen2.5-VL-3B-Instruct
 - **Model Hubs**:  `Hugging Face <https://huggingface.co/Qwen/Qwen2.5-VL-3B-Instruct>`__, `ModelScope <https://modelscope.cn/models/qwen/Qwen2.5-VL-3B-Instruct>`__
 
@@ -36,7 +36,7 @@ Model Spec 2 (pytorch, 7 Billion)
 - **Model Format:** pytorch
 - **Model Size (in billions):** 7
 - **Quantizations:** none
-- **Engines**: Transformers
+- **Engines**: vLLM, Transformers
 - **Model ID:** Qwen/Qwen2.5-VL-7B-Instruct
 - **Model Hubs**:  `Hugging Face <https://huggingface.co/Qwen/Qwen2.5-VL-7B-Instruct>`__, `ModelScope <https://modelscope.cn/models/qwen/Qwen2.5-VL-7B-Instruct>`__
 
@@ -52,7 +52,7 @@ Model Spec 3 (pytorch, 72 Billion)
 - **Model Format:** pytorch
 - **Model Size (in billions):** 72
 - **Quantizations:** none
-- **Engines**: Transformers
+- **Engines**: vLLM, Transformers
 - **Model ID:** Qwen/Qwen2.5-VL-72B-Instruct
 - **Model Hubs**:  `Hugging Face <https://huggingface.co/Qwen/Qwen2.5-VL-72B-Instruct>`__, `ModelScope <https://modelscope.cn/models/qwen/Qwen2.5-VL-72B-Instruct>`__
 

diff --git a/doc/source/user_guide/backends.rst b/doc/source/user_guide/backends.rst
@@ -54,7 +54,7 @@ Currently, supported model includes:
 - ``deepseek``, ``deepseek-coder``, ``deepseek-chat``, ``deepseek-coder-instruct``, ``deepseek-r1-distill-qwen``, ``deepseek-v2-chat``, ``deepseek-v2-chat-0628``, ``deepseek-v2.5``, ``deepseek-r1-distill-llama``
 - ``yi-coder``, ``yi-coder-chat``
 - ``codeqwen1.5``, ``codeqwen1.5-chat``
-- ``qwen2.5``, ``qwen2.5-coder``, ``qwen2.5-instruct``, ``qwen2.5-coder-instruct``
+- ``qwen2.5``, ``qwen2.5-coder``, ``qwen2.5-instruct``, ``qwen2.5-coder-instruct``, ``qwen2.5-vl-instruct``
 - ``baichuan-2-chat``
 - ``internlm2-chat``
 - ``internlm2.5-chat``, ``internlm2.5-chat-1m``

diff --git a/xinference/model/llm/vllm/core.py b/xinference/model/llm/vllm/core.py
@@ -203,6 +203,9 @@ class VLLMGenerateConfig(TypedDict, total=False):
 if VLLM_INSTALLED and vllm.__version__ >= "0.7.0":
     VLLM_SUPPORTED_CHAT_MODELS.append("internlm3-instruct")
 
+if VLLM_INSTALLED and vllm.__version__ >= "0.7.2":
+    VLLM_SUPPORTED_VISION_MODEL_LIST.append("qwen2.5-vl-instruct")
+
 
 class VLLMModel(LLM):
     def __init__(