Skip to content

Commit

Permalink
FEAT: Support LMDeploy for internvl2 and fix finish reason miss at i…
Browse files Browse the repository at this point in the history
…nternvl stream (#2145)

Co-authored-by: wuzhaoxin <[email protected]>
  • Loading branch information
amumu96 and wuzhaoxin authored Aug 23, 2024
1 parent 16d1193 commit b500224
Show file tree
Hide file tree
Showing 12 changed files with 629 additions and 23 deletions.
5 changes: 4 additions & 1 deletion xinference/core/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -177,6 +177,7 @@ def __init__(
request_limits: Optional[int] = None,
):
super().__init__()
from ..model.llm.lmdeploy.core import LMDeployModel
from ..model.llm.sglang.core import SGLANGModel
from ..model.llm.transformers.core import PytorchModel
from ..model.llm.vllm.core import VLLMModel
Expand All @@ -192,7 +193,9 @@ def __init__(
self._current_generator = lambda: None
self._lock = (
None
if isinstance(self._model, (PytorchModel, VLLMModel, SGLANGModel))
if isinstance(
self._model, (PytorchModel, VLLMModel, SGLANGModel, LMDeployModel)
)
else asyncio.locks.Lock()
)
self._worker_ref = None
Expand Down
2 changes: 2 additions & 0 deletions xinference/deploy/docker/Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ RUN pip install --upgrade -i "$PIP_INDEX" pip && \
pip install "llama-cpp-python>=0.2.82" -i https://abetlen.github.io/llama-cpp-python/whl/cu124 && \
pip install -i "$PIP_INDEX" --upgrade-strategy only-if-needed -r /opt/inference/xinference/deploy/docker/requirements.txt && \
pip install -i "$PIP_INDEX" --no-deps sglang && \
pip uninstall flashinfer -y && \
pip install flashinfer -i https://flashinfer.ai/whl/cu124/torch2.4 && \
cd /opt/inference && \
python3 setup.py build_web && \
git restore . && \
Expand Down
4 changes: 4 additions & 0 deletions xinference/model/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@
BUILTIN_MODELSCOPE_LLM_FAMILIES,
LLAMA_CLASSES,
LLM_ENGINES,
LMDEPLOY_CLASSES,
MLX_CLASSES,
SGLANG_CLASSES,
SUPPORTED_ENGINES,
Expand Down Expand Up @@ -113,6 +114,7 @@ def generate_engine_config_by_model_family(model_family):

def _install():
from .llama_cpp.core import LlamaCppChatModel, LlamaCppModel
from .lmdeploy.core import LMDeployChatModel, LMDeployModel
from .mlx.core import MLXChatModel, MLXModel
from .sglang.core import SGLANGChatModel, SGLANGModel
from .transformers.chatglm import ChatglmPytorchChatModel
Expand Down Expand Up @@ -148,6 +150,7 @@ def _install():
SGLANG_CLASSES.extend([SGLANGModel, SGLANGChatModel])
VLLM_CLASSES.extend([VLLMModel, VLLMChatModel, VLLMVisionModel])
MLX_CLASSES.extend([MLXModel, MLXChatModel])
LMDEPLOY_CLASSES.extend([LMDeployModel, LMDeployChatModel])
TRANSFORMERS_CLASSES.extend(
[
ChatglmPytorchChatModel,
Expand Down Expand Up @@ -176,6 +179,7 @@ def _install():
SUPPORTED_ENGINES["Transformers"] = TRANSFORMERS_CLASSES
SUPPORTED_ENGINES["llama.cpp"] = LLAMA_CLASSES
SUPPORTED_ENGINES["MLX"] = MLX_CLASSES
SUPPORTED_ENGINES["LMDEPLOY"] = LMDEPLOY_CLASSES

json_path = os.path.join(
os.path.dirname(os.path.abspath(__file__)), "llm_family.json"
Expand Down
18 changes: 9 additions & 9 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -7189,15 +7189,6 @@
"model_id": "OpenGVLab/InternVL2-4B",
"model_revision": "b50544dafada6c41e80bfde2f57cc9b0140fc21c"
},
{
"model_format": "awq",
"model_size_in_billions": 4,
"quantizations": [
"Int4"
],
"model_id": "OpenGVLab/InternVL2-8B-AWQ",
"model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
},
{
"model_format": "pytorch",
"model_size_in_billions": 8,
Expand All @@ -7209,6 +7200,15 @@
"model_id": "OpenGVLab/InternVL2-8B",
"model_revision": "3bfd3664dea4f3da628785f5125d30f889701253"
},
{
"model_format": "awq",
"model_size_in_billions": 8,
"quantizations": [
"Int4"
],
"model_id": "OpenGVLab/InternVL2-8B-AWQ",
"model_revision": "9f1a4756b7ae18eb26d8a22b618dfc283e8193b3"
},
{
"model_format": "pytorch",
"model_size_in_billions": 26,
Expand Down
2 changes: 2 additions & 0 deletions xinference/model/llm/llm_family.py
Original file line number Diff line number Diff line change
Expand Up @@ -271,6 +271,8 @@ def parse_raw(

MLX_CLASSES: List[Type[LLM]] = []

LMDEPLOY_CLASSES: List[Type[LLM]] = []

LLM_ENGINES: Dict[str, Dict[str, List[Dict[str, Any]]]] = {}
SUPPORTED_ENGINES: Dict[str, List[Type[LLM]]] = {}

Expand Down
20 changes: 10 additions & 10 deletions xinference/model/llm/llm_family_modelscope.json
Original file line number Diff line number Diff line change
Expand Up @@ -4778,10 +4778,10 @@
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_format": "awq",
"model_size_in_billions": 2,
"quantizations": [
"none"
"Int4"
],
"model_hub": "modelscope",
"model_id": "OpenGVLab/InternVL2-2B-AWQ",
Expand Down Expand Up @@ -4812,10 +4812,10 @@
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_format": "awq",
"model_size_in_billions": 8,
"quantizations": [
"none"
"Int4"
],
"model_hub": "modelscope",
"model_id": "OpenGVLab/InternVL2-8B-AWQ",
Expand All @@ -4834,10 +4834,10 @@
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_format": "awq",
"model_size_in_billions": 26,
"quantizations": [
"none"
"Int4"
],
"model_hub": "modelscope",
"model_id": "OpenGVLab/InternVL2-26B-AWQ",
Expand All @@ -4856,10 +4856,10 @@
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_format": "awq",
"model_size_in_billions": 40,
"quantizations": [
"none"
"Int4"
],
"model_hub": "modelscope",
"model_id": "OpenGVLab/InternVL2-40B-AWQ",
Expand All @@ -4878,10 +4878,10 @@
"model_revision": "master"
},
{
"model_format": "pytorch",
"model_format": "awq",
"model_size_in_billions": 76,
"quantizations": [
"none"
"Int4"
],
"model_hub": "modelscope",
"model_id": "OpenGVLab/InternVL2-Llama3-76B-AWQ",
Expand Down
Empty file.
Loading

0 comments on commit b500224

Please sign in to comment.