Skip to content

Commit

Permalink
FEAT: support glm-edge-chat model (#2582)
Browse files Browse the repository at this point in the history
  • Loading branch information
amumu96 authored Nov 29, 2024
1 parent f4b5b42 commit eb8ddd4
Show file tree
Hide file tree
Showing 6 changed files with 720 additions and 0 deletions.
2 changes: 2 additions & 0 deletions xinference/model/llm/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -143,6 +143,7 @@ def _install():
)
from .transformers.deepseek_vl import DeepSeekVLChatModel
from .transformers.glm4v import Glm4VModel
from .transformers.glm_edge_v import GlmEdgeVModel
from .transformers.intern_vl import InternVLChatModel
from .transformers.internlm2 import Internlm2PytorchChatModel
from .transformers.minicpmv25 import MiniCPMV25Model
Expand Down Expand Up @@ -193,6 +194,7 @@ def _install():
DeepSeekV2PytorchModel,
DeepSeekV2PytorchChatModel,
OptPytorchModel,
GlmEdgeVModel,
]
)
if OmniLMMModel: # type: ignore
Expand Down
227 changes: 227 additions & 0 deletions xinference/model/llm/llm_family.json
Original file line number Diff line number Diff line change
Expand Up @@ -8596,5 +8596,232 @@
"<|im_start|>",
"<|im_end|>"
]
},
{
"version": 1,
"context_length": 8192,
"model_name": "glm-edge-chat",
"model_lang": [
"en",
"zh"
],
"model_ability": [
"chat"
],
"model_description": "The GLM-Edge series targets real-world edge (on-device) scenarios. It consists of two sizes of large-language dialogue models and two multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). The 1.5B / 2B models are mainly intended for platforms such as mobile phones and in-car systems, while the 4B / 5B models are mainly intended for platforms such as PCs.",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "1_5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "THUDM/glm-edge-1.5b-chat"
},
{
"model_format": "pytorch",
"model_size_in_billions": "4",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "THUDM/glm-edge-4b-chat"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_5",
"quantizations": [
"Q4_0",
"Q4_1",
"Q4_K",
"Q4_K_M",
"Q4_K_S",
"Q5_0",
"Q5_1",
"Q5_K",
"Q5_K_M",
"Q5_K_S",
"Q6_K",
"Q8_0"
],
"model_file_name_template": "ggml-model-{quantization}.gguf",
"model_id": "THUDM/glm-edge-1.5b-chat-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "1_5",
"quantizations": [
"F16"
],
"model_file_name_template": "glm-edge-1.5B-chat-{quantization}.gguf",
"model_id": "THUDM/glm-edge-1.5b-chat-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "4",
"quantizations": [
"Q4_0",
"Q4_1",
"Q4_K",
"Q4_K_M",
"Q4_K_S",
"Q5_0",
"Q5_1",
"Q5_K",
"Q5_K_M",
"Q5_K_S",
"Q6_K",
"Q8_0"
],
"model_file_name_template": "ggml-model-{quantization}.gguf",
"model_id": "THUDM/glm-edge-4b-chat-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "4",
"quantizations": [
"F16"
],
"model_file_name_template": "glm-edge-4B-chat-{quantization}.gguf",
"model_id": "THUDM/glm-edge-4b-chat-gguf"
}
],
"chat_template": "{% for item in messages %}{% if item['role'] == 'system' %}<|system|>\n{{ item['content'] }}{% elif item['role'] == 'user' %}<|user|>\n{{ item['content'] }}{% elif item['role'] == 'assistant' %}<|assistant|>\n{{ item['content'] }}{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
"stop_token_ids": [
59246,
59253,
59255
],
"stop": [
"<|endoftext|>",
"<|user|>",
"<|observation|>"
]
},
{
"version": 1,
"context_length": 8192,
"model_name": "glm-edge-v",
"model_lang": [
"en",
"zh"
],
"model_ability": [
"chat",
"vision"
],
"model_description": "The GLM-Edge series targets real-world edge (on-device) scenarios. It consists of two sizes of large-language dialogue models and two multimodal comprehension models (GLM-Edge-1.5B-Chat, GLM-Edge-4B-Chat, GLM-Edge-V-2B, GLM-Edge-V-5B). The 1.5B / 2B models are mainly intended for platforms such as mobile phones and in-car systems, while the 4B / 5B models are mainly intended for platforms such as PCs.",
"model_specs": [
{
"model_format": "pytorch",
"model_size_in_billions": "2",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "THUDM/glm-edge-v-2b"
},
{
"model_format": "pytorch",
"model_size_in_billions": "5",
"quantizations": [
"4-bit",
"8-bit",
"none"
],
"model_id": "THUDM/glm-edge-v-5b"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "2",
"quantizations": [
"Q4_0",
"Q4_1",
"Q4_K",
"Q4_K_M",
"Q4_K_S",
"Q5_0",
"Q5_1",
"Q5_K",
"Q5_K_M",
"Q5_K_S",
"Q6_K",
"Q8_0"
],
"model_file_name_template": "ggml-model-{quantization}.gguf",
"model_id": "THUDM/glm-edge-v-2b-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "2",
"quantizations": [
"F16"
],
"model_file_name_template": "glm-edge-v-2B-{quantization}.gguf",
"model_id": "THUDM/glm-edge-v-2b-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "2",
"quantizations": [
"f16"
],
"model_file_name_template": "mmproj-model-{quantization}.gguf",
"model_id": "THUDM/glm-edge-v-2b-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "5",
"quantizations": [
"Q4_0",
"Q4_1",
"Q4_K",
"Q4_K_M",
"Q4_K_S",
"Q5_0",
"Q5_1",
"Q5_K",
"Q5_K_M",
"Q5_K_S",
"Q6_K",
"Q8_0"
],
"model_file_name_template": "ggml-model-{quantization}.gguf",
"model_id": "THUDM/glm-edge-v-5b-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "5",
"quantizations": [
"F16"
],
"model_file_name_template": "glm-edge-v-5B-{quantization}.gguf",
"model_id": "THUDM/glm-edge-v-5b-gguf"
},
{
"model_format": "ggufv2",
"model_size_in_billions": "5",
"quantizations": [
"f16"
],
"model_file_name_template": "mmproj-model-{quantization}.gguf",
"model_id": "THUDM/glm-edge-v-5b-gguf"
}
],
"chat_template": "{% for item in messages %}{% if item['role'] != 'system' %}<|{{ item['role'] }}|>\n{% for content in item['content'] %}{% if content['type'] == 'image' %}{% for _ in range(578) %}<|begin_of_image|>{% endfor %}{% elif content['type'] == 'text' %}{{ content['text'] }}{% endif %}{% endfor %}\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|assistant|>\n{% endif %}",
"stop_token_ids": [
59246,
59253,
59255
],
"stop": [
"<|endoftext|>",
"<|user|>",
"<|observation|>"
]
}
]
Loading

0 comments on commit eb8ddd4

Please sign in to comment.