Skip to content

Commit

Permalink
add mistral and zephyr
Browse files Browse the repository at this point in the history
  • Loading branch information
deep-diver committed Nov 19, 2023
1 parent 7cf7563 commit cc87b7b
Show file tree
Hide file tree
Showing 9 changed files with 392 additions and 42 deletions.
53 changes: 14 additions & 39 deletions app.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,44 +641,12 @@ def gradio_main(args):
gr.Markdown("## Recent Releases")
with gr.Row(elem_classes=["sub-container"]):
with gr.Column(min_width=20):
codellama_7b_rr = gr.Button("codellama-7b", elem_id="codellama-7b", elem_classes=["square"])
gr.Markdown("Code LLaMA (7B)", elem_classes=["center"])

with gr.Column(min_width=20):
codellama_13b_rr = gr.Button("codellama-13b", elem_id="codellama-13b", elem_classes=["square"])
gr.Markdown("Code LLaMA (13B)", elem_classes=["center"])

with gr.Column(min_width=20):
codellama_34b_rr = gr.Button("codellama-34b", elem_id="codellama-34b", elem_classes=["square"])
gr.Markdown("Code LLaMA (34B)", elem_classes=["center"])

with gr.Column(min_width=20):
upstage_llama2_70b_2_rr = gr.Button("upstage-llama2-70b-2", elem_id="upstage-llama2-70b-2", elem_classes=["square"])
gr.Markdown("Upstage2 v2 (70B)", elem_classes=["center"])

with gr.Column(min_width=20):
platypus2_70b_rr = gr.Button("platypus2-70b", elem_id="platypus2-70b", elem_classes=["square"])
gr.Markdown("Platypus2 (70B)", elem_classes=["center"])

with gr.Column(min_width=20):
wizardlm_70b_rr = gr.Button("wizardlm-70b", elem_id="wizardlm-70b", elem_classes=["square"])
gr.Markdown("WizardLM (70B)", elem_classes=["center"])
mistral_7b_rr = gr.Button("mistral-7b", elem_id="mistral-7b", elem_classes=["square"])
gr.Markdown("Mistral (7B)", elem_classes=["center"])

with gr.Column(min_width=20):
orcamini_70b_rr = gr.Button("orcamini-70b", elem_id="orcamini-70b", elem_classes=["square"])
gr.Markdown("Orca Mini (70B)", elem_classes=["center"])

with gr.Column(min_width=20):
samantha_70b_rr = gr.Button("samantha-70b", elem_id="samantha-70b", elem_classes=["square"])
gr.Markdown("Samantha (70B)", elem_classes=["center"])

with gr.Column(min_width=20):
godzilla_70b_rr = gr.Button("godzilla-70b", elem_id="godzilla-70b", elem_classes=["square"])
gr.Markdown("GadziLLa (70B)", elem_classes=["center"])

with gr.Column(min_width=20):
nous_hermes_70b_rr = gr.Button("nous-hermes-70b", elem_id="nous-hermes-70b", elem_classes=["square"])
gr.Markdown("Nous Hermes 2 (70B)", elem_classes=["center"])
zephyr_7b_rr = gr.Button("zephyr-7b", elem_id="zephyr-7b", elem_classes=["square"])
gr.Markdown("Zephyr (7B)", elem_classes=["center"])

with gr.Column(visible=False) as full_section:
gr.Markdown("## ~ 10B Parameters")
Expand Down Expand Up @@ -791,6 +759,14 @@ def gradio_main(args):
codellama_7b = gr.Button("codellama-7b", elem_id="codellama-7b", elem_classes=["square"])
gr.Markdown("Code LLaMA", elem_classes=["center"])

with gr.Column(min_width=20):
mistral_7b = gr.Button("mistral-7b", elem_id="mistral-7b", elem_classes=["square"])
gr.Markdown("Mistral", elem_classes=["center"])

with gr.Column(min_width=20):
zephyr_7b = gr.Button("zephyr-7b", elem_id="zephyr-7b", elem_classes=["square"])
gr.Markdown("Zephyr", elem_classes=["center"])

gr.Markdown("## ~ 20B Parameters")
with gr.Row(elem_classes=["sub-container"]):
with gr.Column(min_width=20, visible=False):
Expand Down Expand Up @@ -1285,7 +1261,7 @@ def gradio_main(args):
gpt4_alpaca_7b, os_stablelm7b, mpt_7b, redpajama_7b, redpajama_instruct_7b, llama_deus_7b,
evolinstruct_vicuna_7b, alpacoom_7b, baize_7b, guanaco_7b, vicuna_7b_1_3,
falcon_7b, wizard_falcon_7b, airoboros_7b, samantha_7b, openllama_7b, orcamini_7b,
xgen_7b, llama2_7b, nous_hermes_7b_v2, codellama_7b,
xgen_7b, llama2_7b, nous_hermes_7b_v2, codellama_7b, mistral_7b, zephyr_7b,

flan11b, koalpaca, kullm, alpaca_lora13b, gpt4_alpaca_13b, stable_vicuna_13b,
starchat_15b, starchat_beta_15b, vicuna_7b, vicuna_13b, evolinstruct_vicuna_13b,
Expand All @@ -1299,8 +1275,7 @@ def gradio_main(args):
stable_beluga2_70b, upstage_llama2_70b, upstage_llama2_70b_2, platypus2_70b, wizardlm_70b, orcamini_70b,
samantha_70b, godzilla_70b, nous_hermes_70b,

codellama_7b_rr, codellama_13b_rr, codellama_34b_rr, upstage_llama2_70b_2_rr, platypus2_70b_rr,
wizardlm_70b_rr, orcamini_70b_rr, samantha_70b_rr, godzilla_70b_rr, nous_hermes_70b_rr
mistral_7b_rr, zephyr_7b_rr
]
for btn in btns:
btn.click(
Expand Down
10 changes: 10 additions & 0 deletions chats/central.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from chats import xgen
from chats import llama2
from chats import freewilly
from chats import mistral
from chats import custom

import copy
Expand Down Expand Up @@ -125,6 +126,15 @@ def sync_chat_stream(
internet_option, serper_api_key
)

elif model_type == "mistral" or model_type == "zephyr":
cs = mistral.chat_stream(
idx, local_data, user_message, state,
global_context, ctx_num_lconv, ctx_sum_prompt,
res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
internet_option, serper_api_key
)

elif model_type == "puffin":
cs = alpaca.chat_stream(
idx, local_data, user_message, state,
Expand Down
51 changes: 51 additions & 0 deletions chats/mistral.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
import copy
import json
import global_vars
from chats import pre, post
from pingpong import PingPong
from gens.batch_gen import get_output_batch

from chats.utils import build_prompts, text_stream, internet_search

def chat_stream(
    idx, local_data, user_message, state,
    global_context, ctx_num_lconv, ctx_sum_prompt,
    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
    internet_option, serper_api_key
):
    """Streaming chat handler for Mistral/Zephyr-style models.

    Generator that registers ``user_message`` into the conversation at
    ``idx``, optionally enriches the prompt with internet search results,
    then streams partially generated tokens to the UI. Each ``yield`` is a
    4-tuple: (cleared-textbox value, UI chat state, prompt used, serialized
    session list).
    """
    # Rehydrate every conversation session from its serialized form.
    sessions = []
    for serialized in local_data:
        sessions.append(
            state["ppmanager_type"].from_json(json.dumps(serialized))
        )

    manager = sessions[idx]

    # Register the user's new message with an empty placeholder response.
    manager.add_pingpong(PingPong(user_message, ""))
    prompt = build_prompts(manager, global_context, ctx_num_lconv)

    # Optionally fold live internet-search results into the prompt,
    # streaming intermediate UI updates while the search runs.
    if internet_option:
        search_prompt = None
        for candidate_prompt, ui_state in internet_search(
            manager, serper_api_key, global_context, ctx_num_lconv
        ):
            search_prompt = candidate_prompt
            yield "", ui_state, prompt, str(sessions)

    # Prepare generation kwargs and a token streamer, then launch generation.
    final_prompt = search_prompt if internet_option else prompt
    gen_kwargs, streamer = pre.build(
        final_prompt,
        res_temp, res_topp, res_topk, res_rpen, res_mnts,
        res_beams, res_cache, res_sample, res_eosid, res_padid,
        return_token_type_ids=False
    )
    pre.start_gen(gen_kwargs)

    # Relay partial tokens to the UI as they arrive from the streamer.
    for _manager, ui_state in text_stream(manager, streamer):
        yield "", ui_state, prompt, str(sessions)

    # Strip trailing artifacts from the generated response, then do a
    # final UI refresh with the cleaned conversation.
    manager = post.strip_pong(manager)
    yield "", manager.build_uis(), prompt, str(sessions)
11 changes: 11 additions & 0 deletions configs/response_configs/mistral.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Default text-generation settings shared by the Mistral-family models
# (both "mistral-7b" and "zephyr-7b" point here via "default_gen_config"
# in model_cards.json).
generation_config:
  temperature: 0.95        # sampling temperature; >1 flattens, <1 sharpens the distribution
  top_p: 0.9               # nucleus sampling: keep smallest token set with cumulative prob 0.9
  top_k: 50                # restrict sampling to the 50 most likely tokens
  num_beams: 1             # no beam search — pure sampling
  use_cache: True          # reuse past key/values for faster incremental decoding
  repetition_penalty: 1.2  # penalize tokens already generated to reduce loops
  max_new_tokens: 1024     # cap on generated (not prompt) tokens per response
  do_sample: True          # enable stochastic sampling (required for temperature/top_p/top_k)
  # NOTE(review): assumes a Llama-style tokenizer where BOS=1 and EOS=2 —
  # confirm against the actual Mistral/Zephyr tokenizer config.
  bos_token_id: 1
  eos_token_id: 2
11 changes: 10 additions & 1 deletion global_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
from models import camel, t5_vicuna, vicuna, starchat, redpajama, bloom
from models import baize, guanaco, falcon, kullm, replit, airoboros
from models import samantha_vicuna, wizard_coder, xgen, freewilly
from models import mistral
from models import byom

cuda_availability = False
Expand Down Expand Up @@ -69,7 +70,11 @@ def initialize_globals(args):

model_type_tmp = "alpaca"
print(args.base_url.lower())
if "meta-llama/llama-2-70b-hf" in args.base_url.lower():
if "mistralai/mistral" in args.base_url.lower():
model_type_tmp = "mistral"
elif "huggingfaceh4/zephyr" in args.base_url.lower():
model_type_tmp = "zephyr"
elif "meta-llama/llama-2-70b-hf" in args.base_url.lower():
model_type_tmp = "llama2-70b"
elif "codellama/codellama-34b-instruct-hf" in args.base_url.lower():
model_type_tmp = "codellama2-70b"
Expand Down Expand Up @@ -309,6 +314,10 @@ def get_load_model(model_type):
return samantha_vicuna.load_model
elif model_type == "xgen":
return xgen.load_model
elif model_type == "mistral":
return mistral.load_model
elif model_type == "zephyr":
return mistral.load_model
else:
return None

Expand Down
20 changes: 20 additions & 0 deletions miscs/styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -809,6 +809,26 @@
margin: auto;
}
#mistral-7b {
background: url(https://i.ibb.co/kDRcjjh/mistral-logo.png);
background-repeat: no-repeat;
background-size: 100px 95px;
color: transparent;
width: 100px;
height: 100px;
margin: auto;
}
#zephyr-7b {
background: url(https://i.ibb.co/FDNS82f/zephyr-thumb-mid.png);
background-repeat: no-repeat;
background-size: 100px 95px;
color: transparent;
width: 100px;
height: 100px;
margin: auto;
}
#replit-3b {
background: url(https://i.ibb.co/BrKCKYq/replit.png);
background-repeat: no-repeat;
Expand Down
56 changes: 55 additions & 1 deletion model_cards.json
Original file line number Diff line number Diff line change
Expand Up @@ -2919,5 +2919,59 @@
"ollb_hellaswag": -1,
"ollb_mmlu": -1,
"ollb_truthfulqa": -1
}
},
"mistral-7b": {
"category": "<10B",
"display_name": "Mistral",
"thumb": "https://i.ibb.co/kDRcjjh/mistral-logo.png",
"thumb-mid": "https://i.ibb.co/kDRcjjh/mistral-logo.png",
"thumb-tiny": "https://i.ibb.co/kxSNwgd/mistral-logo-tiny.png",
"parameters": "7",
"vram(full)": "13858",
"vram(8bit)": "8254",
"vram(4bit)": "5140",
"vram(gptq)": "N/A",
"hub(base)": "mistralai/Mistral-7B-Instruct-v0.1",
"hub(ckpt)": "N/A",
"hub(gptq)": "N/A",
"hub(gptq_base)": "N/A",
"default_gen_config": "configs/response_configs/mistral.yaml",
"desc": "The Mistral-7B-Instruct-v0.1 Large Language Model (LLM) is an instruct fine-tuned version of the [Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) generative text model using a variety of publicly available conversation datasets. For full details of this model, read the [paper](https://arxiv.org/abs/2310.06825) and the [release blog post](https://mistral.ai/news/announcing-mistral-7b).",
"example1": [],
"example2": [],
"example3": [],
"example4": [],
"ollb_average": 53.27,
"ollb_arc": 54.52,
"ollb_hellaswag": 75.63,
"ollb_mmlu": 55.38,
"ollb_truthfulqa": 56.28
},
"zephyr-7b": {
"category": "<10B",
"display_name": "Zephyr",
"thumb": "https://i.ibb.co/Msk4L8M/zephyr-thumb.png",
"thumb-mid": "https://i.ibb.co/FDNS82f/zephyr-thumb-mid.png",
"thumb-tiny": "https://i.ibb.co/6Yjq96Z/zephyr-thumb-tiny.png",
"parameters": "7",
"vram(full)": "13858",
"vram(8bit)": "8254",
"vram(4bit)": "5140",
"vram(gptq)": "N/A",
"hub(base)": "HuggingFaceH4/zephyr-7b-alpha",
"hub(ckpt)": "N/A",
"hub(gptq)": "N/A",
"hub(gptq_base)": "N/A",
"default_gen_config": "configs/response_configs/mistral.yaml",
"desc": "Zephyr is a series of language models that are trained to act as helpful assistants. Zephyr-7B-α is the first model in the series, and is a fine-tuned version of [mistralai/Mistral-7B-v0.1](https://huggingface.co/mistralai/Mistral-7B-v0.1) that was trained on a mix of publicly available, synthetic datasets using [Direct Preference Optimization (DPO)](https://arxiv.org/abs/2305.18290). We found that removing the in-built alignment of these datasets boosted performance on [MT Bench](https://huggingface.co/spaces/lmsys/mt-bench) and made the model more helpful. However, this means that the model is likely to generate problematic text when prompted to do so and should only be used for educational and research purposes.",
"example1": [],
"example2": [],
"example3": [],
"example4": [],
"ollb_average": -1,
"ollb_arc": -1,
"ollb_hellaswag": -1,
"ollb_mmlu": -1,
"ollb_truthfulqa": -1
}
}
Loading

0 comments on commit cc87b7b

Please sign in to comment.