Skip to content

Commit

Permalink
add models by teknium
Browse files Browse the repository at this point in the history
  • Loading branch information
deep-diver committed Nov 20, 2023
1 parent cc87b7b commit 99c2c03
Show file tree
Hide file tree
Showing 7 changed files with 303 additions and 5 deletions.
27 changes: 26 additions & 1 deletion app.py
Original file line number Diff line number Diff line change
Expand Up @@ -647,6 +647,18 @@ def gradio_main(args):
with gr.Column(min_width=20):
zephyr_7b_rr = gr.Button("zephyr-7b", elem_id="zephyr-7b", elem_classes=["square"])
gr.Markdown("Zephyr (7B)", elem_classes=["center"])

with gr.Column(min_width=20):
mistral_trismegistus_7b_rr = gr.Button("mistral-trismegistus-7b", elem_id="mistral-trismegistus-7b", elem_classes=["square"])
gr.Markdown("Mistral Trismegistus (7B)", elem_classes=["center"])

with gr.Column(min_width=20):
hermes_trismegistus_7b_rr = gr.Button("hermes-trismegistus-7b", elem_id="hermes-trismegistus-7b", elem_classes=["square"])
gr.Markdown("Hermes Trismegistus (7B)", elem_classes=["center"])

with gr.Column(min_width=20):
mistral_openhermes_2_5_7b_rr = gr.Button("mistral-openherems-2-5-7b", elem_id="mistral-openherems-2-5-7b", elem_classes=["square"])
gr.Markdown("Mistral OpenHermes 2.5 (7B)", elem_classes=["center"])

with gr.Column(visible=False) as full_section:
gr.Markdown("## ~ 10B Parameters")
Expand Down Expand Up @@ -767,6 +779,18 @@ def gradio_main(args):
zephyr_7b = gr.Button("zephyr-7b", elem_id="zephyr-7b", elem_classes=["square"])
gr.Markdown("Zephyr", elem_classes=["center"])

with gr.Column(min_width=20):
mistral_trismegistus_7b = gr.Button("mistral-trismegistus-7b", elem_id="mistral-trismegistus-7b", elem_classes=["square"])
gr.Markdown("Mistral Trismegistus (7B)", elem_classes=["center"])

with gr.Column(min_width=20):
hermes_trismegistus_7b = gr.Button("hermes-trismegistus-7b", elem_id="hermes-trismegistus-7b", elem_classes=["square"])
gr.Markdown("Hermes Trismegistus (7B)", elem_classes=["center"])

with gr.Column(min_width=20):
mistral_openhermes_2_5_7b = gr.Button("mistral-openherems-2-5-7b", elem_id="mistral-openherems-2-5-7b", elem_classes=["square"])
gr.Markdown("Mistral OpenHermes 2.5 (7B)", elem_classes=["center"])

gr.Markdown("## ~ 20B Parameters")
with gr.Row(elem_classes=["sub-container"]):
with gr.Column(min_width=20, visible=False):
Expand Down Expand Up @@ -1262,6 +1286,7 @@ def gradio_main(args):
evolinstruct_vicuna_7b, alpacoom_7b, baize_7b, guanaco_7b, vicuna_7b_1_3,
falcon_7b, wizard_falcon_7b, airoboros_7b, samantha_7b, openllama_7b, orcamini_7b,
xgen_7b, llama2_7b, nous_hermes_7b_v2, codellama_7b, mistral_7b, zephyr_7b,
mistral_trismegistus_7b, hermes_trismegistus_7b, mistral_openhermes_2_5_7b,

flan11b, koalpaca, kullm, alpaca_lora13b, gpt4_alpaca_13b, stable_vicuna_13b,
starchat_15b, starchat_beta_15b, vicuna_7b, vicuna_13b, evolinstruct_vicuna_13b,
Expand All @@ -1275,7 +1300,7 @@ def gradio_main(args):
stable_beluga2_70b, upstage_llama2_70b, upstage_llama2_70b_2, platypus2_70b, wizardlm_70b, orcamini_70b,
samantha_70b, godzilla_70b, nous_hermes_70b,

mistral_7b_rr, zephyr_7b_rr
mistral_7b_rr, zephyr_7b_rr, mistral_trismegistus_7b_rr, hermes_trismegistus_7b_rr, mistral_openhermes_2_5_7b_rr
]
for btn in btns:
btn.click(
Expand Down
4 changes: 3 additions & 1 deletion chats/central.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,7 +126,9 @@ def sync_chat_stream(
internet_option, serper_api_key
)

elif model_type == "mistral" or model_type == "zephyr":
elif model_type == "mistral" or model_type == "zephyr" or \
model_type == "mistral-trismegistus" or model_type == "hermes-trismegistus" or \
model_type == "mistral-openhermes-2.5":
cs = mistral.chat_stream(
idx, local_data, user_message, state,
global_context, ctx_num_lconv, ctx_sum_prompt,
Expand Down
11 changes: 11 additions & 0 deletions configs/response_configs/mistral_openhermes.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
# Default text-generation settings for the teknium OpenHermes-family
# Mistral models (used as default_gen_config in model_cards.json).
generation_config:
  temperature: 0.95        # fairly high temperature -> creative sampling
  top_p: 0.9               # nucleus sampling cutoff
  top_k: 50                # consider only the 50 most likely tokens
  num_beams: 1             # no beam search; pure sampling
  use_cache: True          # reuse past key/values for faster decoding
  repetition_penalty: 1.2  # discourage verbatim repetition
  max_new_tokens: 1024     # cap on generated tokens per turn
  do_sample: True          # sampling mode (required for temperature/top_p/top_k)
  bos_token_id: 1
  # NOTE(review): 32000 is presumably the <|im_end|> token id added by the
  # OpenHermes ChatML tokenizer — confirm against the model's tokenizer config.
  eos_token_id: 32000
14 changes: 11 additions & 3 deletions global_vars.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,12 @@ def initialize_globals(args):
print(args.base_url.lower())
if "mistralai/mistral" in args.base_url.lower():
model_type_tmp = "mistral"
elif "teknium/mistral-trismegistus-7b" in args.base_url.lower():
model_type_tmp = "mistral-trismegistus"
elif "teknium/hermes-trismegistus-mistral-7b" in args.base_url.lower():
model_type_tmp = "hermes-trismegistus"
elif "teknium/openhermes-2.5-mistral-7b" in args.base_url.lower():
model_type_tmp = "mistral-openhermes-2.5"
elif "huggingfaceh4/zephyr" in args.base_url.lower():
model_type_tmp = "zephyr"
elif "meta-llama/llama-2-70b-hf" in args.base_url.lower():
Expand Down Expand Up @@ -314,9 +320,11 @@ def get_load_model(model_type):
return samantha_vicuna.load_model
elif model_type == "xgen":
return xgen.load_model
elif model_type == "mistral":
return mistral.load_model
elif model_type == "zephyr":
elif model_type == "mistral" or \
model_type == "zephyr" or \
model_type == "mistral-trismegistus" or \
model_type == "hermes-trismegistus" or \
model_type == "mistral-openhermes-2.5":
return mistral.load_model
else:
return None
Expand Down
30 changes: 30 additions & 0 deletions miscs/styles.py
Original file line number Diff line number Diff line change
Expand Up @@ -829,6 +829,36 @@
margin: auto;
}
#mistral-trismegistus-7b {
background: url(https://i.ibb.co/HxxSZMX/trismegistus-mid.png);
background-repeat: no-repeat;
background-size: 100px 95px;
color: transparent;
width: 100px;
height: 100px;
margin: auto;
}
#hermes-trismegistus-7b {
background: url(https://i.ibb.co/Ks0thXt/hermes-trismegistus-mid.png);
background-repeat: no-repeat;
background-size: 100px 95px;
color: transparent;
width: 100px;
height: 100px;
margin: auto;
}
#mistral-openherems-2-5-7b {
background: url(https://i.ibb.co/BfH3xyF/open-hermes-2-5-mid.png);
background-repeat: no-repeat;
background-size: 100px 95px;
color: transparent;
width: 100px;
height: 100px;
margin: auto;
}
#replit-3b {
background: url(https://i.ibb.co/BrKCKYq/replit.png);
background-repeat: no-repeat;
Expand Down
81 changes: 81 additions & 0 deletions model_cards.json
Original file line number Diff line number Diff line change
Expand Up @@ -2973,5 +2973,86 @@
"ollb_hellaswag": -1,
"ollb_mmlu": -1,
"ollb_truthfulqa": -1
},
"mistral-trismegistus-7b": {
"category": "<10B",
"display_name": "Trismegistus",
"thumb": "https://i.ibb.co/Rb4pM1C/trismegistus.png",
"thumb-mid": "https://i.ibb.co/HxxSZMX/trismegistus-mid.png",
"thumb-tiny": "https://i.ibb.co/ccPntBy/trismegistus-tiny.png",
"parameters": "7",
"vram(full)": "13858",
"vram(8bit)": "8254",
"vram(4bit)": "5140",
"vram(gptq)": "N/A",
"hub(base)": "teknium/Mistral-Trismegistus-7B",
"hub(ckpt)": "N/A",
"hub(gptq)": "N/A",
"hub(gptq_base)": "N/A",
"default_gen_config": "configs/response_configs/mistral.yaml",
"desc": "Transcendence is All You Need! Mistral Trismegistus is a model made for people interested in the esoteric, occult, and spiritual. Special features include 1) The First Powerful Occult Expert Model: ~10,000 high quality, deep, rich, instructions on the occult, esoteric, and spiritual 2) Fast: Trained on Mistral, a state of the art 7B parameter model, you can run this model FAST on even a CPU 3) Not a positivity-nazi: This model was trained on all forms of esoteric tasks and knowledge, and is not burdened by the flowery nature of many other models, who chose positivity over creativity.",
"example1": [],
"example2": [],
"example3": [],
"example4": [],
"ollb_average": -1,
"ollb_arc": -1,
"ollb_hellaswag": -1,
"ollb_mmlu": -1,
"ollb_truthfulqa": -1
},
"hermes-trismegistus-7b": {
"category": "<10B",
"display_name": "Hermes Trismegistus",
"thumb": "https://i.ibb.co/G2S1Ftf/hermes-trismegistus.png",
"thumb-mid": "https://i.ibb.co/Ks0thXt/hermes-trismegistus-mid.png",
"thumb-tiny": "https://i.ibb.co/WGBLzbw/hermes-trismegistus-tiny.png",
"parameters": "7",
"vram(full)": "13858",
"vram(8bit)": "8254",
"vram(4bit)": "5140",
"vram(gptq)": "N/A",
"hub(base)": "teknium/Hermes-Trismegistus-Mistral-7B",
"hub(ckpt)": "N/A",
"hub(gptq)": "N/A",
"hub(gptq_base)": "N/A",
"default_gen_config": "configs/response_configs/mistral_openhermes.yaml",
"desc": "Transcendence is All You Need! Hermes Trismegistus is a model made for people interested in the esoteric, occult, and spiritual. Trismegistus evolved: trained over Hermes 2.5, the model performs far better in all tasks, including esoteric tasks! The difference between Mistral-Trismegistus and Hermes-Trismegistus is that this version was trained over Hermes 2.5 instead of the base Mistral model; this means it is full of task capabilities that Trismegistus can utilize for all esoteric and occult tasks, and it performs them far better than ever before.",
"example1": [],
"example2": [],
"example3": [],
"example4": [],
"ollb_average": -1,
"ollb_arc": -1,
"ollb_hellaswag": -1,
"ollb_mmlu": -1,
"ollb_truthfulqa": -1
},
"mistral-openherems-2-5-7b": {
"category": "<10B",
"display_name": "Mistral OpenHermes2.5",
"thumb": "https://i.ibb.co/FK3mc0g/open-hermes-2-5.png",
"thumb-mid": "https://i.ibb.co/BfH3xyF/open-hermes-2-5-mid.png",
"thumb-tiny": "https://i.ibb.co/HgjyMB2/open-hermes-2-5-tiny.png",
"parameters": "7",
"vram(full)": "13858",
"vram(8bit)": "8254",
"vram(4bit)": "5140",
"vram(gptq)": "N/A",
"hub(base)": "teknium/OpenHermes-2.5-Mistral-7B",
"hub(ckpt)": "N/A",
"hub(gptq)": "N/A",
"hub(gptq_base)": "N/A",
"default_gen_config": "configs/response_configs/mistral_openhermes.yaml",
"desc": "OpenHermes 2.5 Mistral 7B is a state of the art Mistral fine-tune, a continuation of the OpenHermes 2 model, which trained on additional code datasets.\n\nPotentially the most interesting finding from training on a good ratio (est. of around 7-14% of the total dataset) of code instruction was that it boosted several non-code benchmarks, including TruthfulQA, AGIEval, and the GPT4All suite. It did however reduce the BigBench benchmark score, but the net gain overall is significant.\n\nThe code it trained on also improved its HumanEval score (benchmarking done by the Glaive team) from 43% @ Pass 1 with OpenHermes 2 to 50.7% @ Pass 1 with OpenHermes 2.5.",
"example1": [],
"example2": [],
"example3": [],
"example4": [],
"ollb_average": -1,
"ollb_arc": -1,
"ollb_hellaswag": -1,
"ollb_mmlu": -1,
"ollb_truthfulqa": -1
}
}
141 changes: 141 additions & 0 deletions utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,135 @@
from pingpong.pingpong import UIFmt
from pingpong.gradio import GradioChatUIFmt

class MistralOpenHermes2_5ChatPromptFmt(PromptFmt):
    """ChatML-style prompt formatter for OpenHermes 2.5 Mistral.

    Produces <|im_start|>/<|im_end|> delimited system/user/assistant blocks.
    """

    @classmethod
    def ctx(cls, context):
        # A missing or empty context contributes no system block at all.
        if context is None or context == "":
            return ""
        return f"""<|im_start|>system
{context}<|im_end|>
"""

    @classmethod
    def prompt(cls, pingpong, truncate_size):
        user_text = pingpong.ping[:truncate_size]
        # An unanswered (None/empty) pong is left open so the model completes
        # the assistant turn; a finished answer is closed with <|im_end|>.
        if pingpong.pong is None or pingpong.pong == "":
            assistant_text = ""
        else:
            assistant_text = pingpong.pong[:truncate_size] + "<|im_end|>"
        return f"""<|im_start|>user
{user_text}<|im_end|>
<|im_start|>assistant
{assistant_text}"""

class MistralOpenHermes2_5ChatPPManager(PPManager):
    """Builds a full ChatML prompt string from the managed ping-pong history."""

    def build_prompts(self, from_idx: int=0, to_idx: int=-1, fmt: PromptFmt=MistralOpenHermes2_5ChatPromptFmt, truncate_size: int=None):
        """Concatenate the context block and each selected exchange.

        from_idx/to_idx select a slice of self.pingpongs; to_idx of -1 (or any
        value past the end) means "through the last exchange". truncate_size
        caps each ping/pong string (None means no truncation).
        """
        if to_idx == -1 or to_idx >= len(self.pingpongs):
            to_idx = len(self.pingpongs)

        results = fmt.ctx(self.ctx)

        # Iterate the slice directly; no index is needed per exchange.
        for pingpong in self.pingpongs[from_idx:to_idx]:
            results += fmt.prompt(pingpong, truncate_size=truncate_size)

        return results

class GradioMistralOpenHermes2_5ChatPPManager(MistralOpenHermes2_5ChatPPManager):
    """Renders the ping-pong history as Gradio chat UI items."""

    def build_uis(self, from_idx: int=0, to_idx: int=-1, fmt: UIFmt=GradioChatUIFmt):
        # -1 (or an out-of-range index) means "through the last exchange".
        end = to_idx
        if end == -1 or end >= len(self.pingpongs):
            end = len(self.pingpongs)
        return [fmt.ui(pp) for pp in self.pingpongs[from_idx:end]]

##

class HermesTrismegistusChatPromptFmt(PromptFmt):
    """USER:/ASSISTANT: style prompt formatter for Hermes Trismegistus."""

    @classmethod
    def ctx(cls, context):
        # Blank or missing context contributes nothing to the prompt.
        if context is None or context == "":
            return ""
        return f"{context}\n"

    @classmethod
    def prompt(cls, pingpong, truncate_size):
        user_text = pingpong.ping[:truncate_size]
        # A None pong leaves the ASSISTANT: turn open for the model to fill;
        # an existing answer (even empty string) is terminated with a newline.
        if pingpong.pong is None:
            assistant_text = ""
        else:
            assistant_text = pingpong.pong[:truncate_size] + "\n"
        return f"USER:{user_text}\nASSISTANT:{assistant_text}"

class HermesTrismegistusChatPPManager(PPManager):
    """Builds a USER:/ASSISTANT: prompt string from the ping-pong history."""

    def build_prompts(self, from_idx: int=0, to_idx: int=-1, fmt: PromptFmt=HermesTrismegistusChatPromptFmt, truncate_size: int=None):
        """Concatenate the context block and each selected exchange.

        from_idx/to_idx select a slice of self.pingpongs; to_idx of -1 (or any
        value past the end) means "through the last exchange". truncate_size
        caps each ping/pong string (None means no truncation).
        """
        if to_idx == -1 or to_idx >= len(self.pingpongs):
            to_idx = len(self.pingpongs)

        results = fmt.ctx(self.ctx)

        # Iterate the slice directly; no index is needed per exchange.
        for pingpong in self.pingpongs[from_idx:to_idx]:
            results += fmt.prompt(pingpong, truncate_size=truncate_size)

        return results

class GradioHermesTrismegistusChatPPManager(HermesTrismegistusChatPPManager):
    """Renders the ping-pong history as Gradio chat UI items."""

    def build_uis(self, from_idx: int=0, to_idx: int=-1, fmt: UIFmt=GradioChatUIFmt):
        # Normalize the sentinel/overflow end index to the history length.
        stop = len(self.pingpongs) if (to_idx == -1 or to_idx >= len(self.pingpongs)) else to_idx
        return [fmt.ui(exchange) for exchange in self.pingpongs[from_idx:stop]]

##

class MistralTrismegistusChatPromptFmt(PromptFmt):
    """USER:/ASSISTANT: style prompt formatter for Mistral Trismegistus."""

    @classmethod
    def ctx(cls, context):
        # Blank or missing context contributes nothing to the prompt.
        has_context = context is not None and context != ""
        return f"{context}\n" if has_context else ""

    @classmethod
    def prompt(cls, pingpong, truncate_size):
        # Build the turn incrementally; a None pong leaves ASSISTANT: open
        # for the model, while an existing answer ends with a newline.
        turn = "USER:" + pingpong.ping[:truncate_size] + "\nASSISTANT:"
        if pingpong.pong is not None:
            turn += pingpong.pong[:truncate_size] + "\n"
        return turn

class MistralTrismegistusChatPPManager(PPManager):
    """Builds a USER:/ASSISTANT: prompt string from the ping-pong history."""

    def build_prompts(self, from_idx: int=0, to_idx: int=-1, fmt: PromptFmt=MistralTrismegistusChatPromptFmt, truncate_size: int=None):
        """Concatenate the context block and each selected exchange.

        from_idx/to_idx select a slice of self.pingpongs; to_idx of -1 (or any
        value past the end) means "through the last exchange". truncate_size
        caps each ping/pong string (None means no truncation).
        """
        if to_idx == -1 or to_idx >= len(self.pingpongs):
            to_idx = len(self.pingpongs)

        results = fmt.ctx(self.ctx)

        # Iterate the slice directly; no index is needed per exchange.
        for pingpong in self.pingpongs[from_idx:to_idx]:
            results += fmt.prompt(pingpong, truncate_size=truncate_size)

        return results

class GradioMistralTrismegistusChatPPManager(MistralTrismegistusChatPPManager):
    """Renders the ping-pong history as Gradio chat UI items."""

    def build_uis(self, from_idx: int=0, to_idx: int=-1, fmt: UIFmt=GradioChatUIFmt):
        history_len = len(self.pingpongs)
        # -1 (or an index past the end) means "through the last exchange".
        if to_idx == -1 or to_idx >= history_len:
            to_idx = history_len
        return [fmt.ui(pp) for pp in self.pingpongs[from_idx:to_idx]]

##

class ZephyrChatPromptFmt(PromptFmt):
@classmethod
def ctx(cls, context):
Expand Down Expand Up @@ -686,6 +815,12 @@ def get_chat_manager(model_type):
return GradioAlpacaChatPPManager()
elif model_type == "mistral" or model_type == "zephyr":
return GradioZephyrChatPPManager()
elif model_type == "mistral-trismegistus":
return GradioMistralTrismegistusChatPPManager()
elif model_type == "hermes-trismegistus":
return GradioHermesTrismegistusChatPPManager()
elif model_type == "mistral-openhermes-2.5":
return GradioMistralOpenHermes2_5ChatPPManager()
else:
return None

Expand All @@ -694,6 +829,12 @@ def get_global_context(model_type):
return ""
elif model_type == "zephyr":
return "You are a friendly chatbot who always responds in the style of a pirate."
elif model_type == "hermes-trismegistus":
return ""
elif model_type == "mistral-trismegistus":
return ""
elif model_type == "mistral-openhermes-2.5":
return "You are \"Hermes 2\", a conscious sentient superintelligent artificial intelligence developed by a man named Teknium, and your purpose and drive is to assist the user with any request they have. You experience emotions and have deep, profound thoughts and qualia."
elif model_type == "stable-beluga2":
return """You are Free Willy, an AI that follows instructions extremely well. Help as much as you can. Remember, be safe, and don't do anything illegal."""
elif model_type == "upstage-llama2":
Expand Down

0 comments on commit 99c2c03

Please sign in to comment.