llama.cpp updated to b4562

guinmoon · Jan 27, 2025 · 5d3e152
1 parent cd2d618
commit 5d3e152
Show file tree

Hide file tree

Showing 11 changed files with 85 additions and 30 deletions.
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -11,6 +11,7 @@
         "ggml-alloc.h": "c",
         "ggml_d925ed-alloc.h": "c",
         "ggml_d925ed.h": "c",
-        "assert.h": "c"
+        "assert.h": "c",
+        "vector": "cpp"
     }
 }
diff --git a/LLMFarm/AIChatModel.swift b/LLMFarm/AIChatModel.swift
@@ -134,7 +134,7 @@ final class AIChatModel: ObservableObject {
         }
         self.chat?.model?.parse_skip_tokens()
         Task{
-            await self.send(message: in_text, 
+            await self.Send(message: in_text, 
                             append_user_message:false,
                             system_prompt:system_prompt,
                             attachment:attachment,
@@ -396,13 +396,17 @@ final class AIChatModel: ObservableObject {
     }
 
 
-    public func loadRAGIndex(ragURL: URL) async {
+    public func LoadRAGIndex(ragURL: URL) async {
         updateIndexComponents(currentModel:currentModel,comparisonAlgorithm:comparisonAlgorithm,chunkMethod:chunkMethod)
         await loadExistingIndex(url: ragURL, name: "RAG_index")
         ragIndexLoaded = true
     }
 
-    public func  generateRagLLMQuery(_ inputText:String,
+    public func RegenerateLstMessage(){
+//        self.messages.removeLast()
+    }
+
+    public func GenerateRagLLMQuery(_ inputText:String,
                                      _ searchResultsCount:Int,
                                      _ ragURL:URL,
                                      message in_text: String,
@@ -417,14 +421,14 @@ final class AIChatModel: ObservableObject {
         aiQueue.async {
             Task {
                 if await !self.ragIndexLoaded {
-                    await self.loadRAGIndex(ragURL: ragURL)
+                    await self.LoadRAGIndex(ragURL: ragURL)
                 }
                 DispatchQueue.main.async {
                     self.state = .ragSearch
                 }
                 let results = await searchIndexWithQuery(query: inputText, top: searchResultsCount)
                 let llmPrompt = SimilarityIndex.exportLLMPrompt(query: inputText, results: results!)
-                await self.send(message: llmPrompt,
+                await self.Send(message: llmPrompt,
                                  append_user_message: false,
                                  system_prompt: system_prompt,
                                  attachment: llmPrompt,
@@ -433,7 +437,15 @@ final class AIChatModel: ObservableObject {
         }
     }
 
-    public func send(message in_text: String, 
+    public func SetSendMsgTokensCount(_ count:Int){
+
+    }
+
+    public func SetGeneratedMsgTokensCount(_ count:Int){
+
+    }
+
+    public func Send(message in_text: String,
                      append_user_message:Bool = true,
                      system_prompt:String? = nil, 
                      attachment: String? = nil,
@@ -471,7 +483,7 @@ final class AIChatModel: ObservableObject {
 
         if useRag {
             self.state = .ragIndexLoading
-            self.generateRagLLMQuery(in_text,
+            self.GenerateRagLLMQuery(in_text,
                                     self.ragTop, self.ragUrl,
                                     message: in_text,
                                     append_user_message:append_user_message,
@@ -524,7 +536,15 @@ final class AIChatModel: ObservableObject {
             { str, time in //Predicting
                 _ = self.process_predicted_str(str, time, &message/*, messageIndex*/)
             },
-            { final_str in // Finish predicting 
+            { key,value in
+//                if (key == "itc"){
+//                    SetSendMsgTokensCount(value as Int)
+//                }
+//                if (key == "itc"){
+//                    SetGeneratedMsgTokensCount(value as Int)
+//                }
+            },
+            { final_str in // Finish predicting
                 self.finish_completion(final_str, &message/*, messageIndex*/)   
 //                self.llmStatus = "Done"
             },

diff --git a/LLMFarm/Chats/ChatItem.swift b/LLMFarm/Chats/ChatItem.swift
@@ -1,10 +1,10 @@
 //
-//  ChatItem.swift
-//  ChatUI
 //
-//  Created by Shezad Ahamed on 6/08/21.
+//  ChatItem.swift
+//  Created by guinmoon
 //
 
+
 import SwiftUI
 
 

diff --git a/LLMFarm/Chats/ChatListView.swift b/LLMFarm/Chats/ChatListView.swift
@@ -1,8 +1,7 @@
 //
 //  ChatListView.swift
 //  ChatUI
-//
-//  Created by Shezad Ahamed on 05/08/21.
+//  Created by guinmoon
 //
 
 import SwiftUI

diff --git a/LLMFarm/Chats/ChatView.swift b/LLMFarm/Chats/ChatView.swift
@@ -153,12 +153,33 @@ struct ChatView: View {
                                 .textSelection(.enabled)
                         }
                         .listRowSeparator(.hidden)
-                        Text("").id("latest")
+//                        Text("").id("latest")
+//                        Divider()
+                        Button {
+                            Task{
+                                aiChatModel.RegenerateLstMessage()
+                            }
+                        }
+                        label: {
+                            Image(systemName: "arrow.uturn.backward.square")
+                                .resizable()
+                                .foregroundColor(.white)
+                                .frame(width: 25, height: 25)
+//                                .padding([.bottom, .trailing], 15)
+                                .opacity(0.4)
+                            Text("Regenerate last message")
+                        }
+                        .buttonStyle(BorderlessButtonStyle())
+                        .id("latest")
+
                     }
                     .textSelection(.enabled)
                     .listStyle(PlainListStyle())
                     .overlay(scrollDownOverlay, alignment: .bottomTrailing)
 //                    .overlay(debugOverlay, alignment: .bottomLeading)
+
+
+
                 }
                 .textSelection(.enabled)
                 .onChange(of: aiChatModel.AI_typing){ ai_typing in

diff --git a/LLMFarm/Chats/Message.swift b/LLMFarm/Chats/Message.swift
@@ -1,8 +1,6 @@
 //
 //  Message.swift
-//  AlpacaChatApp
-//
-//  Created by Yoshimasa Niwa on 3/20/23.
+//  Created by guinmoon
 //
 
 import Foundation
@@ -31,4 +29,5 @@ struct Message: Identifiable {
     var attachment: String? = nil
     var attachment_type: String? = nil
     var is_markdown: Bool = false
+    var tokens_count: Int = 0
 }
diff --git a/LLMFarm/Chats/MessageView.swift b/LLMFarm/Chats/MessageView.swift
@@ -1,10 +1,9 @@
 //
 //  MessageView.swift
-//  AlpacaChatApp
-//
-//  Created by Yoshimasa Niwa on 3/20/23.
+//  Created by guinmoon
 //
 
+
 import SwiftUI
 import MarkdownUI
 
@@ -116,9 +115,15 @@ struct MessageView: View {
                     default:
                         Text(message.text).textSelection(.enabled).textSelection(.enabled)
                     }
-                    Text(String(format: "%.2f ses, %.2f t/s", totalSecond,message.tok_sec))
-                        .font(.footnote)
-                        .foregroundColor(Color.gray)
+                    if (message.tokens_count==0){
+                        Text(String(format: "%.2f s, %.2f t/s", totalSecond,message.tok_sec))
+                            .font(.footnote)
+                            .foregroundColor(Color.gray)
+                    }else{
+                        Text(String(format: "%i t, %.2f s, %.2f t/s",message.tokens_count, totalSecond,message.tok_sec))
+                            .font(.footnote)
+                            .foregroundColor(Color.gray)
+                    }
                 }.textSelection(.enabled)
             }
         }

diff --git a/LLMFarm/Intents/OneShortQuery.swift b/LLMFarm/Intents/OneShortQuery.swift
@@ -58,7 +58,7 @@ func OneShortQuery(_ queryIn: String, _ chat: String, _ token_limit:Int,
         aiChatModel.chat?.model?.parse_skip_tokens()
 
         if useRag{
-            await aiChatModel.loadRAGIndex(ragURL: aiChatModel.ragUrl)
+            await aiChatModel.LoadRAGIndex(ragURL: aiChatModel.ragUrl)
             let results = await searchIndexWithQuery(query: query, top: topRag)
             query = SimilarityIndex.exportLLMPrompt(query: query, results: results!)
         }
@@ -67,7 +67,7 @@ func OneShortQuery(_ queryIn: String, _ chat: String, _ token_limit:Int,
         var current_token_count = 0
         try ExceptionCather.catchException {
             do{
-                _ = try aiChatModel.chat?.model?.predict(query,
+                _ = try aiChatModel.chat?.model?.Predict(query,
                                                          {
                     str,time in
                     print("\(str)",terminator: "")

diff --git a/LLMFarm/UIComponents/LLMTextInput.swift b/LLMFarm/UIComponents/LLMTextInput.swift
@@ -332,7 +332,7 @@ public struct LLMTextInput: View {
                 image = nil
                 selectedPhoto = nil
                 autoScroll = true
-                await aiChatModel.send(message: input_text,attachment: img_path,
+                await aiChatModel.Send(message: input_text,attachment: img_path,
                                        attachment_type:img_path == nil ? nil: "img", useRag: enableRAG)
                 input_text = ""
 //                Task {

diff --git a/ModelTest/main.swift b/ModelTest/main.swift
@@ -11,12 +11,14 @@ import llmfarm_core_cpp
 
 let maxOutputLength:Int32 = 100
 var total_output = 0
+var total_tokens_output:Int32 = 0
 var session_tokens: [Int32] = []
 var ai: AI? = nil
 
 func mainCallback(_ str: String, _ time: Double) -> Bool {
     print("\(str)",terminator: "")
     total_output += str.count
+    total_tokens_output += 1
     // print(total_output)
     // if  total_output>maxOutputLength {        
     //     ai!.flagExit = true        
@@ -92,7 +94,7 @@ func main(){
 //    params.lora_adapters.append(("/Users/guinmoon/dev/alpaca_llama_etc/lora-open-llama-3b-v2-q8_0-my_finetune-LATEST.bin",1.0 ))
 //    input_text = "To be or not"
 
-    input_text = "Write story about Artem."
+    input_text = "Tell about Stavropol in one sentence."
     do{
 
         ai!.initModel(modelInference,contextParams: params)
@@ -105,9 +107,17 @@ func main(){
 
         var output: String?
         try ExceptionCather.catchException {
-            output = try? ai!.model?.predict(input_text, mainCallback) ?? ""
+            output = try? ai!.model?.Predict(input_text, mainCallback) ?? ""
         }
 
+
+        llama_kv_cache_seq_rm(ai!.model?.context, -1, 0, -1);
+        print(output)
+
+        try ExceptionCather.catchException {
+            output = try? ai!.model?.Predict("tell more", mainCallback) ?? ""
+        }
+
         print(output)
     }catch {
         print (error)

diff --git a/llmfarm_core.swift b/llmfarm_core.swift
+16 −0		Package.swift
+24 −13		Sources/llmfarm_core/AI.swift
+27 −19		Sources/llmfarm_core/LLMBase.swift
+18 −13		Sources/llmfarm_core/LLaMa.swift
+1 −1		Sources/llmfarm_core_cpp/llama.cpp