
Commit 4207dd1

Merge pull request #31 from brichet/codestral_completion_chat_model

Use a chat model instead of LLM for codestral completion

Authored Feb 10, 2025 · 2 parents 640f525 + 6b41d23 · commit 4207dd1

3 files changed: +38 −83 lines
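
In LangChain terms, the headline change swaps a text-completion LLM for a chat model: an LLM's `invoke` takes a raw string and returns a string, while a chat model's `invoke` takes a list of messages and returns an `AIMessage`. A minimal sketch of the difference; the model name is an illustrative assumption, not taken from this commit:

```ts
import { ChatMistralAI, MistralAI } from '@langchain/mistralai';
import { HumanMessage } from '@langchain/core/messages';

async function demo(): Promise<void> {
  // Old style: LLM interface, string in / string out.
  const llm = new MistralAI({ model: 'codestral-latest' });
  const text: string = await llm.invoke('def fib(n):');

  // New style: chat model, messages in / AIMessage out.
  const chat = new ChatMistralAI({ model: 'codestral-latest' });
  const message = await chat.invoke([new HumanMessage('def fib(n):')]);
  console.log(text, message.content);
}
```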

src/llm-models/chrome-completer.ts (+1 −12)
```diff
@@ -8,17 +8,6 @@ import { HumanMessage, SystemMessage } from '@langchain/core/messages';
 import { BaseCompleter, IBaseCompleter } from './base-completer';
 import { COMPLETION_SYSTEM_PROMPT } from '../provider';
 
-/**
- * The initial prompt to use for the completion.
- * Add extra instructions to get better results.
- */
-const CUSTOM_SYSTEM_PROMPT = `${COMPLETION_SYSTEM_PROMPT}
-Only give raw strings back, do not format the response using backticks!
-The output should be a single string, and should correspond to what a human users
-would write.
-Do not include the prompt in the output, only the string that should be appended to the current input.
-`;
-
 /**
  * Regular expression to match the '```' string at the start of a string.
  * So the completions returned by the LLM can still be kept after removing the code block formatting.
@@ -97,5 +86,5 @@ export class ChromeCompleter implements IBaseCompleter {
   }
 
   private _chromeProvider: ChromeAI;
-  private _prompt: string = CUSTOM_SYSTEM_PROMPT;
+  private _prompt: string = COMPLETION_SYSTEM_PROMPT;
 }
```
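
Net effect of this file's change: the completer-specific `CUSTOM_SYSTEM_PROMPT` is gone, and its extra instructions move into the shared `COMPLETION_SYSTEM_PROMPT` (see the src/provider.ts diff below), so every completer reads from one prompt. A minimal sketch of the resulting pattern; `SomeCompleter` is a hypothetical name, not from the codebase:

```ts
import { COMPLETION_SYSTEM_PROMPT } from '../provider';

// Hypothetical completer: no per-completer prompt variant, just the
// provider-level prompt as the single default.
class SomeCompleter {
  get prompt(): string {
    return this._prompt;
  }

  private _prompt: string = COMPLETION_SYSTEM_PROMPT;
}
```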

src/llm-models/codestral-completer.ts (+33 −71)
```diff
@@ -2,10 +2,14 @@ import {
   CompletionHandler,
   IInlineCompletionContext
 } from '@jupyterlab/completer';
-import { LLM } from '@langchain/core/language_models/llms';
-import { MistralAI } from '@langchain/mistralai';
+import { BaseChatModel } from '@langchain/core/language_models/chat_models';
+import {
+  BaseMessage,
+  HumanMessage,
+  SystemMessage
+} from '@langchain/core/messages';
+import { ChatMistralAI } from '@langchain/mistralai';
 import { Throttler } from '@lumino/polling';
-import { CompletionRequest } from '@mistralai/mistralai';
 
 import { BaseCompleter, IBaseCompleter } from './base-completer';
 import { COMPLETION_SYSTEM_PROMPT } from '../provider';
@@ -15,55 +19,35 @@ import { COMPLETION_SYSTEM_PROMPT } from '../provider';
  */
 const INTERVAL = 1000;
 
-/**
- * Timeout to avoid endless requests
- */
-const REQUEST_TIMEOUT = 3000;
-
 export class CodestralCompleter implements IBaseCompleter {
   constructor(options: BaseCompleter.IOptions) {
-    // this._requestCompletion = options.requestCompletion;
-    this._mistralProvider = new MistralAI({ ...options.settings });
+    this._mistralProvider = new ChatMistralAI({ ...options.settings });
     this._throttler = new Throttler(
-      async (data: CompletionRequest) => {
-        const invokedData = data;
-
-        // Request completion.
-        const request = this._mistralProvider.completionWithRetry(
-          data,
-          {},
-          false
-        );
-        const timeoutPromise = new Promise<null>(resolve => {
-          return setTimeout(() => resolve(null), REQUEST_TIMEOUT);
-        });
-
-        // Fetch again if the request is too long or if the prompt has changed.
-        const response = await Promise.race([request, timeoutPromise]);
-        if (
-          response === null ||
-          invokedData.prompt !== this._currentData?.prompt
-        ) {
-          return {
-            items: [],
-            fetchAgain: true
-          };
-        }
-
+      async (messages: BaseMessage[]) => {
+        const response = await this._mistralProvider.invoke(messages);
         // Extract results of completion request.
-        const items = response.choices.map((choice: any) => {
-          return { insertText: choice.message.content as string };
-        });
-
-        return {
-          items
-        };
+        const items = [];
+        if (typeof response.content === 'string') {
+          items.push({
+            insertText: response.content
+          });
+        } else {
+          response.content.forEach(content => {
+            if (content.type !== 'text') {
+              return;
+            }
+            items.push({
+              insertText: content.text
+            });
+          });
+        }
+        return { items };
       },
       { limit: INTERVAL }
     );
   }
 
-  get provider(): LLM {
+  get provider(): BaseChatModel {
     return this._mistralProvider;
   }
 
@@ -77,49 +61,27 @@ export class CodestralCompleter implements IBaseCompleter {
     this._prompt = value;
   }
 
-  set requestCompletion(value: () => void) {
-    this._requestCompletion = value;
-  }
-
   async fetch(
     request: CompletionHandler.IRequest,
    context: IInlineCompletionContext
   ) {
     const { text, offset: cursorOffset } = request;
     const prompt = text.slice(0, cursorOffset);
-    const suffix = text.slice(cursorOffset);
 
-    const data = {
-      prompt,
-      suffix,
-      model: this._mistralProvider.model,
-      // temperature: 0,
-      // top_p: 1,
-      // max_tokens: 1024,
-      // min_tokens: 0,
-      stream: false,
-      // random_seed: 1337,
-      stop: []
-    };
+    const messages: BaseMessage[] = [
+      new SystemMessage(this._prompt),
+      new HumanMessage(prompt)
+    ];
 
     try {
-      this._currentData = data;
-      const completionResult = await this._throttler.invoke(data);
-      if (completionResult.fetchAgain) {
-        if (this._requestCompletion) {
-          this._requestCompletion();
-        }
-      }
-      return { items: completionResult.items };
+      return await this._throttler.invoke(messages);
     } catch (error) {
       console.error('Error fetching completions', error);
       return { items: [] };
     }
   }
 
-  private _requestCompletion?: () => void;
   private _throttler: Throttler;
-  private _mistralProvider: MistralAI;
+  private _mistralProvider: ChatMistralAI;
   private _prompt: string = COMPLETION_SYSTEM_PROMPT;
-  private _currentData: CompletionRequest | null = null;
 }
```
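
Taken together: the throttled callback now sends a `SystemMessage`/`HumanMessage` pair through `ChatMistralAI.invoke` and unpacks the returned `AIMessage`, whose `content` is either a plain string or an array of typed parts. A standalone sketch of that flow, assuming a configured `MISTRAL_API_KEY`; the model name and prompt text are illustrative, and the extension itself pulls settings from `options.settings` instead:

```ts
import { ChatMistralAI } from '@langchain/mistralai';
import { HumanMessage, SystemMessage } from '@langchain/core/messages';

async function completions(prefix: string): Promise<string[]> {
  const model = new ChatMistralAI({ model: 'codestral-latest' });
  const response = await model.invoke([
    new SystemMessage('Complete the code. Return raw strings only.'),
    new HumanMessage(prefix)
  ]);
  // AIMessage.content is a string or an array of typed content parts;
  // keep only the text parts, mirroring the branch in the diff above.
  if (typeof response.content === 'string') {
    return [response.content];
  }
  return response.content
    .filter(part => part.type === 'text')
    .map(part => (part as { type: 'text'; text: string }).text);
}
```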

src/provider.ts (+4 −0)
```diff
@@ -30,6 +30,10 @@ programming language comment syntax. Produce clean code.
 The code is written in JupyterLab, a data analysis and code development
 environment which can execute code extended with additional syntax for
 interactive features, such as magics.
+Only give raw strings back, do not format the response using backticks.
+The output should be a single string, and should correspond to what a human users
+would write.
+Do not include the prompt in the output, only the string that should be appended to the current input.
 `;
 
 export class AIProvider implements IAIProvider {
```

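The appended instructions ask for raw strings, but models sometimes wrap their output in backtick fences anyway; that is what the regex kept in chrome-completer.ts guards against. A hedged sketch of that kind of cleanup; the helper name and regex are illustrative, not from this commit:

```ts
// Strip a Markdown code fence if the model added one despite the prompt.
const FENCED = /^```[^\n]*\n([\s\S]*?)\n?```\s*$/;

function stripFence(completion: string): string {
  const match = completion.match(FENCED);
  return match ? match[1] : completion;
}

// stripFence('```python\nprint(1)\n```') === 'print(1)'
```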