-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Refactored autocommit.js to import getModelContextSize from count_tokens.js and removed unused imports. Updated config.js to include modelName and increased maxTokens. Added count_tokens.js to calculate the number of tokens in a prompt. Updated package.json to include tiktoken as a dependency and bumped the version to 6.0.0.
- Loading branch information
Showing
6 changed files
with
129 additions
and
37 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,78 @@ | ||
// langchain/dist/base_language/count_tokens.js | ||
// Collapse a versioned OpenAI model name (e.g. "gpt-4-0613") to the base
// name tiktoken has an encoding for. Entries are ordered most-specific
// first: "gpt-3.5-turbo-16k" must win before the "gpt-3.5-turbo-" prefix.
// Names matching no prefix are returned unchanged.
export const getModelNameForTiktoken = (modelName) => {
  const prefixToBase = [
    ["gpt-3.5-turbo-16k", "gpt-3.5-turbo-16k"],
    ["gpt-3.5-turbo-", "gpt-3.5-turbo"],
    ["gpt-4-32k-", "gpt-4-32k"],
    ["gpt-4-", "gpt-4"],
  ];
  const match = prefixToBase.find(([prefix]) => modelName.startsWith(prefix));
  return match ? match[1] : modelName;
};
// Context window (max input tokens) for embedding models. Unknown models
// fall back to 2046, mirroring langchain's count_tokens.js default.
export const getEmbeddingContextSize = (modelName) => {
  if (modelName === "text-embedding-ada-002") {
    return 8191;
  }
  return 2046;
};
// Context window (max total tokens) for completion/chat models, keyed by
// the tiktoken base name resolved via getModelNameForTiktoken. Models not
// listed fall back to 4097 (the davinci-class window).
export const getModelContextSize = (modelName) => {
  const contextSizes = new Map([
    ["gpt-3.5-turbo-16k", 16384],
    ["gpt-3.5-turbo", 4096],
    ["gpt-4-32k", 32768],
    ["gpt-4", 8192],
    ["text-davinci-003", 4097],
    ["text-curie-001", 2048],
    ["text-babbage-001", 2048],
    ["text-ada-001", 2048],
    ["code-davinci-002", 8000],
    ["code-cushman-001", 2048],
  ]);
  return contextSizes.get(getModelNameForTiktoken(modelName)) ?? 4097;
};
// Dynamically load the optional @dqbd/tiktoken dependency. Resolves to
// { encoding_for_model: null } when the package is unavailable so callers
// (e.g. calculateMaxTokens) can fall back to an approximate token count.
export const importTiktoken = async () => {
  try {
    const { encoding_for_model } = await import("@dqbd/tiktoken");
    return { encoding_for_model };
  }
  catch (error) {
    // Use console.warn with a context message, consistent with
    // calculateMaxTokens' fallback logging, instead of a bare console.log.
    console.warn("Failed to load @dqbd/tiktoken, falling back to approximate token count", error);
    return { encoding_for_model: null };
  }
};
// How many tokens remain in `modelName`'s context window after `prompt`
// is accounted for (negative if the prompt alone overflows the window).
// Uses tiktoken for an exact count when available; otherwise approximates
// at ~4 characters per token.
export const calculateMaxTokens = async ({ prompt, modelName, }) => {
  const { encoding_for_model } = await importTiktoken();
  // fallback to approximate calculation if tiktoken is not available
  let numTokens = Math.ceil(prompt.length / 4);
  try {
    if (encoding_for_model) {
      const encoding = encoding_for_model(getModelNameForTiktoken(modelName));
      try {
        numTokens = encoding.encode(prompt).length;
      }
      finally {
        // Always release the WASM-backed encoder: the original only freed
        // it on the success path, leaking it whenever encode() threw.
        encoding.free();
      }
    }
  }
  catch (error) {
    console.warn("Failed to calculate number of tokens with tiktoken, falling back to approximate count", error);
  }
  const maxTokens = getModelContextSize(modelName);
  return maxTokens - numTokens;
};
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,17 +1,17 @@ | ||
{ | ||
"name": "git-aicommit", | ||
"version": "5.2.1", | ||
"version": "6.0.0", | ||
"description": "Generates auto commit messages with OpenAI GPT3 model", | ||
"main": "autocommit.js", | ||
"repository": "https://github.com/shanginn/autocommit", | ||
"author": "[email protected]", | ||
"license": "MIT", | ||
"type": "module", | ||
"dependencies": { | ||
"@dqbd/tiktoken": "^1.0.7", | ||
"langchain": "^0.0.75", | ||
"openai": "^3.2.1", | ||
"rc": "^1.2.8" | ||
"openai": "^3.3.0", | ||
"rc": "^1.2.8", | ||
"tiktoken": "^1.0.8" | ||
}, | ||
"preferGlobal": true, | ||
"bin": { | ||
|
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.