@@ -1,10 +1,15 @@
 /* Only the token of gpt-3.5-turbo is used */
 import type { ChatItemType } from '../../../core/chat/type';
 import { Tiktoken } from 'js-tiktoken/lite';
-import { adaptChat2GptMessages } from '../../../core/chat/adapt';
-import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constant';
+import { chats2GPTMessages } from '../../../core/chat/adapt';
 import encodingJson from './cl100k_base.json';
-import { ChatMessageItemType } from '../../../core/ai/type';
+import {
+  ChatCompletionMessageParam,
+  ChatCompletionContentPart,
+  ChatCompletionCreateParams,
+  ChatCompletionTool
+} from '../../../core/ai/type';
+import { ChatCompletionRequestMessageRoleEnum } from '../../../core/ai/constants';
 
 /* init tikToken obj */
 export function getTikTokenEnc() {
@@ -29,18 +34,25 @@ export function getTikTokenEnc() {
 
 /* count one prompt tokens */
 export function countPromptTokens(
-  prompt = '',
-  role: '' | `${ChatCompletionRequestMessageRoleEnum}` = '',
-  tools?: any
+  prompt: string | ChatCompletionContentPart[] | null | undefined = '',
+  role: '' | `${ChatCompletionRequestMessageRoleEnum}` = ''
 ) {
   const enc = getTikTokenEnc();
-  const toolText = tools
-    ? JSON.stringify(tools)
-        .replace('"', '')
-        .replace('\n', '')
-        .replace(/( ){2,}/g, ' ')
-    : '';
-  const text = `${role}\n${prompt}\n${toolText}`.trim();
+  const promptText = (() => {
+    if (!prompt) return '';
+    if (typeof prompt === 'string') return prompt;
+    let promptText = '';
+    prompt.forEach((item) => {
+      if (item.type === 'text') {
+        promptText += item.text;
+      } else if (item.type === 'image_url') {
+        promptText += item.image_url.url;
+      }
+    });
+    return promptText;
+  })();
+
+  const text = `${role}\n${promptText}`.trim();
 
   try {
     const encodeText = enc.encode(text);
@@ -50,15 +62,66 @@ export function countPromptTokens(
     return text.length;
   }
 }
+export const countToolsTokens = (
+  tools?: ChatCompletionTool[] | ChatCompletionCreateParams.Function[]
+) => {
+  if (!tools || tools.length === 0) return 0;
+
+  const enc = getTikTokenEnc();
+
+  const toolText = tools
+    ? JSON.stringify(tools)
+        .replace('"', '')
+        .replace('\n', '')
+        .replace(/( ){2,}/g, ' ')
+    : '';
+
+  return enc.encode(toolText).length;
+};
 
 /* count messages tokens */
-export const countMessagesTokens = (messages: ChatItemType[], tools?: any) => {
-  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
+export const countMessagesTokens = (messages: ChatItemType[]) => {
+  const adaptMessages = chats2GPTMessages({ messages, reserveId: true });
 
-  return countGptMessagesTokens(adaptMessages, tools);
+  return countGptMessagesTokens(adaptMessages);
 };
-export const countGptMessagesTokens = (messages: ChatMessageItemType[], tools?: any) =>
-  messages.reduce((sum, item) => sum + countPromptTokens(item.content, item.role, tools), 0);
+export const countGptMessagesTokens = (
+  messages: ChatCompletionMessageParam[],
+  tools?: ChatCompletionTool[],
+  functionCall?: ChatCompletionCreateParams.Function[]
+) =>
+  messages.reduce((sum, item) => {
+    // Evaluates the text of toolcall and functioncall
+    const functionCallPrompt = (() => {
+      let prompt = '';
+      if (item.role === ChatCompletionRequestMessageRoleEnum.Assistant) {
+        const toolCalls = item.tool_calls;
+        prompt +=
+          toolCalls
+            ?.map((item) => `${item?.function?.name} ${item?.function?.arguments}`.trim())
+            ?.join('') || '';
+
+        const functionCall = item.function_call;
+        prompt += `${functionCall?.name} ${functionCall?.arguments}`.trim();
+      }
+      return prompt;
+    })();
+
+    const contentPrompt = (() => {
+      if (!item.content) return '';
+      if (typeof item.content === 'string') return item.content;
+      return item.content
+        .map((item) => {
+          if (item.type === 'text') return item.text;
+          return '';
+        })
+        .join('');
+    })();
+
+    return sum + countPromptTokens(`${contentPrompt}${functionCallPrompt}`, item.role);
+  }, 0) +
+  countToolsTokens(tools) +
+  countToolsTokens(functionCall);
 
 /* slice messages from top to bottom by maxTokens */
 export function sliceMessagesTB({
@@ -68,7 +131,7 @@ export function sliceMessagesTB({
   messages: ChatItemType[];
   maxTokens: number;
 }) {
-  const adaptMessages = adaptChat2GptMessages({ messages, reserveId: true });
+  const adaptMessages = chats2GPTMessages({ messages, reserveId: true });
   let reduceTokens = maxTokens;
   let result: ChatItemType[] = [];
 
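For context, a minimal usage sketch of the reworked helpers; the relative import path is hypothetical, and the message/tool literals are illustrative OpenAI-style shapes matching the ChatCompletionMessageParam and ChatCompletionTool typings used in the diff above, not values taken from this change:

// hypothetical import path for the file touched in this diff
import { countPromptTokens, countGptMessagesTokens, countToolsTokens } from './tiktoken';

// A single prompt: plain strings and multimodal content-part arrays are both accepted now
const promptTokens = countPromptTokens('Describe this image', 'user');

// A full message list plus optional tool definitions, counted in one pass;
// tool/function definitions are now tokenized separately via countToolsTokens
const messageTokens = countGptMessagesTokens(
  [
    { role: 'system', content: 'You are a helpful assistant.' },
    { role: 'user', content: 'What is the weather in Paris?' }
  ],
  [
    {
      type: 'function',
      function: {
        // illustrative tool definition, not part of the diff
        name: 'getWeather',
        description: 'Look up the current weather for a city',
        parameters: { type: 'object', properties: { city: { type: 'string' } } }
      }
    }
  ]
);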