Skip to content

Commit f97f879

Browse files
committed
Add tokenize/detokenize client compatibility
1 parent 70dd0b7 commit f97f879

File tree

4 files changed

+81
-0
lines changed

4 files changed

+81
-0
lines changed

examples/tokenization.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,10 @@
"""Example: round-trip text through a model's tokenizer with ollama."""

import ollama

# Get tokens from a model
tokenize_resp = ollama.tokenize(model='llama3.2', text='Hello world!')
token_ids = tokenize_resp.tokens
print("tokens from model", token_ids)

# Convert tokens back to text
detokenize_resp = ollama.detokenize(model='llama3.2', tokens=token_ids)
print("text from tokens", detokenize_resp.text)  # Prints: Hello world!

ollama/__init__.py

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,8 @@
1212
ListResponse,
1313
ShowResponse,
1414
ProcessResponse,
15+
TokenizeResponse,
16+
DetokenizeResponse,
1517
RequestError,
1618
ResponseError,
1719
)
@@ -31,6 +33,8 @@
3133
'ListResponse',
3234
'ShowResponse',
3335
'ProcessResponse',
36+
'TokenizeResponse',
37+
'DetokenizeResponse',
3438
'RequestError',
3539
'ResponseError',
3640
]
@@ -49,3 +53,5 @@
4953
copy = _client.copy
5054
show = _client.show
5155
ps = _client.ps
# Module-level conveniences bound to the default client, matching the
# surrounding aliases (copy/show/ps) so `ollama.tokenize(...)` works directly.
tokenize = _client.tokenize
detokenize = _client.detokenize

ollama/_client.py

Lines changed: 47 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,8 @@
4848
CreateRequest,
4949
CopyRequest,
5050
DeleteRequest,
51+
DetokenizeRequest,
52+
DetokenizeResponse,
5153
EmbedRequest,
5254
EmbedResponse,
5355
EmbeddingsRequest,
@@ -67,6 +69,8 @@
6769
ShowRequest,
6870
ShowResponse,
6971
StatusResponse,
72+
TokenizeRequest,
73+
TokenizeResponse,
7074
Tool,
7175
)
7276

@@ -611,6 +615,27 @@ def ps(self) -> ProcessResponse:
611615
'/api/ps',
612616
)
613617

618+
def tokenize(self, model: str, text: str) -> TokenizeResponse:
  """Split ``text`` into token ids using ``model``'s tokenizer.

  Issues a POST to the server's ``/api/tokenize`` endpoint and returns
  the parsed :class:`TokenizeResponse`.
  """
  payload = TokenizeRequest(model=model, text=text)
  # exclude_none keeps the JSON body minimal, matching the other endpoints.
  body = payload.model_dump(exclude_none=True)
  return self._request(TokenizeResponse, 'POST', '/api/tokenize', json=body)
629+
def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
  """Convert ``tokens`` back into text using ``model``'s tokenizer.

  Issues a POST to the server's ``/api/detokenize`` endpoint and returns
  the parsed :class:`DetokenizeResponse`.
  """
  payload = DetokenizeRequest(model=model, tokens=tokens)
  # exclude_none keeps the JSON body minimal, matching the other endpoints.
  body = payload.model_dump(exclude_none=True)
  return self._request(DetokenizeResponse, 'POST', '/api/detokenize', json=body)
614639

615640
class AsyncClient(BaseClient):
616641
def __init__(self, host: Optional[str] = None, **kwargs) -> None:
@@ -1120,6 +1145,28 @@ async def ps(self) -> ProcessResponse:
11201145
'/api/ps',
11211146
)
11221147

1148+
async def tokenize(self, model: str, text: str) -> TokenizeResponse:
  """Split ``text`` into token ids using ``model``'s tokenizer.

  Async counterpart of :meth:`Client.tokenize`; POSTs to ``/api/tokenize``.
  """
  payload = TokenizeRequest(model=model, text=text)
  # exclude_none keeps the JSON body minimal, matching the other endpoints.
  body = payload.model_dump(exclude_none=True)
  return await self._request(TokenizeResponse, 'POST', '/api/tokenize', json=body)
1159+
async def detokenize(self, model: str, tokens: Sequence[int]) -> DetokenizeResponse:
  """Convert ``tokens`` back into text using ``model``'s tokenizer.

  Async counterpart of :meth:`Client.detokenize`; POSTs to ``/api/detokenize``.
  """
  payload = DetokenizeRequest(model=model, tokens=tokens)
  # exclude_none keeps the JSON body minimal, matching the other endpoints.
  body = payload.model_dump(exclude_none=True)
  return await self._request(DetokenizeResponse, 'POST', '/api/detokenize', json=body)
1169+
11231170

11241171
def _copy_messages(messages: Optional[Sequence[Union[Mapping[str, Any], Message]]]) -> Iterator[Message]:
11251172
for message in messages or []:

ollama/_types.py

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -494,6 +494,24 @@ class Model(SubscriptableBaseModel):
494494
models: Sequence[Model]
495495

496496

497+
class TokenizeRequest(BaseRequest):
  """Request body for the ``/api/tokenize`` endpoint."""

  model: str  # name of the model whose tokenizer is applied
  text: str  # raw text to split into token ids
500+
501+
502+
class TokenizeResponse(BaseGenerateResponse):
  """Response from the ``/api/tokenize`` endpoint.

  NOTE(review): inherits ``BaseGenerateResponse`` like the other response
  models here — confirm the inherited generate-style fields are actually
  populated by this endpoint, or whether a lighter base would fit better.
  """

  tokens: Sequence[int]  # token ids produced by the model's tokenizer
504+
505+
506+
class DetokenizeRequest(BaseRequest):
  """Request body for the ``/api/detokenize`` endpoint."""

  model: str  # name of the model whose tokenizer is applied
  tokens: Sequence[int]  # token ids to convert back into text
509+
510+
511+
class DetokenizeResponse(BaseGenerateResponse):
  """Response from the ``/api/detokenize`` endpoint.

  NOTE(review): inherits ``BaseGenerateResponse`` like the other response
  models here — confirm the inherited generate-style fields apply.
  """

  text: str  # text reconstructed from the supplied token ids
513+
514+
497515
class RequestError(Exception):
498516
"""
499517
Common class for request errors.

0 commit comments

Comments
 (0)