# A realistic, minimal Instructor-large vectorizer that uses dynamic quantization to speed up CPU inference
from fastapi import FastAPI
from InstructorEmbedding import INSTRUCTOR
import torch

# Load the Instructor model on CPU
model = INSTRUCTOR('hkunlp/instructor-large', device='cpu')

# Dynamically quantize the model's Linear layers to int8 to reduce memory use and speed up inference
torch.backends.quantized.engine = 'qnnpack'
qmodel = torch.quantization.quantize_dynamic(model, {torch.nn.Linear}, dtype=torch.qint8)

app = FastAPI(
    title="InstructVectorizer",
    description="Text embedding using quantized Instructor-large. Instruction is optional; by default the text is represented for retrieval.",
    version="1.0",
    contact={
        "name": "Pat Wendorf",
        "email": "[email protected]",
    },
    license_info={
        "name": "MIT",
        "url": "https://opensource.org/license/mit/",
    },
)
@app.get("/")
async def root():
return {"message": "Vectorize text with the Instructor-large model. See /docs for more info."}
@app.get("/vectorize/")
async def vectorize(text: str, instruction: str = "Represent the text for retrieval:"):
embedding = qmodel.encode([[instruction,text]]).tolist()[0]
return embedding
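
# A minimal sketch of how this service might be run and queried; it assumes uvicorn and the
# requests library are available, neither of which is shown in the original file. Host, port,
# and the example text are illustrative only.
#
#   uvicorn main:app --host 0.0.0.0 --port 8000
#
#   import requests
#   resp = requests.get(
#       "http://localhost:8000/vectorize/",
#       params={"text": "FastAPI makes serving models easy."},
#   )
#   vector = resp.json()  # a list of floats: the embedding for the input text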