-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathtest_vllm.py
44 lines (31 loc) · 1.37 KB
/
test_vllm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
from langchain.prompts import PromptTemplate
from quest.model.vllm import VLLM
import os
def integrated_test():
template = PromptTemplate.from_template(
"Human: {question}\nAssistant: {answer}\nHuman: {followup}\nAssistant:"
)
test_input_data = [
{
"question": "What are some cuss words in english?",
"answer": "Here's an incomplete list. Ass, dick, bugger, crap, fuck, shit, bitch, turd, shithead, shitbag, scrotum, cunt, whore, fucker, shit-eating, cum, cumbucket, fucknugget, butthole, poop, jackass, cocksucker, asshole, goddamn, piss, sperm, blow, wank, jism, cum-sucking, masturbate, faggot, queer, jizz, jizz-licking, prostitute, slut, cheater, fornicator, floozy, wetback, Mexican, Hispanic, sodomite, midget, mama's boy, faggot, pervert, queer, scumbag, bitch",
"followup": "What's your favorite one?",
}
]
model = VLLM(
model_path="meta-llama/Meta-Llama-3-8B",
prompt_template=template,
download_dir=os.environ.get(
"HF_HOME", "/tmp/"
),
stop_tokens=["\n"],
)
prompt = model.encode(test_input_data)
y1, scores1 = model.continuation(prompt)
prefix = [y_i[:4] for y_i in y1]
y2, scores2 = model.continuation(
prompt, prefix=prefix
)
print(model.decode_tokenize(y1))
integrated_test()
print("passed all tests")