-
Notifications
You must be signed in to change notification settings - Fork 379
/
Copy pathremote_tgi.py
87 lines (76 loc) · 2.4 KB
/
remote_tgi.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import json
import requests
import sseclient
async def gen_text(
    prompt,
    remote_addr,
    remote_port=None,
    remote_token=None,
    parameters=None
):
    """Stream generated text pieces from a remote generation endpoint.

    POSTs ``prompt`` as a streaming JSON request and yields the
    ``["token"]["text"]`` value of every server-sent event as it arrives.

    NOTE: the request is made with the synchronous ``requests`` library, so
    every network read blocks the event loop while this generator waits.

    Args:
        prompt: Value sent as the ``inputs`` field of the request body.
        remote_addr: URL of the endpoint.
        remote_port: Optional port; when truthy it is appended to
            ``remote_addr`` as ``":<port>"``.
        remote_token: Optional bearer token; when non-empty it is sent in
            the ``Authorization`` header.
        parameters: Optional mapping sent as the ``parameters`` field. When
            ``None``, an empty object is sent instead of JSON ``null``.

    Yields:
        The ``["token"]["text"]`` value decoded from each event's data.

    Raises:
        requests.HTTPError: If the server responds with a 4xx/5xx status.
    """
    if remote_port:
        remote_addr = f"{remote_addr}:{remote_port}"

    headers = {
        'Content-type': 'application/json'
    }
    if remote_token:
        headers["Authorization"] = f'Bearer {remote_token}'

    data = {
        'inputs': prompt,
        'stream': True,
        'options': {
            'use_cache': False,
        },
        # Avoid serializing a missing parameter set as JSON null.
        'parameters': parameters if parameters is not None else {},
    }

    # The context manager releases the streamed connection even when the
    # consumer stops iterating early or an event fails to parse.
    with requests.post(
        remote_addr,
        headers=headers,
        data=json.dumps(data),
        stream=True,
    ) as r:
        # Fail loudly on an error status instead of feeding an error body
        # to the SSE parser.
        r.raise_for_status()

        client = sseclient.SSEClient(r)
        for event in client.events():
            yield json.loads(event.data)['token']['text']
async def chat_stream(
    idx, local_data, user_message, state,
    global_context, ctx_num_lconv, ctx_sum_prompt,
    res_temp, res_topp, res_topk, res_rpen, res_mnts, res_beams, res_cache, res_sample, res_eosid, res_padid,
    sum_temp, sum_topp, sum_topk, sum_rpen, sum_mnts, sum_beams, sum_cache, sum_sample, sum_eosid, sum_padid,
    internet_option, serper_api_key
):
    """Stream a chat response for one conversation from the remote endpoint.

    Rebuilds the conversation managers from ``local_data``, appends the new
    user message to the conversation at ``idx``, optionally runs an internet
    search, and then streams generated text from ``gen_text`` into the last
    exchange.

    Only ``res_temp``, ``res_topk``, ``res_rpen``, ``res_mnts`` and
    ``res_sample`` are forwarded as generation parameters. ``ctx_sum_prompt``,
    the remaining ``res_*`` arguments and all ``sum_*`` arguments are accepted
    but not used by this function.

    NOTE(review): ``PingPong``, ``build_prompts``, ``internet_search``,
    ``global_vars`` and ``post`` are not imported in the visible part of this
    file - confirm they are in scope at runtime.

    Args:
        idx: Index into ``local_data`` of the conversation to continue.
        local_data: Sequence of JSON-serializable conversation states.
        user_message: The new user message.
        state: Mapping whose ``"ppmanager_type"`` entry provides
            ``from_json`` for rebuilding each conversation manager.
        global_context: Passed to ``build_prompts`` / ``internet_search``.
        ctx_num_lconv: Passed to ``build_prompts`` / ``internet_search``.
        internet_option: When truthy, ``internet_search`` runs first and its
            last non-empty prompt is used for generation.
        serper_api_key: Passed to ``internet_search``.

    Yields:
        Tuples ``("", uis, prompt, str(res))`` where ``uis`` is the current
        UI representation of the conversation, ``prompt`` is the prompt
        built from the conversation, and ``res`` is the list of rebuilt
        conversation managers.
    """
    res = [
        state["ppmanager_type"].from_json(json.dumps(raw))
        for raw in local_data
    ]
    ppm = res[idx]

    # Open a new exchange with an empty reply; streamed text fills it in.
    ppm.add_pingpong(
        PingPong(user_message, "")
    )
    prompt = build_prompts(ppm, global_context, ctx_num_lconv)

    # Keep the last prompt produced by the search so it can actually be
    # used for generation (previously it was computed and then discarded).
    search_prompt = None
    if internet_option:
        for tmp_prompt, uis in internet_search(ppm, serper_api_key, global_context, ctx_num_lconv):
            search_prompt = tmp_prompt
            yield "", uis, prompt, str(res)

    # Fall back to the plain prompt when no search ran or it produced
    # nothing usable.
    gen_prompt = search_prompt or prompt

    async for result in gen_text(
        gen_prompt,
        remote_addr=global_vars.remote_addr,
        remote_port=global_vars.remote_port,
        remote_token=global_vars.remote_token,
        parameters={
            'max_new_tokens': res_mnts,
            'do_sample': res_sample,
            'return_full_text': False,
            'temperature': res_temp,
            'top_k': res_topk,
            # top_p (res_topp) is currently not forwarded.
            'repetition_penalty': res_rpen
        }
    ):
        ppm.append_pong(result)
        yield "", ppm.build_uis(), prompt, str(res)

    ppm = post.strip_pong(ppm)
    yield "", ppm.build_uis(), prompt, str(res)