Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/_base_test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -183,7 +183,7 @@ jobs:
export URL=http://localhost:${FD_API_PORT}/v1/chat/completions
export TEMPLATE=TOKEN_LOGPROB
TEST_EXIT_CODE=0
python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py || TEST_EXIT_CODE=1
python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py test_completions.py test_return_token_ids.py || TEST_EXIT_CODE=1
curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \
-H "Content-Type: application/json" \
-d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--early-stop-config\": \"{\\\"enable_early_stop\\\":true, \\\"window_size\\\":6, \\\"threshold\\\":0.93}\"}"
Expand Down
204 changes: 201 additions & 3 deletions tests/ce/server/test_completions.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,7 @@

from core import TEMPLATE, URL, build_request_payload, send_request

URL = URL.replace("/v1/chat/completions", "/v1/completions")

COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions")

def test_completion_total_tokens():
data = {
Expand All @@ -22,7 +21,7 @@ def test_completion_total_tokens():
}

payload = build_request_payload(TEMPLATE, data)
resp = send_request(URL, payload, stream=True)
resp = send_request(COMPLETIONS_URL, payload, stream=True)
last_data = None
for line in resp.iter_lines(decode_unicode=True):
if line.strip() == "data: [DONE]":
Expand All @@ -35,3 +34,202 @@ def test_completion_total_tokens():
total_tokens = usage["completion_tokens"] + usage["prompt_tokens"]
assert "total_tokens" in usage, "total_tokens 不存在"
assert usage["total_tokens"] == total_tokens, "total_tokens计数不正确"


def test_completion_echo_stream_one_prompt_rti():
    """
    Streaming completion with ``echo`` enabled, a single prompt, and
    ``return_token_ids`` on: the prompt must be echoed back at the very
    start of the second stream packet's text.

    NOTE(review): with return_token_ids the echo is expected in packet 2
    (packet 1 presumably carries token ids) — confirm against server docs.
    """
    data = {
        "prompt": "水果的营养价值是如何的?",
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "echo": True,
        "max_tokens": 2,
        "return_token_ids": True,
    }

    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload, stream=True)

    second_data = None
    packet_count = 0
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        # Skip keep-alive blanks and anything that is not an SSE data line.
        if line.strip() == "" or not line.startswith("data: "):
            continue
        packet_count += 1
        if packet_count == 2:  # only the second packet is needed
            second_data = json.loads(line[len("data: "):])
            break

    # Fail with a clear message instead of a TypeError if the stream ended early.
    assert second_data is not None, "未收到第二包流式数据"
    text = second_data["choices"][0]["text"]
    assert data["prompt"] in text, "echo回显不正确"
    position = text.find(data["prompt"])
    assert position == 0, "echo回显没有在靠前的位置"


def test_completion_echo_stream_one_prompt():
    """
    Streaming completion with ``echo`` enabled and a single prompt
    (no ``return_token_ids``): the prompt must be echoed back at the very
    start of the FIRST stream packet's text.
    """
    data = {
        "prompt": "水果的营养价值是如何的?",
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "echo": True,
        "max_tokens": 2,
    }

    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload, stream=True)

    # The original code stored the first packet in a variable called
    # "second_data"; renamed to match what it actually holds.
    first_data = None
    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        # Skip keep-alive blanks and anything that is not an SSE data line.
        if line.strip() == "" or not line.startswith("data: "):
            continue
        first_data = json.loads(line[len("data: "):])
        break  # only the first packet is needed

    # Fail with a clear message instead of a TypeError if no packet arrived.
    assert first_data is not None, "未收到流式数据"
    text = first_data["choices"][0]["text"]
    assert data["prompt"] in text, "echo回显不正确"
    position = text.find(data["prompt"])
    assert position == 0, "echo回显没有在靠前的位置"


def test_completion_echo_stream_more_prompt():
    """
    Streaming completion with ``echo`` enabled and two prompts: for each
    choice index, the corresponding prompt must appear at the very start
    of that index's second stream packet.
    """
    data = {
        "prompt": ["水果的营养价值是如何的?", "水的化学式是什么?"],
        "stream": True,
        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
        "echo": True,
        "max_tokens": 2,
        "return_token_ids": True,
    }

    payload = build_request_payload(TEMPLATE, data)
    resp = send_request(COMPLETIONS_URL, payload, stream=True)

    # Per-index second packet and per-index packet counter.
    second_data_by_index = {0: None, 1: None}
    packet_count_by_index = {0: 0, 1: 0}

    for line in resp.iter_lines(decode_unicode=True):
        if line.strip() == "data: [DONE]":
            break
        # Skip keep-alive blanks and anything that is not an SSE data line.
        if line.strip() == "" or not line.startswith("data: "):
            continue
        stream_data = json.loads(line[len("data: "):])

        for choice in stream_data.get("choices", []):
            index = choice.get("index")
            if index in packet_count_by_index:
                packet_count_by_index[index] += 1
                if packet_count_by_index[index] == 2:
                    second_data_by_index[index] = choice
        # Stop reading once the second packet of every index is captured.
        if all(value is not None for value in second_data_by_index.values()):
            break

    for idx, prompt in enumerate(data["prompt"]):
        choice = second_data_by_index[idx]
        # Clear assertion instead of a TypeError if an index never produced
        # a second packet.
        assert choice is not None, f"prompt[{idx}]未收到第二包流式数据"
        text = choice["text"]
        assert prompt in text, "echo回显不正确"
        assert text.find(prompt) == 0, f"prompt[{idx}]的echo回显没有在靠前的位置"


def test_completion_echo_one_prompt():
    """
    Non-streaming completion with ``echo`` enabled and a single prompt:
    the response text must begin with the prompt itself.
    """
    request_body = {
        "stream": False,
        "prompt": "水果的营养价值是如何的?",
        "echo": True,
        "max_tokens": 100,
    }
    payload = build_request_payload(TEMPLATE, request_body)
    body = send_request(COMPLETIONS_URL, payload).json()

    answer = body["choices"][0]["text"]
    assert request_body["prompt"] in answer, "echo回显不正确"
    assert answer.find(request_body["prompt"]) == 0, "echo回显没有在靠前的位置"


def test_completion_echo_more_prompt():
    """
    Non-streaming completion with ``echo`` enabled and two prompts:
    each choice's text must begin with its corresponding prompt.
    """
    request_body = {
        "stream": False,
        "prompt": ["水果的营养价值是如何的?", "水的化学式是什么?"],
        "echo": True,
        "max_tokens": 100,
    }
    payload = build_request_payload(TEMPLATE, request_body)
    body = send_request(COMPLETIONS_URL, payload).json()

    for idx in (0, 1):
        prompt = request_body["prompt"][idx]
        answer = body["choices"][idx]["text"]
        assert prompt in answer, "echo回显不正确"
        assert answer.find(prompt) == 0, f"prompt[{idx}]的echo回显没有在靠前的位置"


def test_completion_finish_length():
    """
    Non-streaming completion truncated by ``max_tokens``: the choice's
    ``finish_reason`` must be ``"length"``.
    """
    request_body = {
        "stream": False,
        "prompt": "水果的营养价值是如何的?",
        "max_tokens": 10,
    }
    payload = build_request_payload(TEMPLATE, request_body)
    body = send_request(COMPLETIONS_URL, payload).json()

    reason = body["choices"][0]["finish_reason"]
    assert reason == "length", "达到max_token时,finish_reason不为length"


def test_completion_finish_stop():
    """
    Non-streaming completion that ends naturally (no max_tokens cap):
    the choice's ``finish_reason`` must be ``"stop"``.
    """
    request_body = {
        "stream": False,
        "prompt": "简短的回答我:苹果是水果吗?",
    }
    payload = build_request_payload(TEMPLATE, request_body)
    body = send_request(COMPLETIONS_URL, payload).json()

    reason = body["choices"][0]["finish_reason"]
    assert reason == "stop", "无任何中介,finish_reason不为stop"


Loading
Loading