diff --git a/.github/workflows/_base_test.yml b/.github/workflows/_base_test.yml index 6672d1260f..40590ec3f5 100644 --- a/.github/workflows/_base_test.yml +++ b/.github/workflows/_base_test.yml @@ -183,7 +183,7 @@ jobs: export URL=http://localhost:${FD_API_PORT}/v1/chat/completions export TEMPLATE=TOKEN_LOGPROB TEST_EXIT_CODE=0 - python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py || TEST_EXIT_CODE=1 + python -m pytest -sv test_base_chat.py test_compare_top_logprobs.py test_logprobs.py test_params_boundary.py test_seed_usage.py test_stream.py test_evil_cases.py test_completions.py test_return_token_ids.py || TEST_EXIT_CODE=1 curl -X POST http://0.0.0.0:${FLASK_PORT}/switch \ -H "Content-Type: application/json" \ -d "{\"--model\": \"/MODELDATA/ERNIE-4.5-0.3B-Paddle\", \"--early-stop-config\": \"{\\\"enable_early_stop\\\":true, \\\"window_size\\\":6, \\\"threshold\\\":0.93}\"}" diff --git a/tests/ce/server/test_completions.py b/tests/ce/server/test_completions.py index 1ee7cbaa8e..2dae312a29 100644 --- a/tests/ce/server/test_completions.py +++ b/tests/ce/server/test_completions.py @@ -11,8 +11,7 @@ from core import TEMPLATE, URL, build_request_payload, send_request -URL = URL.replace("/v1/chat/completions", "/v1/completions") - +COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions") def test_completion_total_tokens(): data = { @@ -22,7 +21,7 @@ def test_completion_total_tokens(): } payload = build_request_payload(TEMPLATE, data) - resp = send_request(URL, payload, stream=True) + resp = send_request(COMPLETIONS_URL, payload, stream=True) last_data = None for line in resp.iter_lines(decode_unicode=True): if line.strip() == "data: [DONE]": @@ -35,3 +34,202 @@ def test_completion_total_tokens(): total_tokens = usage["completion_tokens"] + usage["prompt_tokens"] assert "total_tokens" in usage, "total_tokens 不存在" assert usage["total_tokens"] == 
total_tokens, "total_tokens计数不正确" + + +def test_completion_echo_stream_one_prompt_rti(): + """ + 测试echo参数在流式回复中,且设置为仅回复一个prompt + """ + data = { + "prompt": "水果的营养价值是如何的?", + "stream": True, + "stream_options": {"include_usage": True, "continuous_usage_stats": True}, + "echo": True, + "max_tokens": 2, + "return_token_ids": True, + } + + payload = build_request_payload(TEMPLATE, data) + resp = send_request(COMPLETIONS_URL, payload, stream=True) + last_data = None + # 初始化计数器 + counter = 0 + second_data = None + for line in resp.iter_lines(decode_unicode=True): + if line.strip() == "data: [DONE]": + break + if line.strip() == "" or not line.startswith("data: "): + continue + line = line[len("data: "):] + stream_data = json.loads(line) + counter += 1 + if counter == 2: # 当计数器为2时,保存第二包数据 + second_data = stream_data + break # 如果只需要第二包数据,可以在这里直接退出循环 + text = second_data["choices"][0]["text"] + assert data["prompt"] in text, "echo回显不正确" + position = text.find(data["prompt"]) + assert position == 0, "echo回显没有在靠前的位置" + + +def test_completion_echo_stream_one_prompt(): + """ + 测试echo参数在流式回复中,且设置为仅回复一个prompt + """ + data = { + "prompt": "水果的营养价值是如何的?", + "stream": True, + "stream_options": {"include_usage": True, "continuous_usage_stats": True}, + "echo": True, + "max_tokens": 2 + } + + payload = build_request_payload(TEMPLATE, data) + resp = send_request(COMPLETIONS_URL, payload, stream=True) + last_data = None + # 初始化计数器 + counter = 0 + second_data = None + for line in resp.iter_lines(decode_unicode=True): + if line.strip() == "data: [DONE]": + break + if line.strip() == "" or not line.startswith("data: "): + continue + line = line[len("data: "):] + stream_data = json.loads(line) + counter += 1 + if counter == 1: # 当计数器为1时,保存第一包数据 + second_data = stream_data + break # 如果只需要第二包数据,可以在这里直接退出循环 + text = second_data["choices"][0]["text"] + assert data["prompt"] in text, "echo回显不正确" + position = text.find(data["prompt"]) + assert position == 0, "echo回显没有在靠前的位置" + + +def 
test_completion_echo_stream_more_prompt(): + """ + 测试echo参数在流式回复中,且设置为回复多个prompt + """ + data = { + "prompt": ["水果的营养价值是如何的?","水的化学式是什么?"], + "stream": True, + "stream_options": {"include_usage": True, "continuous_usage_stats": True}, + "echo": True, + "max_tokens": 2, + "return_token_ids": True + } + + payload = build_request_payload(TEMPLATE, data) + resp = send_request(COMPLETIONS_URL, payload, stream=True) + last_data = None + # 初始化计数器 + counter = 0 + second_data = None + # 初始化字典来存储每个index的第二包数据 + second_data_by_index = {0: None, 1: None} + # 初始化字典来记录每个index的包计数 + packet_count_by_index = {0: 0, 1: 0} + + for line in resp.iter_lines(decode_unicode=True): + if line.strip() == "data: [DONE]": + break + if line.strip() == "" or not line.startswith("data: "): + continue + line = line[len("data: "):] + stream_data = json.loads(line) + + for choice in stream_data.get("choices", []): + index = choice.get("index") + if index in packet_count_by_index: + packet_count_by_index[index] += 1 + if packet_count_by_index[index] == 2: + second_data_by_index[index] = choice + if all(value is not None for value in second_data_by_index.values()): + break + text_0 = second_data_by_index[0]["text"] + text_1 = second_data_by_index[1]["text"] + assert data["prompt"][0] in text_0, "echo回显不正确" + assert data["prompt"][1] in text_1, "echo回显不正确" + position_0 = text_0.find(data["prompt"][0]) + assert position_0 == 0, "prompt[0]的echo回显没有在靠前的位置" + position_1 = text_1.find(data["prompt"][1]) + assert position_1 == 0, "prompt[1]的echo回显没有在靠前的位置" + + +def test_completion_echo_one_prompt(): + """ + 测试echo参数在非流式回复中,且设置为仅发送一个prompt + """ + data = { + "stream": False, + "prompt": "水果的营养价值是如何的?", + "echo": True, + "max_tokens": 100, + } + payload = build_request_payload(TEMPLATE, data) + response = send_request(COMPLETIONS_URL, payload) + response = response.json() + + text = response["choices"][0]["text"] + assert data["prompt"] in text, "echo回显不正确" + position = text.find(data["prompt"]) + assert 
position == 0, "echo回显没有在靠前的位置"
+
+
+def test_completion_echo_more_prompt():
+    """
+    测试echo参数在非流式回复中,且设置为发送多个prompt
+    """
+    data = {
+        "stream": False,
+        "prompt": ["水果的营养价值是如何的?","水的化学式是什么?"],
+        "echo": True,
+        "max_tokens": 100
+    }
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(COMPLETIONS_URL, payload).json()
+
+    text_0 = response["choices"][0]["text"]
+    text_1 = response["choices"][1]["text"]
+    assert data["prompt"][0] in text_0, "echo回显不正确"
+    assert data["prompt"][1] in text_1, "echo回显不正确"
+    position_0 = text_0.find(data["prompt"][0])
+    assert position_0 == 0, "prompt[0]的echo回显没有在靠前的位置"
+    position_1 = text_1.find(data["prompt"][1])
+    assert position_1 == 0, "prompt[1]的echo回显没有在靠前的位置"
+
+
+def test_completion_finish_length():
+    """
+    非流式回复中,因达到max_token截断检查finish_reason参数
+    """
+    data = {
+        "stream": False,
+        "prompt": "水果的营养价值是如何的?",
+        "max_tokens": 10
+    }
+
+    payload = build_request_payload(TEMPLATE, data)
+    response = send_request(COMPLETIONS_URL, payload).json()
+
+    finish_reason = response["choices"][0]["finish_reason"]
+    assert finish_reason == "length", "达到max_token时,finish_reason不为length"
+
+
+def test_completion_finish_stop():
+    """
+    非流式回复中,模型自然回复完成,检查finish_reason参数
+    """
+    data = {
+        "stream": False,
+        "prompt": "简短的回答我:苹果是水果吗?"
+ } + + payload = build_request_payload(TEMPLATE, data) + response = send_request(COMPLETIONS_URL, payload).json() + + finish_reason = response["choices"][0]["finish_reason"] + assert finish_reason == "stop", "无任何中介,finish_reason不为stop" + + \ No newline at end of file diff --git a/tests/ce/server/test_return_token_ids.py b/tests/ce/server/test_return_token_ids.py new file mode 100644 index 0000000000..941c217144 --- /dev/null +++ b/tests/ce/server/test_return_token_ids.py @@ -0,0 +1,183 @@ +#!/bin/env python3 +# -*- coding: utf-8 -*- +# @author xujing43 +# encoding=utf-8 vi:ts=4:sw=4:expandtab:ft=python + +""" +Checking for /v1/completions parameters +""" + +import json + +from core import ( + TEMPLATE, + URL, + build_request_payload, + send_request, +) + +COMPLETIONS_URL = URL.replace("/v1/chat/completions", "/v1/completions") + + +def test_completion_stream_text_after_process_raw_prediction(): + """ + /v1/completions接口, stream=True + 返回属性"text_after_process"和"reasoning_content" + """ + data = { + "prompt": "你是谁", + "stream": True, + "stream_options": {"include_usage": True, "continuous_usage_stats": True}, + "max_tokens": 50, + "return_token_ids": True + } + + payload = build_request_payload(TEMPLATE, data) + resp = send_request(COMPLETIONS_URL, payload, stream=True) + for line in resp.iter_lines(decode_unicode=True): + if line.strip() == "data: [DONE]": + break + if line.strip() == "" or not line.startswith("data: "): + continue + line = line[len("data: "):] + response_data = json.loads(line) + + choice = response_data["choices"][0] + if "prompt_token_ids" in choice and choice["prompt_token_ids"] is not None: + text_after_process = choice["text_after_process"] + assert data["prompt"] in text_after_process, "text_after_process取值结果不正确" + else: + raw_prediction = choice["raw_prediction"] + reasoning_content = choice["reasoning_content"] + text = choice["text"] + assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确" + if "finish_reason" in 
line.strip() :
+            break
+
+
+def test_completion_text_after_process_raw_prediction_return_token_ids():
+    """
+    /v1/completions接口,非流式接口
+    返回属性"text_after_process"和"reasoning_content"
+    """
+    data = {
+        "stream": False,
+        "prompt": "你是谁",
+        "max_tokens": 50,
+        "return_token_ids": True
+    }
+    payload = build_request_payload(TEMPLATE, data)
+    resp = send_request(COMPLETIONS_URL, payload).json()
+
+    text_after_process = resp["choices"][0]["text_after_process"]
+    assert data["prompt"] in text_after_process, "text_after_process取值结果不正确"
+
+    raw_prediction = resp["choices"][0]["raw_prediction"]
+    reasoning_content = resp["choices"][0]["reasoning_content"]
+    text = resp["choices"][0]["text"]
+    assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确"
+
+
+def test_completion_text_after_process_raw_prediction():
+    """
+    /v1/completions接口,无return_token_ids参数
+    非流式接口中,无return token ids 属性"text_after_process"和"reasoning_content"值为null
+    """
+    data = {
+        "stream": False,
+        "prompt": "你是谁",
+        "max_tokens": 50
+    }
+    payload = build_request_payload(TEMPLATE, data)
+    resp = send_request(COMPLETIONS_URL, payload).json()
+
+    text_after_process = resp["choices"][0]["text_after_process"]
+    assert text_after_process is None, "text_after_process取值结果不正确"
+
+    raw_prediction = resp["choices"][0]["raw_prediction"]
+    assert raw_prediction is None, "raw_prediction取值结果不正确"
+
+
+def test_stream_text_after_process_raw_prediction():
+    """
+    /v1/chat/completions接口,"stream": True
+    返回属性"text_after_process"和"reasoning_content"
+    """
+    data = {
+        "messages": [{"role": "user", "content": "你是谁"}],
+        "stream": True,
+        "stream_options": {"include_usage": True, "continuous_usage_stats": True},
+        "max_tokens": 50,
+        "return_token_ids": True
+    }
+
+    payload = build_request_payload(TEMPLATE, data)
+    resp = send_request(URL, payload, stream=True)
+    for line in resp.iter_lines(decode_unicode=True):
+        if line.strip() == "data: [DONE]" :
+            break
+        if line.strip() == "" or not line.startswith("data: 
"): + continue + line = line[len("data: "):] + response_data = json.loads(line) + + choice = response_data["choices"][0] + if "prompt_token_ids" in choice["delta"] and choice["delta"]["prompt_token_ids"] is not None: + text_after_process = choice["delta"]["text_after_process"] + assert data["messages"][0]["content"] in text_after_process, "text_after_process取值结果不正确" + else: + raw_prediction = choice["delta"]["raw_prediction"] + reasoning_content = choice["delta"]["reasoning_content"] + content = choice["delta"]["content"] + assert reasoning_content or content in raw_prediction, "raw_prediction取值结果不正确" + if "finish_reason" in line.strip() : + break + + +def test_text_after_process_raw_prediction_return_tokrn_ids(): + """ + /v1/chat/completions接口,非流式接口 + 返回属性"text_after_process"和"reasoning_content" + """ + data = { + "stream": False, + "messages": [{"role": "user", "content": "你是谁"}], + "max_tokens": 50, + "return_token_ids": True, + "logprobs": False, + "top_logprobs": None, + } + payload = build_request_payload(TEMPLATE, data) + resp = send_request(URL, payload).json() + + text_after_process = resp["choices"][0]["message"]["text_after_process"] + assert data["messages"][0]["content"] in text_after_process, "text_after_process取值结果不正确" + + raw_prediction = resp["choices"][0]["message"]["raw_prediction"] + reasoning_content = resp["choices"][0]["message"]["reasoning_content"] + text = resp["choices"][0]["message"]["content"] + assert reasoning_content or text in raw_prediction, "raw_prediction取值结果不正确" + + +def test_text_after_process_raw_prediction(): + """ + /v1/chat/completions接口,无return_tokrn_ids参数 + 无return token ids 属性"text_after_process"和"reasoning_content"值为null + """ + data = { + "stream": False, + "messages": [{"role": "user", "content": "你是谁"}], + "max_tokens": 50, + "logprobs": False, + "top_logprobs": None, + } + payload = build_request_payload(TEMPLATE, data) + resp = send_request(URL, payload).json() + + text_after_process = 
resp["choices"][0]["message"]["text_after_process"] + assert text_after_process is None, "text_after_process取值结果不正确" + + raw_prediction = resp["choices"][0]["message"]["raw_prediction"] + assert raw_prediction is None, "raw_prediction取值结果不正确" + + diff --git a/tools/codestyle/pre_commit.sh b/tools/codestyle/pre_commit.sh index 26d289b213..8544a4a97f 100644 --- a/tools/codestyle/pre_commit.sh +++ b/tools/codestyle/pre_commit.sh @@ -30,7 +30,7 @@ if ! [[ $(python -V 2>&1 | awk '{print $2}' | awk -F '.' '{print $1$2}') -ge 36 fi # Exclude any files under the 'test/ce/server/' directory from code style checks. -diff_files=$(git diff --name-only --diff-filter=ACMR ${BRANCH} | grep -v '^test/ce/server/') +diff_files=$(git diff --name-only --diff-filter=ACMR ${BRANCH} | grep -v '^tests/ce/server/') num_diff_files=$(echo "$diff_files" | wc -l) echo -e "diff files between pr and ${BRANCH}:\n${diff_files}"