Skip to content

Commit 6aa892e

Browse files
authored
server : do not return error out of context (with ctx shift disabled) (#13577)
1 parent aea9f8b commit 6aa892e

File tree

2 files changed

+26
-0
lines changed

2 files changed

+26
-0
lines changed

tools/server/server.cpp

+8
Original file line numberDiff line numberDiff line change
@@ -2251,6 +2251,14 @@ struct server_context {
22512251
slot.has_next_token = true;
22522252
}
22532253

2254+
// if context shifting is disabled, make sure that we don't run out of context
2255+
if (!params_base.ctx_shift && slot.n_past + 1 >= slot.n_ctx) {
2256+
slot.stop = STOP_TYPE_LIMIT;
2257+
slot.has_next_token = false;
2258+
2259+
SLT_DBG(slot, "stopped due to running out of context, n_past = %d, n_ctx = %d\n", slot.n_past, slot.n_ctx);
2260+
}
2261+
22542262
// check the limits
22552263
if (slot.n_decoded > 0 && slot.has_next_token && !slot.has_budget(params_base)) {
22562264
slot.stop = STOP_TYPE_LIMIT;

tools/server/tests/unit/test_ctx_shift.py

+18
Original file line numberDiff line numberDiff line change
@@ -65,3 +65,21 @@ def test_ctx_shift_disabled_long_prompt():
6565
assert res.status_code != 200
6666
assert "error" in res.body
6767
assert "exceeds the available context size" in res.body["error"]["message"]
68+
69+
def test_ctx_shift_disabled_stream():
    """Stream a completion with context shifting disabled.

    Every streamed chunk must carry either no finish reason (generation
    still running) or the finish reason "length". The stream must
    actually deliver a "length" chunk, and some text must have been
    generated before it arrives.
    """
    global server
    server.disable_ctx_shift = True
    server.start()
    res = server.make_stream_request("POST", "/v1/completions", data={
        "n_predict": 256,
        "prompt": "Once",
        "stream": True,
    })
    content = ""
    finished_by_length = False
    for data in res:
        choice = data["choices"][0]
        if choice["finish_reason"] == "length":
            # text must have been streamed before the limit was hit
            assert len(content) > 0
            finished_by_length = True
        else:
            assert choice["finish_reason"] is None
            content += choice["text"]
    # Without this check an empty or truncated stream would pass vacuously:
    # none of the in-loop assertions run if no chunk is ever yielded.
    assert finished_by_length

0 commit comments

Comments
 (0)