server: add exceed_context_size_error type (#15780)

* server: add exceed_context_size_error type
* change error code to 400
2025-10-31 08:51:55 +00:00 · 2025-09-04 11:50:23 +02:00
parent badb80cadb
commit a68d914426
2 changed files with 45 additions and 8 deletions
--- a/tools/server/tests/unit/test_chat_completion.py
+++ b/tools/server/tests/unit/test_chat_completion.py
@@ -385,3 +385,20 @@ def test_logit_bias():
    output_text = res.choices[0].message.content
    assert output_text
    assert all(output_text.find(" " + tok + " ") == -1 for tok in exclude)
+
+def test_context_size_exceeded():  # an over-long chat prompt must be rejected with a structured 400 error
+    global server  # `server` is defined outside this hunk
+    server.start()
+    res = server.make_request("POST", "/chat/completions", data={
+        "messages": [
+            {"role": "system", "content": "Book"},
+            {"role": "user", "content": "What is the best book"},
+        ] * 100, # repeat the system/user pair 100 times to make the prompt too long
+    })
+    assert res.status_code == 400  # rejected as a client error rather than a server failure
+    assert "error" in res.body
+    assert res.body["error"]["type"] == "exceed_context_size_error"
+    assert res.body["error"]["n_prompt_tokens"] > 0  # error payload must carry a positive prompt token count
+    assert server.n_ctx is not None  # both values are needed for the division below
+    assert server.n_slots is not None
+    assert res.body["error"]["n_ctx"] == server.n_ctx // server.n_slots  # presumably the per-slot context size -- TODO confirm against the server