server : use slot context size instead of training context size

2025-11-06 09:46:50 +00:00 · 2025-10-28 11:39:07 +02:00
parent e776168267
commit 7ebe7f77a1
2 changed files with 4 additions and 6 deletions
--- a/tools/server/tests/unit/test_ctx_shift.py
+++ b/tools/server/tests/unit/test_ctx_shift.py
@@ -45,7 +45,7 @@ def test_ctx_shift_enabled():

@pytest.mark.parametrize("n_predict,n_token_output,truncated", [
    (64, 64, False),
-    (-1, 120, True),
+    (-1, 248, True), # 8 tokens prompt + 248 tokens generated = 256 tokens total
 ])
 def test_ctx_shift_disabled_short_prompt(n_predict: int, n_token_output: int, truncated: bool):
    global server