server : do context shift only while generating (#17000)

This commit is contained in:
Georgi Gerganov
2025-11-04 19:21:36 +02:00
committed by GitHub
parent afd353246d
commit 66d8eccd42

View File

@@ -3587,7 +3587,7 @@ struct server_context {
// apply context-shift if needed // apply context-shift if needed
// TODO: simplify and improve // TODO: simplify and improve
for (server_slot & slot : slots) { for (server_slot & slot : slots) {
if (slot.is_processing() && slot.prompt.n_tokens() + 1 >= slot.n_ctx) { if (slot.state == SLOT_STATE_GENERATING && slot.prompt.n_tokens() + 1 >= slot.n_ctx) {
if (!params_base.ctx_shift) { if (!params_base.ctx_shift) {
// this check is redundant (for good) // this check is redundant (for good)
// we should never get here, because generation should have already stopped in process_token() // we should never get here, because generation should have already stopped in process_token()