server : fix crash when prompt exceeds context size (#3996)

2025-11-04 09:32:00 +00:00 · 2023-11-11 05:48:21 +00:00
parent 34b0a08207
commit d96ca7ded7
1 changed file with 29 additions and 29 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -1557,15 +1557,6 @@ struct llama_server_context
                    slot.num_prompt_tokens = prompt_tokens.size();
                    if (!slot.params.cache_prompt)
                    {
                        llama_sampling_reset(slot.ctx_sampling);
                        slot.n_past = 0;
                        slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
                    }
                    else
                    {
                    if (slot.params.n_keep < 0)
                    {
                        slot.params.n_keep = slot.num_prompt_tokens;
@@ -1595,6 +1586,15 @@ struct llama_server_context
                        GGML_ASSERT(slot.num_prompt_tokens < slot.n_ctx);
                    }
                    if (!slot.params.cache_prompt)
                    {
                        llama_sampling_reset(slot.ctx_sampling);
                        slot.n_past = 0;
                        slot.num_prompt_tokens_processed = slot.num_prompt_tokens;
                    }
                    else
                    {
                        // push the prompt into the sampling context (do not apply grammar)
                        for (auto &token : prompt_tokens)
                        {