server : fix crash when using verbose output with input tokens that are not in printable range (#12178) (#12338)

* Fix DOS index bug * Remove new APIs * remove extra line * Remove from API * Add extra newline * Update examples/server/server.cpp --------- Co-authored-by: Xuan-Son Nguyen <thichthat@gmail.com>
2025-10-30 08:42:00 +00:00 · 2025-03-13 06:10:05 -04:00
parent f08f4b3187
commit 2048b5913d
1 changed files with 17 additions and 0 deletions
--- a/examples/server/server.cpp
+++ b/examples/server/server.cpp
@@ -2040,6 +2040,18 @@ struct server_context {
        return ret;
    }
    bool can_be_detokenized(const struct llama_context * ctx, const std::vector<llama_token> & tokens) {
        const llama_model * model = llama_get_model(ctx);
        const llama_vocab * vocab = llama_model_get_vocab(model);
        const int32_t n_vocab = llama_vocab_n_tokens(vocab);
        for (const auto & token : tokens) {
            if (token < 0 || token >= n_vocab) {
                return false;
            }
        }
        return true;
    }
    bool launch_slot_with_task(server_slot & slot, const server_task & task) {
        slot.reset();
        slot.id_task       = task.id;
@@ -2054,6 +2066,11 @@ struct server_context {
            slot.lora = task.params.lora;
        }
        bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens);
        if (!can_detokenize) {
            send_error(task, "Prompt contains invalid tokens", ERROR_TYPE_INVALID_REQUEST);
            return false;
        }
        SLT_DBG(slot, "launching slot : %s\n", safe_json_to_str(slot.to_json()).c_str());
        if (slot.n_predict > 0 && slot.params.n_predict > slot.n_predict) {