server : handle failures to restore host cache (#17078)

* server : handle failures to restore host cache

* server : add tests for the prompt cache
This commit is contained in:
Georgi Gerganov
2025-11-09 14:27:05 +02:00
committed by GitHub
parent ef1d826997
commit cb1adf8851
2 changed files with 45 additions and 0 deletions

View File

@@ -1690,6 +1690,9 @@ struct server_slot {
bool res = prompt_cache.load(prompt, tokens, ctx, id);
if (!res) {
SLT_WRN(*this, "%s", "failed to load prompt from cache\n");
llama_memory_seq_rm(llama_get_memory(ctx), id, -1, -1);
prompt.tokens.clear();
}
}