llama : remove implicit recurrent state rollbacks

This commit is contained in:
Francis Couture-Harpin
2024-11-24 20:35:30 -05:00
parent 124c222f76
commit 8006f3b3c8
25 changed files with 411 additions and 1119 deletions

View File

@@ -1029,7 +1029,7 @@ struct server_context {
SRV_DBG("%s", "clearing KV cache\n");
// clear the entire KV cache
llama_past_clear(ctx);
llama_kv_cache_clear(ctx);
clean_kv_cache = false;
}
@@ -1760,7 +1760,7 @@ struct server_context {
// Erase token cache
const size_t n_erased = slot->cache_tokens.size();
llama_past_seq_rm(ctx, slot->id + 1, -1, -1);
llama_kv_cache_seq_rm(ctx, slot->id + 1, -1, -1);
slot->cache_tokens.clear();
server_task_result result;