llama : keep track of used KV cells + better KV cache management

This commit is contained in:
Georgi Gerganov
2023-11-22 17:16:57 +02:00
parent 8e672efe63
commit 79cb8f0040
2 changed files with 40 additions and 7 deletions

View File

@@ -361,9 +361,12 @@ extern "C" {
// KV cache
//
// Returns the number of tokens in the KV cache.
// Deprecated: as the message below states, avoid calling this and count the tokens
// in user code instead; the function is slated for removal in the future.
LLAMA_API DEPRECATED(int llama_get_kv_cache_token_count(const struct llama_context * ctx),
"avoid using this, it will be removed in the future, instead - count the tokens in user code");
// Returns the number of tokens in the KV cache.
// This call is slow and is intended for debugging only.
// Note: a KV cell that has multiple sequences assigned to it is counted multiple times.
LLAMA_API int llama_get_kv_cache_token_count(const struct llama_context * ctx);
// Returns the number of used KV cells, i.e. cells that have at least one sequence assigned to them.
LLAMA_API int llama_get_kv_cache_used_cells(const struct llama_context * ctx);
// Clear the KV cache
LLAMA_API void llama_kv_cache_clear(