llama : remove implicit recurrent state rollbacks

This commit is contained in:
Francis Couture-Harpin
2024-11-24 20:35:30 -05:00
parent 124c222f76
commit 8006f3b3c8
25 changed files with 411 additions and 1119 deletions

View File

@@ -191,7 +191,7 @@ int main(int argc, char ** argv){
// KV cache management
// clean the cache of draft tokens that weren't accepted
// FIXME: recurrent and hybrid models
llama_past_seq_rm(ctx, 0, n_past, -1);
llama_kv_cache_seq_rm(ctx, 0, n_past, -1);
common_batch_clear(batch_tgt);
common_batch_add(batch_tgt, draft[0], n_past, { 0 }, true);