llama : remove implicit recurrent state rollbacks

This commit is contained in:
Francis Couture-Harpin
2024-11-24 20:35:30 -05:00
parent 124c222f76
commit 8006f3b3c8
25 changed files with 411 additions and 1119 deletions

View File

@@ -410,15 +410,15 @@ int main(int argc, char ** argv) {
{
LOG_DBG("keeping sequence %d, n_past_tgt = %d, n_past_dft = %d\n", s_keep, n_past_tgt, n_past_dft);
llama_past_seq_keep(ctx_dft, s_keep);
llama_past_seq_cp (ctx_dft, s_keep, 0, -1, -1);
llama_past_seq_keep(ctx_dft, 0);
llama_kv_cache_seq_keep(ctx_dft, s_keep);
llama_kv_cache_seq_cp (ctx_dft, s_keep, 0, -1, -1);
llama_kv_cache_seq_keep(ctx_dft, 0);
// FIXME: recurrent and hybrid models
llama_past_seq_rm (ctx_tgt, s_keep, n_past_tgt, -1);
llama_past_seq_keep(ctx_tgt, s_keep);
llama_past_seq_cp (ctx_tgt, s_keep, 0, -1, -1);
llama_past_seq_keep(ctx_tgt, 0);
llama_kv_cache_seq_rm (ctx_tgt, s_keep, n_past_tgt, -1);
llama_kv_cache_seq_keep(ctx_tgt, s_keep);
llama_kv_cache_seq_cp (ctx_tgt, s_keep, 0, -1, -1);
llama_kv_cache_seq_keep(ctx_tgt, 0);
}
for (int s = 0; s < n_seq_dft; ++s) {
@@ -495,8 +495,8 @@ int main(int argc, char ** argv) {
if (n_seq_cur < n_seq_dft && cur_p->data[f].p > p_split) {
LOG_DBG("splitting seq %3d into %3d\n", s, n_seq_cur);
llama_past_seq_rm(ctx_dft, n_seq_cur, -1, -1);
llama_past_seq_cp(ctx_dft, s, n_seq_cur, -1, -1);
llama_kv_cache_seq_rm(ctx_dft, n_seq_cur, -1, -1);
llama_kv_cache_seq_cp(ctx_dft, s, n_seq_cur, -1, -1);
// all previous tokens from this branch are now also part of the new branch
for (int t = 0; t < batch_tgt.n_tokens; ++t) {
@@ -577,9 +577,9 @@ int main(int argc, char ** argv) {
// evaluate the target model on the drafted tokens
{
llama_past_seq_keep(ctx_tgt, 0);
llama_kv_cache_seq_keep(ctx_tgt, 0);
for (int s = 1; s < n_seq_dft; ++s) {
llama_past_seq_cp(ctx_tgt, 0, s, -1, -1);
llama_kv_cache_seq_cp(ctx_tgt, 0, s, -1, -1);
}
// LOG_DBG("target batch: %s\n", LOG_BATCH_TOSTR_PRETTY(ctx_tgt, batch_tgt).c_str());