mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	llama : refactor llama_context, llama_kv_cache, llm_build_context (#12181)
* llama : refactor llama_context, llama_kv_cache, llm_build_context ggml-ci
* graph : don't mutate the KV cache during defrag ggml-ci
* context : reduce virtuals + remove test function ggml-ci
* context : move interface implementation to source file + factory ggml-ci
* graph : move KV cache build functions to llama_context impl ggml-ci
* graph : remove model reference from build_pooling ggml-ci
* graph : remove llama_model reference ggml-ci
* kv_cache : provide rope factors ggml-ci
* graph : rework inputs to use only unique_ptr, remove attn input abstraction ggml-ci
* context : remove llama_context_i abstraction ggml-ci
* context : clean-up ggml-ci
* graph : clean-up ggml-ci
* llama : remove redundant keywords (struct, enum) ggml-ci
* model : adapt gemma3 ggml-ci
* graph : restore same attention ops as on master ggml-ci
* llama : remove TODO + fix indent ggml-ci
This commit is contained in:
		| @@ -420,14 +420,14 @@ int main(int argc, char ** argv) { | ||||
|             { | ||||
|                 LOG_DBG("keeping sequence %d, n_past_tgt = %d, n_past_dft = %d\n", s_keep, n_past_tgt, n_past_dft); | ||||
|  | ||||
|                 llama_kv_cache_seq_keep(ctx_dft, s_keep); | ||||
|                 llama_kv_cache_seq_cp  (ctx_dft, s_keep, 0, -1, -1); | ||||
|                 llama_kv_cache_seq_keep(ctx_dft, 0); | ||||
|                 llama_kv_self_seq_keep(ctx_dft, s_keep); | ||||
|                 llama_kv_self_seq_cp  (ctx_dft, s_keep, 0, -1, -1); | ||||
|                 llama_kv_self_seq_keep(ctx_dft, 0); | ||||
|  | ||||
|                 llama_kv_cache_seq_rm  (ctx_tgt, s_keep, n_past_tgt, -1); | ||||
|                 llama_kv_cache_seq_keep(ctx_tgt, s_keep); | ||||
|                 llama_kv_cache_seq_cp  (ctx_tgt, s_keep, 0, -1, -1); | ||||
|                 llama_kv_cache_seq_keep(ctx_tgt, 0); | ||||
|                 llama_kv_self_seq_rm  (ctx_tgt, s_keep, n_past_tgt, -1); | ||||
|                 llama_kv_self_seq_keep(ctx_tgt, s_keep); | ||||
|                 llama_kv_self_seq_cp  (ctx_tgt, s_keep, 0, -1, -1); | ||||
|                 llama_kv_self_seq_keep(ctx_tgt, 0); | ||||
|             } | ||||
|  | ||||
|             for (int s = 0; s < n_seq_dft; ++s) { | ||||
| @@ -444,7 +444,7 @@ int main(int argc, char ** argv) { | ||||
|             common_batch_clear(batch_dft); | ||||
|             common_batch_add  (batch_dft, token_id, n_past_dft, { 0 }, true); | ||||
|  | ||||
|             llama_kv_cache_seq_rm(ctx_dft, 0, n_past_dft, -1); | ||||
|             llama_kv_self_seq_rm(ctx_dft, 0, n_past_dft, -1); | ||||
|             // LOG_DBG("dft batch: %s\n", LOG_BATCH_TOSTR_PRETTY(ctx_dft, batch_dft).c_str()); | ||||
|             llama_decode(ctx_dft, batch_dft); | ||||
|  | ||||
| @@ -503,8 +503,8 @@ int main(int argc, char ** argv) { | ||||
|                     if (n_seq_cur < n_seq_dft && cur_p->data[f].p > p_draft_split) { | ||||
|                         LOG_DBG("splitting seq %3d into %3d\n", s, n_seq_cur); | ||||
|  | ||||
|                         llama_kv_cache_seq_rm(ctx_dft,    n_seq_cur, -1, -1); | ||||
|                         llama_kv_cache_seq_cp(ctx_dft, s, n_seq_cur, -1, -1); | ||||
|                         llama_kv_self_seq_rm(ctx_dft,    n_seq_cur, -1, -1); | ||||
|                         llama_kv_self_seq_cp(ctx_dft, s, n_seq_cur, -1, -1); | ||||
|  | ||||
|                         // all previous tokens from this branch are now also part of the new branch | ||||
|                         for (int t = 0; t < batch_tgt.n_tokens; ++t) { | ||||
| @@ -585,9 +585,9 @@ int main(int argc, char ** argv) { | ||||
|  | ||||
|         // evaluate the target model on the drafted tokens | ||||
|         { | ||||
|             llama_kv_cache_seq_keep(ctx_tgt, 0); | ||||
|             llama_kv_self_seq_keep(ctx_tgt, 0); | ||||
|             for (int s = 1; s < n_seq_dft; ++s) { | ||||
|                 llama_kv_cache_seq_cp(ctx_tgt, 0, s, -1, -1); | ||||
|                 llama_kv_self_seq_cp(ctx_tgt, 0, s, -1, -1); | ||||
|             } | ||||
|  | ||||
|             // LOG_DBG("target batch: %s\n", LOG_BATCH_TOSTR_PRETTY(ctx_tgt, batch_tgt).c_str()); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov