mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	llama : refactor k-shift implementation + KV defragmentation (#5691)
* llama : refactor k-shift implementation ggml-ci * llama : rename llama_kv_cache_seq_shift to llama_kv_cache_seq_add * llama : cont k-shift refactoring + normalize type names ggml-ci * minor : fix MPI builds * llama : reuse n_rot from the build context ggml-ci * llama : revert enum name changes from this PR ggml-ci * llama : update llama_rope_type * llama : add comment about rope values * llama : fix build * passkey : apply kv cache updates explicitly ggml-ci * llama : change name to llama_kv_cache_update() * llama : add llama_kv_cache_seq_pos_max() * passkey : fix llama_kv_cache_seq_pos_max() usage * llama : some llama_kv_cell simplifications * llama : add llama_kv_cache_compress (EXPERIMENTAL) * llama : add alternative KV cache merging (EXPERIMENTAL) * llama : add llama_kv_cache_defrag * llama : comments * llama : remove llama_kv_cache_compress will add in a separate PR ggml-ci * llama : defragment via non-overlapping moves * llama : ggml_graph based defrag implementation ggml-ci * llama : switch the loop order in build_defrag * llama : add comments
This commit is contained in:
		@@ -447,8 +447,8 @@ int main(int argc, char ** argv) {
 | 
			
		||||
                LOG("context full, swapping: n_past = %d, n_left = %d, n_ctx = %d, n_keep = %d, n_discard = %d\n",
 | 
			
		||||
                    n_past, n_left, n_ctx, params.n_keep, n_discard);
 | 
			
		||||
 | 
			
		||||
                llama_kv_cache_seq_rm   (ctx, 0, params.n_keep + 1            , params.n_keep + n_discard + 1);
 | 
			
		||||
                llama_kv_cache_seq_shift(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
 | 
			
		||||
                llama_kv_cache_seq_rm (ctx, 0, params.n_keep + 1            , params.n_keep + n_discard + 1);
 | 
			
		||||
                llama_kv_cache_seq_add(ctx, 0, params.n_keep + 1 + n_discard, n_past, -n_discard);
 | 
			
		||||
 | 
			
		||||
                n_past -= n_discard;
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user