Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)

Commit: memory : handle kv_unified for hybrid models (#15050)
This commit is contained in:
@@ -25,6 +25,7 @@ llama_memory_hybrid::llama_memory_hybrid(
                         /* common */
             uint32_t    n_seq_max,
                 bool    offload,
+                bool    unified,
                         /* layer filters */
       layer_filter_cb && filter_attn,
       layer_filter_cb && filter_recr) :
@@ -38,7 +39,7 @@ llama_memory_hybrid::llama_memory_hybrid(
         type_v,
         v_trans,
         offload,
-        1,
+        unified,
         kv_size,
         n_seq_max,
         n_pad,
		Reference in New Issue
	
	Block a user
	 compilade
					compilade