mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	kv-cache : support layer reuse (#15504)
* kv-cache : support layer reuse

ggml-ci

* cont : update comments

[no ci]
@@ -15,18 +15,14 @@
 //       see the implementation of llama_kv_cache_context_i for an example how to do it
 class llama_memory_recurrent : public llama_memory_i {
 public:
-
-    // this callback is used to filter out layers that should not be included in the cache
-    using layer_filter_cb = std::function<bool(int32_t il)>;
-
     llama_memory_recurrent(
-            const llama_model &  model,
-              layer_filter_cb && filter,
-                    ggml_type    type_r,
-                    ggml_type    type_s,
-                         bool    offload,
-                     uint32_t    mem_size,
-                     uint32_t    n_seq_max);
+            const llama_model & model,
+                    ggml_type   type_r,
+                    ggml_type   type_s,
+                         bool   offload,
+                     uint32_t   mem_size,
+                     uint32_t   n_seq_max,
+        const layer_filter_cb & filter);
 
     ~llama_memory_recurrent() = default;
 
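For context on the API shown in the diff: layer_filter_cb is a std::function<bool(int32_t il)> (per the typedef this hunk removes from the header, presumably relocated elsewhere as part of the layer-reuse change), and the filter now arrives as the last constructor argument by const reference instead of second by rvalue reference. Below is a minimal sketch, assuming a caller that wants the cache to cover only even-numbered layers; the ggml types, sizes, and the commented constructor call are illustrative placeholders, not values taken from this commit.

// Sketch only: what a layer filter can look like and how the new parameter
// order would be used. The callback shape bool(int32_t il) comes from the
// typedef shown in the diff; everything else here is an assumption.
#include <cstdint>
#include <functional>

// assumed alias, matching the typedef removed from llama_memory_recurrent
using layer_filter_cb = std::function<bool(int32_t il)>;

// keep only even-numbered layers in the recurrent cache
static layer_filter_cb make_even_layer_filter() {
    return [](int32_t il) { return il % 2 == 0; };
}

// hypothetical call site, mirroring the new parameter order from the diff:
//
//     llama_memory_recurrent mem(
//             model,           // const llama_model &
//             GGML_TYPE_F32,   // type_r
//             GGML_TYPE_F32,   // type_s
//             /*offload   =*/ false,
//             /*mem_size  =*/ 1,
//             /*n_seq_max =*/ 1,
//             make_even_layer_filter());

int main() {
    const layer_filter_cb filter = make_even_layer_filter();
    return filter(0) ? 0 : 1; // layer 0 passes this filter
}

Taking the filter by const reference at the tail of the parameter list also means a single callback object can be reused across several cache constructions, which fits the layer-reuse theme of the commit; that reading is an inference from the diff, not something stated in the commit message.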
	 Georgi Gerganov