	kv-cache : support layer reuse (#15504)
* kv-cache : support layer reuse

ggml-ci

* cont : update comments

[no ci]
@@ -41,6 +41,7 @@ struct llama_hparams {
     uint32_t n_embd;
     uint32_t n_embd_features = 0;
     uint32_t n_layer;
+     int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
     uint32_t n_rot;
     uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
     uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
@@ -221,6 +222,11 @@ struct llama_hparams {
     uint32_t n_pos_per_embd() const;

     bool is_swa(uint32_t il) const;
+
+    bool has_kv(uint32_t il) const;
+
+    // number of layers for which has_kv() returns true
+    uint32_t n_layer_kv() const;
 };

 static_assert(std::is_trivially_copyable<llama_hparams>::value, "llama_hparams must be trivially copyable");
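The two new accessors follow directly from the semantics of n_layer_kv_from_start: when the field is non-negative, only the first n_layer_kv_from_start layers own a KV cache and later layers reuse earlier ones. A minimal sketch of how the accessors can be implemented on top of that field (the actual bodies live elsewhere in the llama.cpp sources; this is inferred from the field's comment, not copied from the repository):

// Sketch only: inferred from the comment on n_layer_kv_from_start,
// not taken verbatim from llama.cpp.

bool llama_hparams::has_kv(uint32_t il) const {
    if (n_layer_kv_from_start >= 0) {
        // layer reuse: only the first n_layer_kv_from_start layers
        // have their own KV cache
        return il < (uint32_t) n_layer_kv_from_start;
    }

    // default: every layer has a KV cache
    return true;
}

uint32_t llama_hparams::n_layer_kv() const {
    // count the layers for which has_kv() returns true
    uint32_t res = 0;

    for (uint32_t il = 0; il < n_layer; ++il) {
        if (has_kv(il)) {
            res++;
        }
    }

    return res;
}

Keeping both helpers on llama_hparams (rather than in the cache code) lets the KV-cache implementation size its buffers from n_layer_kv() while graph construction queries has_kv(il) per layer, without either side re-deriving the reuse rule.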