llama : parameter conversion and loading fixes for PLaMo2 variants (#16075)

* Fix to use hidden_size_per_head
* Fix num heads
* Fix array
* Fix loading weights
* Support old GGUF converted by the previous version of llama.cpp
* Update src/llama-model.cpp

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>

* Move shared parameter definitions to the outside of the loop
* Do not calculate n_embd_head_k/v as n_embd / n_head

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
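The core of the fix described above is to take the per-head dimensions from the converted hidden_size_per_head value stored in the model metadata instead of deriving them as n_embd / n_head, while still accepting older GGUF files that lack the explicit keys. Below is a minimal C++ sketch of that loading order; the gguf_meta type, its get_u32() accessor, and the key names are illustrative stand-ins, not the actual llama.cpp API.

// Sketch only, not the llama.cpp implementation: prefer explicit per-head
// sizes from the model metadata and fall back to n_embd / n_head only for
// files written by older converters. gguf_meta and the key names are
// illustrative stand-ins.
#include <cstdint>
#include <map>
#include <optional>
#include <string>

struct gguf_meta {
    std::map<std::string, uint32_t> u32; // subset of the header key/value pairs

    std::optional<uint32_t> get_u32(const std::string & key) const {
        const auto it = u32.find(key);
        if (it == u32.end()) {
            return std::nullopt;
        }
        return it->second;
    }
};

struct head_dims {
    uint32_t n_embd_head_k = 0;
    uint32_t n_embd_head_v = 0;
};

head_dims load_head_dims(const gguf_meta & meta, uint32_t n_embd, uint32_t n_head) {
    head_dims hd;
    // per-head sizes written by the converter from hidden_size_per_head
    const auto k = meta.get_u32("attention.key_length");   // illustrative key name
    const auto v = meta.get_u32("attention.value_length"); // illustrative key name

    hd.n_embd_head_k = k ? *k : n_embd / n_head; // legacy fallback for old GGUFs
    hd.n_embd_head_v = v ? *v : n_embd / n_head; // legacy fallback for old GGUFs
    return hd;
}

With the per-head size present in the metadata, PLaMo2 variants whose head size is not n_embd / n_head load with the correct shapes, and older converted files keep working through the fallback.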
@@ -42,7 +42,7 @@ struct llama_hparams {
     uint32_t n_embd;
     uint32_t n_embd_features = 0;
     uint32_t n_layer;
-     int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
+    int32_t n_layer_kv_from_start = -1; // if non-negative, the first n_layer_kv_from_start layers have KV cache
     uint32_t n_rot;
     uint32_t n_embd_head_k; // dimension of keys (d_k). d_q is assumed to be the same, but there are n_head q heads, and only n_head_kv k-v heads
     uint32_t n_embd_head_v; // dimension of values (d_v) aka n_embd_head
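For reference, the comments on n_embd_head_k and n_embd_head_v describe grouped-query attention: queries use n_head heads while keys and values use only n_head_kv heads, each with an explicit per-head size. The small helper below (illustrative only, not part of llama.cpp) shows the projection widths these fields imply.

// Illustrative only: attention projection output widths implied by the
// fields above, assuming d_q == d_k as noted in the comment on n_embd_head_k.
#include <cstdint>

struct attn_proj_dims {
    uint32_t q; // n_head    * n_embd_head_k
    uint32_t k; // n_head_kv * n_embd_head_k
    uint32_t v; // n_head_kv * n_embd_head_v
};

attn_proj_dims attn_projection_dims(uint32_t n_head, uint32_t n_head_kv,
                                    uint32_t n_embd_head_k, uint32_t n_embd_head_v) {
    return {
        n_head    * n_embd_head_k,
        n_head_kv * n_embd_head_k,
        n_head_kv * n_embd_head_v,
    };
}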