	llama : allow gguf RoPE keys to be overridden with defaults (#3240)
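As reconstructed from the diff below: llama_hparams used to hard-code LLaMA-7B defaults, and llm_load_hparams only honored a user-supplied RoPE frequency base/scale when it differed from the compile-time default, so a model's GGUF value could never be overridden back to that default (see the FIXME removed below). After this change, 0.0f is the "unset" sentinel in llama_context_params: zero defers to the model's GGUF keys (falling back to 10000.0f and 1.0f when the keys are absent), while any non-zero user value always wins. The short sketches after the hunks below illustrate the pattern; they are not part of the commit.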
 llama.cpp | 56 ++++++++++++++++++++++----------------------------------
 1 file changed, 22 insertions(+), 34 deletions(-)
@@ -929,23 +929,22 @@ static const size_t kB = 1024;
 static const size_t MB = kB*kB;
 static const size_t GB = kB*kB*kB;
 
-// default hparams (LLaMA 7B)
 struct llama_hparams {
-    uint32_t n_vocab     = 32000;
-    uint32_t n_ctx_train = 2048;  // the context size used during training
-    uint32_t n_ctx       = 512;   // the context size used during inference
-    uint32_t n_embd      = 4096;
-    uint32_t n_head      = 32;
-    uint32_t n_head_kv   = 32;
-    uint32_t n_layer     = 32;
-    uint32_t n_rot       = 64;
-    uint32_t n_ff        = 11008;
+    uint32_t n_vocab;
+    uint32_t n_ctx_train; // context size the model was trained on
+    uint32_t n_ctx;       // context size used during inference
+    uint32_t n_embd;
+    uint32_t n_head;
+    uint32_t n_head_kv;
+    uint32_t n_layer;
+    uint32_t n_rot;
+    uint32_t n_ff;
 
-    float f_norm_eps     = 1e-5;
-    float f_norm_rms_eps = 1e-5;
+    float f_norm_eps;
+    float f_norm_rms_eps;
 
-    float rope_freq_base  = 10000.0f;
-    float rope_freq_scale = 1.0f;
+    float rope_freq_base;
+    float rope_freq_scale;
 
     bool operator!=(const llama_hparams & other) const {
         return static_cast<bool>(memcmp(this, &other, sizeof(llama_hparams))); // NOLINT
@@ -1076,7 +1075,7 @@ struct llama_model {
 
     std::string name = "n/a";
 
-    llama_hparams hparams;
+    llama_hparams hparams = {};
     llama_vocab   vocab;
 
     struct ggml_tensor * tok_embeddings;
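A side effect of dropping the in-class initializers in the first hunk: a default-constructed llama_hparams would leave its scalar members indeterminate, and the memcmp-based operator!= above would then compare uninitialized bytes. Value-initializing the member with = {} zeroes every field instead. A minimal standalone sketch of that distinction, using a hypothetical struct rather than the real one:

#include <cstdio>

// Stand-in for llama_hparams after the in-class defaults were removed.
struct hparams_t {
    unsigned n_vocab;        // no initializer
    float    rope_freq_base; // no initializer
};

int main() {
    hparams_t a = {}; // value-initialization: every member is zeroed
    // hparams_t b;   // default-initialization: members would be indeterminate
    printf("n_vocab=%u rope_freq_base=%.1f\n", a.n_vocab, a.rope_freq_base); // 0 and 0.0
    return 0;
}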
@@ -1674,28 +1673,17 @@ static void llm_load_hparams(
     hparams.n_head_kv = hparams.n_head;
     GGUF_GET_KEY(ctx, hparams.n_head_kv, gguf_get_val_u32, GGUF_TYPE_UINT32, false, kv(LLM_KV_ATTENTION_HEAD_COUNT_KV));
 
-    // TODO: manually setting rope freq base and scale should override this
-    // FIXME: partial fix when the param specified is not the default value, but
-    //        will not work for overriding the model value to the params default
-
-    llama_context_params defaults = llama_context_default_params();
-
-    // rope_freq_base
-    {
-        float ropebase = 10000.0f;
-        GGUF_GET_KEY(ctx, ropebase, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE));
-        if (ropebase != 10000.0f && rope_freq_base == defaults.rope_freq_base) {
-            rope_freq_base = ropebase;
-        }
+    // rope_freq_base (optional)
+    if (rope_freq_base == 0.0f) {
+        rope_freq_base = 10000.0f;
+        GGUF_GET_KEY(ctx, rope_freq_base, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_FREQ_BASE));
     }
 
     // rope_freq_scale (inverse of the kv) is optional
-    {
+    if (rope_freq_scale == 0.0f) {
         float ropescale = 1.0f;
         GGUF_GET_KEY(ctx, ropescale, gguf_get_val_f32, GGUF_TYPE_FLOAT32, false, kv(LLM_KV_ROPE_SCALE_LINEAR));
-        if (ropescale != 1.0f && rope_freq_scale == defaults.rope_freq_scale) {
-            rope_freq_scale = 1.0f/ropescale;
-        }
+        rope_freq_scale = 1.0f/ropescale;
     }
 
     // sanity check for n_rot (optional)
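The new loading logic treats 0.0f as "not set by the user": the built-in default is installed first and the GGUF key, if present, replaces it; for the scale, the GGUF key stores the linear scale factor and the runtime keeps its inverse. A self-contained sketch of the same control flow, where gguf_try_read_f32 is a hypothetical stand-in for GGUF_GET_KEY and the key names are simplified:

#include <cstdio>
#include <cstring>

// Pretend the model file carries rope.freq_base = 1000000.0 (as CodeLlama
// GGUFs do) and no rope.scale_linear key. Returns true if the key exists.
static bool gguf_try_read_f32(const char * key, float * out) {
    if (strcmp(key, "rope.freq_base") == 0) { *out = 1000000.0f; return true; }
    return false;
}

// Mirrors the control flow of llm_load_hparams above.
static void resolve_rope(float & rope_freq_base, float & rope_freq_scale) {
    // 0.0f from the caller means "not overridden": start from the built-in
    // default, then let the model file replace it if the key is present.
    if (rope_freq_base == 0.0f) {
        rope_freq_base = 10000.0f;
        gguf_try_read_f32("rope.freq_base", &rope_freq_base);
    }
    // The GGUF key stores the linear scale; the runtime uses its inverse.
    if (rope_freq_scale == 0.0f) {
        float ropescale = 1.0f;
        gguf_try_read_f32("rope.scale_linear", &ropescale);
        rope_freq_scale = 1.0f/ropescale;
    }
}

int main() {
    float base = 0.0f, scale = 0.0f; // "use the model's values"
    resolve_rope(base, scale);
    printf("from file: base=%.1f scale=%.2f\n", base, scale); // 1000000.0, 1.00

    base = 10000.0f; scale = 0.0f;   // explicit override of the base
    resolve_rope(base, scale);
    printf("override:  base=%.1f scale=%.2f\n", base, scale); // 10000.0, 1.00
    return 0;
}

Note what the old heuristic could not express: passing exactly 10000.0f used to be indistinguishable from "no override", so the file's 1000000.0f would have won anyway.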
@@ -6188,8 +6176,8 @@ struct llama_context_params llama_context_default_params() {
         /*.n_gpu_layers                =*/ 0,
         /*.main_gpu                    =*/ 0,
         /*.tensor_split                =*/ nullptr,
-        /*.rope_freq_base              =*/ 10000.0f,
-        /*.rope_freq_scale             =*/ 1.0f,
+        /*.rope_freq_base              =*/ 0.0f,
+        /*.rope_freq_scale             =*/ 0.0f,
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.low_vram                    =*/ false,
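From the caller's side, the new defaults mean a zero value defers to the model file and any non-zero value wins. A usage sketch, assuming the llama.h API as of this commit (llama_load_model_from_file still took llama_context_params at this point) and a placeholder model path:

#include "llama.h"

int main() {
    llama_backend_init(false);

    struct llama_context_params params = llama_context_default_params();
    // Leave rope_freq_base/rope_freq_scale at 0.0f to take the values from
    // the GGUF keys (or 10000.0f / 1.0f if the keys are absent). Any
    // non-zero value overrides the file -- including values equal to the
    // old compile-time defaults, which the previous heuristic could not do:
    params.rope_freq_base  = 10000.0f;
    params.rope_freq_scale = 1.0f;

    llama_model * model = llama_load_model_from_file("model.gguf", params);
    if (model == NULL) {
        llama_backend_free();
        return 1;
    }

    llama_free_model(model);
    llama_backend_free();
    return 0;
}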
|   | ||||
Author: Cebtenzzre