fixed multipliers
(from a mirror of https://github.com/ggml-org/llama.cpp.git)
@@ -6645,11 +6645,11 @@ class FalconH1Model(Mamba2Model):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_key_length(self.hparams["head_dim"])
         self.gguf_writer.add_value_length(self.hparams["head_dim"])
-        self.gguf_writer.add_float32("falcon_h1.key_multiplier", self.hparams["key_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.key_multiplier", self.hparams["key_multiplier"])
 
         ## Other params
-        self.gguf_writer.add_float32("falcon_h1.lm_head_multiplier", self.hparams["lm_head_multiplier"])
-        self.gguf_writer.add_float32("falcon_h1.embedding_multiplier", self.hparams["embedding_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.lm_head_multiplier", self.hparams["lm_head_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.embedding_multiplier", self.hparams["embedding_multiplier"])
 
         ## Validation ##
         assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
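The converter now stores these scalars with GGUFWriter.add_float64 instead of add_float32. A minimal writer-side sketch of the new calls, assuming the gguf-py package; the output path and the multiplier values are made up for illustration:

from gguf import GGUFWriter

# hypothetical output path; the "falcon_h1." key prefix matches the diff above
writer = GGUFWriter("falcon-h1.gguf", "falcon_h1")

hparams = {"key_multiplier": 0.5078125, "lm_head_multiplier": 1.0}  # example values only

# add_float64 stores the value as a 64-bit GGUF KV entry;
# add_float32 would round it to 32 bits on write
writer.add_float64("falcon_h1.key_multiplier", hparams["key_multiplier"])
writer.add_float64("falcon_h1.lm_head_multiplier", hparams["lm_head_multiplier"])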
@@ -6666,15 +6666,15 @@ class FalconH1Model(Mamba2Model):
                                 self.find_hparam(["num_attention_heads"]))
 
         # Add multipliers as metadata instead of tensors
-        self.gguf_writer.add_float32("falcon_h1.attention_in_multiplier", self.attention_in_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.attention_out_multiplier", self.attention_out_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.ssm_in_multiplier", self.ssm_in_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.ssm_out_multiplier", self.ssm_out_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.attention_in_multiplier", self.attention_in_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.attention_out_multiplier", self.attention_out_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.ssm_in_multiplier", self.ssm_in_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.ssm_out_multiplier", self.ssm_out_multiplier)
 
         # Add MLP multipliers
         if isinstance(self.mlp_multipliers, (list, tuple)) and len(self.mlp_multipliers) == 2:
-            self.gguf_writer.add_float32("falcon_h1.mlp_gate_multiplier", self.mlp_multipliers[0])
-            self.gguf_writer.add_float32("falcon_h1.mlp_down_multiplier", self.mlp_multipliers[1])
+            self.gguf_writer.add_float64("falcon_h1.mlp_gate_multiplier", self.mlp_multipliers[0])
+            self.gguf_writer.add_float64("falcon_h1.mlp_down_multiplier", self.mlp_multipliers[1])
 
         # Add has MuP flag if SSM multipliers are present
         if self.ssm_multipliers is not None:
@@ -6684,7 +6684,7 @@ class FalconH1Model(Mamba2Model):
         self.gguf_writer.add_bool("falcon_h1.mamba_use_mlp", self.find_hparam(["mamba_use_mlp"], optional=True))
         self.gguf_writer.add_bool("falcon_h1.mamba_norm_before_gate", self.find_hparam(["mamba_norm_before_gate"], optional=True))
         self.gguf_writer.add_bool("falcon_h1.mamba_rms_norm", self.find_hparam(["mamba_rms_norm"], optional=True))
-        self.gguf_writer.add_float32("falcon_h1.rope_theta", self.find_hparam(["rope_theta"], optional=True))
+        self.gguf_writer.add_float64("falcon_h1.rope_theta", self.find_hparam(["rope_theta"], optional=True))
 
 ###### CONVERSION LOGIC ######
 
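To confirm what ends up in the converted file, a quick read-back sketch with gguf-py's GGUFReader, assuming a converted model at this hypothetical path; after this commit the multiplier fields should report FLOAT64:

from gguf import GGUFReader, GGUFValueType

reader = GGUFReader("falcon-h1.gguf")  # hypothetical path
field = reader.get_field("falcon_h1.key_multiplier")

# a scalar field carries exactly one value type; it should now be FLOAT64
assert field is not None and field.types == [GGUFValueType.FLOAT64]

# the stored value lives in the part indexed by the field's data list
value = float(field.parts[field.data[0]][0])
print("key_multiplier =", value)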
@@ -122,17 +122,17 @@ struct llama_hparams {
     bool mamba_use_mlp = false;
     bool mamba_norm_before_gate = false;
     bool mamba_rms_norm = false;
-    float attention_in_multiplier = 1.0f;
-    float attention_out_multiplier = 1.0f;
-    float ssm_in_multiplier = 1.0f;
-    float ssm_out_multiplier = 1.0f;
-    float mlp_gate_multiplier = 1.0f;
-    float mlp_down_multiplier = 1.0f;
-    float key_multiplier = 1.0f;
-    float lm_head_multiplier = 1.0f;
-    float rope_theta = 10000.0f;
+    double attention_in_multiplier = 1.0;
+    double attention_out_multiplier = 1.0;
+    double ssm_in_multiplier = 1.0;
+    double ssm_out_multiplier = 1.0;
+    double mlp_gate_multiplier = 1.0;
+    double mlp_down_multiplier = 1.0;
+    double key_multiplier = 1.0;
+    double lm_head_multiplier = 1.0;
+    double rope_theta = 10000.0;
+    double embedding_multiplier = 1.0;
     bool ssm_has_mup = false;
-    float embedding_multiplier = 1.0f;
     uint32_t vocab_size = 0;
     uint32_t intermediate_size = 0;
     float mamba_expand = 0.0f;
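The hparams side widens the same fields from float to double. As a standalone illustration of what the extra width buys (the value below is invented, not taken from the model), a float32 round-trip loses digits that a float64 keeps:

import struct

value = 0.22135943621178655  # hypothetical multiplier value

# pack and unpack through each width to see what survives
as_f32 = struct.unpack("<f", struct.pack("<f", value))[0]
as_f64 = struct.unpack("<d", struct.pack("<d", value))[0]

print(f"original: {value!r}")
print(f"float32 : {as_f32!r}")  # rounded to ~7 significant digits
print(f"float64 : {as_f64!r}")  # identical to the original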
@@ -400,6 +400,7 @@ namespace GGUFMeta {
 
 template bool llama_model_loader::get_key<bool>       (enum llm_kv kid, bool & result, bool required);
 template bool llama_model_loader::get_key<float>      (enum llm_kv kid, float & result, bool required);
+template bool llama_model_loader::get_key<double>     (enum llm_kv kid, double & result, bool required);
 template bool llama_model_loader::get_key<uint32_t>   (enum llm_kv kid, uint32_t & result, bool required);
 template bool llama_model_loader::get_key<std::string>(enum llm_kv kid, std::string & result, bool required);
 
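This explicit instantiation gives llama_model_loader a get_key<double> overload, so the loader can read the new FLOAT64 entries into the double-typed hparams fields above. As a companion check from the Python side, a small sketch that lists which falcon_h1.* keys a converted file stores and with what type (path again hypothetical):

from gguf import GGUFReader

reader = GGUFReader("falcon-h1.gguf")  # hypothetical path
for name, field in reader.fields.items():
    if name.startswith("falcon_h1."):
        # each field reports its stored GGUF value type(s) by name
        print(name, [t.name for t in field.types])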