diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 108f12be22..17feb9430c 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -6645,11 +6645,11 @@ class FalconH1Model(Mamba2Model):
         self.gguf_writer.add_layer_norm_rms_eps(self.hparams["rms_norm_eps"])
         self.gguf_writer.add_key_length(self.hparams["head_dim"])
         self.gguf_writer.add_value_length(self.hparams["head_dim"])
-        self.gguf_writer.add_float32("falcon_h1.key_multiplier", self.hparams["key_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.key_multiplier", self.hparams["key_multiplier"])
 
         ## Other params
-        self.gguf_writer.add_float32("falcon_h1.lm_head_multiplier", self.hparams["lm_head_multiplier"])
-        self.gguf_writer.add_float32("falcon_h1.embedding_multiplier", self.hparams["embedding_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.lm_head_multiplier", self.hparams["lm_head_multiplier"])
+        self.gguf_writer.add_float64("falcon_h1.embedding_multiplier", self.hparams["embedding_multiplier"])
 
         ## Validation ##
         assert self.hparams.get("hidden_act") in [None, "silu"], "Only SILU activation supported"
@@ -6666,15 +6666,15 @@ class FalconH1Model(Mamba2Model):
                                            self.find_hparam(["num_attention_heads"]))
 
         # Add multipliers as metadata instead of tensors
-        self.gguf_writer.add_float32("falcon_h1.attention_in_multiplier", self.attention_in_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.attention_out_multiplier", self.attention_out_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.ssm_in_multiplier", self.ssm_in_multiplier)
-        self.gguf_writer.add_float32("falcon_h1.ssm_out_multiplier", self.ssm_out_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.attention_in_multiplier", self.attention_in_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.attention_out_multiplier", self.attention_out_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.ssm_in_multiplier", self.ssm_in_multiplier)
+        self.gguf_writer.add_float64("falcon_h1.ssm_out_multiplier", self.ssm_out_multiplier)
 
         # Add MLP multipliers
         if isinstance(self.mlp_multipliers, (list, tuple)) and len(self.mlp_multipliers) == 2:
-            self.gguf_writer.add_float32("falcon_h1.mlp_gate_multiplier", self.mlp_multipliers[0])
-            self.gguf_writer.add_float32("falcon_h1.mlp_down_multiplier", self.mlp_multipliers[1])
+            self.gguf_writer.add_float64("falcon_h1.mlp_gate_multiplier", self.mlp_multipliers[0])
+            self.gguf_writer.add_float64("falcon_h1.mlp_down_multiplier", self.mlp_multipliers[1])
 
         # Add has MuP flag if SSM multipliers are present
         if self.ssm_multipliers is not None:
@@ -6684,7 +6684,7 @@ class FalconH1Model(Mamba2Model):
         self.gguf_writer.add_bool("falcon_h1.mamba_use_mlp", self.find_hparam(["mamba_use_mlp"], optional=True))
         self.gguf_writer.add_bool("falcon_h1.mamba_norm_before_gate", self.find_hparam(["mamba_norm_before_gate"], optional=True))
         self.gguf_writer.add_bool("falcon_h1.mamba_rms_norm", self.find_hparam(["mamba_rms_norm"], optional=True))
-        self.gguf_writer.add_float32("falcon_h1.rope_theta", self.find_hparam(["rope_theta"], optional=True))
+        self.gguf_writer.add_float64("falcon_h1.rope_theta", self.find_hparam(["rope_theta"], optional=True))
 
 
 ###### CONVERSION LOGIC ######
diff --git a/src/llama-hparams.h b/src/llama-hparams.h
index d671edaa4d..2142a74aaf 100644
--- a/src/llama-hparams.h
+++ b/src/llama-hparams.h
@@ -122,17 +122,17 @@ struct llama_hparams {
     bool mamba_use_mlp = false;
     bool mamba_norm_before_gate = false;
     bool mamba_rms_norm = false;
-    float attention_in_multiplier = 1.0f;
-    float attention_out_multiplier = 1.0f;
-    float ssm_in_multiplier = 1.0f;
-    float ssm_out_multiplier = 1.0f;
-    float mlp_gate_multiplier = 1.0f;
-    float mlp_down_multiplier = 1.0f;
-    float key_multiplier = 1.0f;
-    float lm_head_multiplier = 1.0f;
-    float rope_theta = 10000.0f;
+    double attention_in_multiplier = 1.0;
+    double attention_out_multiplier = 1.0;
+    double ssm_in_multiplier = 1.0;
+    double ssm_out_multiplier = 1.0;
+    double mlp_gate_multiplier = 1.0;
+    double mlp_down_multiplier = 1.0;
+    double key_multiplier = 1.0;
+    double lm_head_multiplier = 1.0;
+    double rope_theta = 10000.0;
+    double embedding_multiplier = 1.0;
     bool ssm_has_mup = false;
-    float embedding_multiplier = 1.0f;
     uint32_t vocab_size = 0;
     uint32_t intermediate_size = 0;
     float mamba_expand = 0.0f;
diff --git a/src/llama-model-loader.cpp b/src/llama-model-loader.cpp
index bd9e6da883..43079c32a6 100644
--- a/src/llama-model-loader.cpp
+++ b/src/llama-model-loader.cpp
@@ -400,6 +400,7 @@ namespace GGUFMeta {
 
     template bool llama_model_loader::get_key (enum llm_kv kid, bool & result, bool required);
     template bool llama_model_loader::get_key (enum llm_kv kid, float & result, bool required);
+    template bool llama_model_loader::get_key (enum llm_kv kid, double & result, bool required);
     template bool llama_model_loader::get_key (enum llm_kv kid, uint32_t & result, bool required);
     template bool llama_model_loader::get_key(enum llm_kv kid, std::string & result, bool required);