From c4af0f3ca531bc6e295efae66aea5a8b6ae149de Mon Sep 17 00:00:00 2001
From: ibrahimkhadraoui
Date: Mon, 7 Jul 2025 11:17:31 +0400
Subject: [PATCH] mamba_d_ssm added to d_inner find_hparam

---
 convert_hf_to_gguf.py | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index 89bfc33c8c..316d9b59d3 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -4899,7 +4899,7 @@ class Mamba2Model(TextModel):
     def set_gguf_parameters(self):
         d_model = self.find_hparam(["hidden_size", "d_model", "dim"])
         d_conv = self.find_hparam(["conv_kernel", "d_conv"], optional=True) or 4
-        d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
+        d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * d_model
         d_state = self.find_hparam(["state_size", "d_state"], optional=True) or 128
         head_dim = self.find_hparam(["head_dim"], optional=True) or 64
         n_group = self.find_hparam(["n_groups"], optional=True) or 1
@@ -4948,12 +4948,8 @@ class Mamba2Model(TextModel):
             data_torch = data_torch.reshape((*data_torch.shape, 1))
         elif self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_NORM, bid):
             d_model = self.find_hparam(["hidden_size", "d_model", "dim"])
-            d_inner = self.find_hparam(["intermediate_size", "d_inner"], optional=True) or 2 * d_model
+            d_inner = self.find_hparam(["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * d_model
             n_group = self.hparams.get("n_groups", 1)
-            architectures = self.hparams.get("architectures")
-            if architectures is not None and architectures[0] == "FalconH1ForCausalLM":
-                # FalconH1F has a different d_inner
-                d_inner = self.hparams.get("mamba_d_ssm")
             data_torch = data_torch.reshape((n_group, d_inner // n_group))

         if name.endswith(".A_log"):
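
Notes (not part of the patch): the change prepends "mamba_d_ssm" to the key list used for the d_inner lookup, so configs that carry that field (e.g. FalconH1) resolve the SSM inner size through the normal priority search instead of the removed architecture-specific branch. Below is a minimal sketch of how such an ordered-key lookup with a fallback behaves; the find_hparam helper and the config dicts and values here are simplified illustrations, not the actual llama.cpp implementation or real model hyperparameters:

from typing import Any, Optional


def find_hparam(hparams: dict, keys: list, optional: bool = False) -> Optional[Any]:
    # Return the value of the first key present in the config, in priority order.
    for key in keys:
        if key in hparams:
            return hparams[key]
    if optional:
        return None
    raise KeyError(f"none of {keys} found in hparams")


# Hypothetical configs for illustration only.
mamba2_cfg = {"hidden_size": 2048, "intermediate_size": 4096}
falcon_h1_cfg = {"hidden_size": 4096, "intermediate_size": 16384, "mamba_d_ssm": 3072}

for cfg in (mamba2_cfg, falcon_h1_cfg):
    d_model = find_hparam(cfg, ["hidden_size", "d_model", "dim"])
    # With "mamba_d_ssm" first, FalconH1-style configs pick up the SSM inner size
    # directly; plain Mamba-2 configs fall through to "intermediate_size" as before,
    # and configs with neither key default to 2 * d_model.
    d_inner = find_hparam(cfg, ["mamba_d_ssm", "intermediate_size", "d_inner"], optional=True) or 2 * d_model
    print(d_inner)  # prints 4096, then 3072

Because both call sites in the patch use the same key order, set_gguf_parameters and the SSM_NORM reshape stay consistent without duplicating the FalconH1 special case.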