From fb879b40c05d204e3cb298b4ec5cbd3106155e82 Mon Sep 17 00:00:00 2001
From: Francis Couture-Harpin
Date: Thu, 4 Sep 2025 18:43:10 -0400
Subject: [PATCH] convert : use F32 operations on Mamba A_log

This matches the previous behavior for BF16 tensors.
---
 convert_hf_to_gguf.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/convert_hf_to_gguf.py b/convert_hf_to_gguf.py
index f14eef1452..8556c26825 100755
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -4356,7 +4356,7 @@ class Plamo2Model(TextModel):
         del bid  # unused
 
         if name.endswith(".A_log"):
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
         elif name.endswith(".dt_bias"):
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
         elif name.endswith(".dt_norm_weight"):
@@ -5829,7 +5829,7 @@ class MambaModel(TextModel):
 
         if name.endswith(".A_log"):
             logger.debug("A_log --> A ==> " + new_name)
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
 
         # [4 1 8192 1] -> [4 8192 1 1]
         if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
@@ -5934,7 +5934,7 @@ class Mamba2Model(TextModel):
 
         if name.endswith(".A_log"):
             logger.debug("A_log --> A ==> " + new_name)
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
 
         yield (new_name, data_torch)
 
@@ -6042,7 +6042,7 @@ class JambaModel(TextModel):
 
         if name.endswith(".A_log"):
             logger.debug("A_log --> A ==> " + new_name)
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
 
         yield (new_name, data_torch)
 