	convert : use F32 operations on Mamba A_log
This matches the previous behavior for BF16 tensors.
@@ -4356,7 +4356,7 @@ class Plamo2Model(TextModel):
         del bid  # unused
 
         if name.endswith(".A_log"):
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
         elif name.endswith(".dt_bias"):
             name = name.rpartition(".dt_bias")[0] + ".dt_proj.bias"
         elif name.endswith(".dt_norm_weight"):
@@ -5829,7 +5829,7 @@ class MambaModel(TextModel):
 
         if name.endswith(".A_log"):
             logger.debug("A_log --> A ==> " + new_name)
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
 
         # [4 1 8192 1] -> [4 8192 1 1]
         if self.match_model_tensor_name(new_name, gguf.MODEL_TENSOR.SSM_CONV1D, bid):
@@ -5934,7 +5934,7 @@ class Mamba2Model(TextModel):
 
         if name.endswith(".A_log"):
             logger.debug("A_log --> A ==> " + new_name)
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
 
         yield (new_name, data_torch)
 
@@ -6042,7 +6042,7 @@ class JambaModel(TextModel):
 
         if name.endswith(".A_log"):
             logger.debug("A_log --> A ==> " + new_name)
-            data_torch = -torch.exp(data_torch)
+            data_torch = -torch.exp(data_torch.float())
 
         yield (new_name, data_torch)
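The cast only changes the result when A_log is stored in reduced precision; for F32 checkpoints it is a no-op. As a side note (not part of the commit), here is a minimal sketch with made-up values, assuming A_log is stored in BF16, of the numerical difference between evaluating -exp(A_log) directly in BF16 and casting to F32 first as the converter now does:

import torch

# Made-up A_log values stored in bfloat16, as in some Mamba checkpoints.
a_log = torch.tensor([0.1234, 2.3456, 5.6789], dtype=torch.bfloat16)

# exp evaluated in bfloat16: the result is rounded to bfloat16 precision
# (7 explicit mantissa bits).
a_bf16 = -torch.exp(a_log)

# exp evaluated in float32 after an explicit cast (what the converter does now).
a_f32 = -torch.exp(a_log.float())

print(a_bf16.dtype, a_f32.dtype)             # torch.bfloat16 torch.float32
print((a_bf16.float() - a_f32).abs().max())  # typically a small non-zero rounding difference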
Francis Couture-Harpin