convert-hf : support Mini-Jamba conversion

2025-10-30 08:42:00 +00:00 · 2024-05-25 13:55:11 -04:00
parent ea2e63e9d2
commit fc59407efe
2 changed files with 23 additions and 1 deletion
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -207,6 +207,7 @@ class TensorNameMap:
            "model.layers.{bid}.ffn_norm",                                   # internlm2
            "transformer.decoder_layer.{bid}.rms_norm_2",                    # Grok
            "model.layers.{bid}.pre_ff_layernorm",                           # jamba
+            "model.layers.{bid}.pre_moe_layernorm",                          # mini-jamba
        ),

        MODEL_TENSOR.FFN_GATE_INP: (
@@ -390,10 +391,12 @@ class TensorNameMap:

        MODEL_TENSOR.SSM_B_NORM: (
            "model.layers.{bid}.mamba.b_layernorm",  # jamba
+            "model.layers.{bid}.mamba.B_layernorm",  # mini-jamba
        ),

        MODEL_TENSOR.SSM_C_NORM: (
            "model.layers.{bid}.mamba.c_layernorm",  # jamba
+            "model.layers.{bid}.mamba.C_layernorm",  # mini-jamba
        ),

        MODEL_TENSOR.SSM_D: (