injected mup

2025-11-03 09:22:01 +00:00 · 2025-07-07 15:00:25 +04:00
parent b3bc1fb237
commit a9f3a63dc1
9 changed files with 43 additions and 101 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -527,7 +527,6 @@ class MODEL_TENSOR(IntEnum):
    POSNET_ATTN_K        = auto()
    POSNET_ATTN_V        = auto()
    POSNET_ATTN_OUT      = auto()
-    SSM_MUP_VEC          = auto()
    # vision
    V_MMPROJ             = auto()
    V_MMPROJ_FC          = auto()
@@ -740,7 +739,6 @@ TENSOR_NAMES: dict[MODEL_TENSOR, str] = {
    MODEL_TENSOR.SSM_D:                     "blk.{bid}.ssm_d",
    MODEL_TENSOR.SSM_NORM:                  "blk.{bid}.ssm_norm",
    MODEL_TENSOR.SSM_OUT:                   "blk.{bid}.ssm_out",
-    MODEL_TENSOR.SSM_MUP_VEC:               "blk.{bid}.ssm_mup_vec",
    MODEL_TENSOR.TIME_MIX_W0:               "blk.{bid}.time_mix_w0",
    MODEL_TENSOR.TIME_MIX_W1:               "blk.{bid}.time_mix_w1",
    MODEL_TENSOR.TIME_MIX_W2:               "blk.{bid}.time_mix_w2",
@@ -2230,7 +2228,6 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
        MODEL_TENSOR.ATTN_OUT,       # Output projection

        # SSM components (Mamba2 specific)
-        MODEL_TENSOR.SSM_MUP_VEC,    # Mup vector
        MODEL_TENSOR.SSM_IN,         # Input projection for SSM
        MODEL_TENSOR.SSM_CONV1D,     # Convolution layer
        MODEL_TENSOR.SSM_DT,         # Delta time projection
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -1176,11 +1176,7 @@ class TensorNameMap:
        MODEL_TENSOR.V_RESMPL_ATTN_OUT: (
            "resampler.attn.out_proj",
        ),
-
-        MODEL_TENSOR.SSM_MUP_VEC: (
-            "model.layers.{bid}.mamba.mup_vector",            # falcon_h1
-        ),
-
+        
        MODEL_TENSOR.SSM_NORM: (
            "model.layers.{bid}.mamba.norm",
        ),