Merge branch 'add-fh1-rebased' of https://github.com/tiiuae/llama.cpp-public into add-fh1-rebased

2025-11-03 09:22:01 +00:00 · 2025-07-08 10:48:07 +04:00
parent f028a43a91 a846d02327
commit d41f111462
3 changed files with 4 additions and 8 deletions
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -289,7 +289,7 @@ class MODEL_ARCH(IntEnum):
    LLAMA4           = auto()
    DECI             = auto()
    FALCON           = auto()
-    FALCON_H1           = auto()
+    FALCON_H1        = auto()
    BAICHUAN         = auto()
    GROK             = auto()
    GPT2             = auto()
@@ -662,7 +662,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
    MODEL_ARCH.DOTS1:            "dots1",
    MODEL_ARCH.ARCEE:            "arcee",
    MODEL_ARCH.ERNIE4_5:         "ernie4_5",
-    MODEL_ARCH.FALCON_H1:         "falcon_h1",
+    MODEL_ARCH.FALCON_H1:        "falcon_h1",
 }

 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -583,6 +583,7 @@ class TensorNameMap:
        ),

        MODEL_TENSOR.SSM_NORM: (
+            "model.layers.{bid}.mamba.norm",  # falcon-h1
            "backbone.layers.{bid}.mixer.norm",  # mamba2
        ),

@@ -1177,10 +1178,6 @@ class TensorNameMap:
            "resampler.attn.out_proj",
        ),
        
-        MODEL_TENSOR.SSM_NORM: (
-            "model.layers.{bid}.mamba.norm",
-        ),
-
        MODEL_TENSOR.V_RESMPL_KV: (
            "resampler.kv_proj",
        ),
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -1951,8 +1951,7 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
 }

 bool llm_arch_is_hybrid(const llm_arch & arch) {
-    // TODO: There are currently no hybrid models! Once there are, this will be
-    //  the place to identify them
+    // List all mamba-attention hybrid models here
    switch (arch) {
        case LLM_ARCH_FALCON_H1:
            return true;