From 2dee7cf96479ea7cfc50dc5bb0520cba6a51aaca Mon Sep 17 00:00:00 2001
From: Younes B <49240599+younesbelkada@users.noreply.github.com>
Date: Tue, 8 Jul 2025 10:43:50 +0400
Subject: [PATCH 1/2] Apply suggestions from code review

Co-authored-by: Georgi Gerganov
---
 gguf-py/gguf/constants.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/gguf-py/gguf/constants.py b/gguf-py/gguf/constants.py
index 84a91e82ca..5ae4eee802 100644
--- a/gguf-py/gguf/constants.py
+++ b/gguf-py/gguf/constants.py
@@ -289,7 +289,7 @@ class MODEL_ARCH(IntEnum):
     LLAMA4           = auto()
     DECI             = auto()
     FALCON           = auto()
-    FALCON_H1 = auto()
+    FALCON_H1        = auto()
     BAICHUAN         = auto()
     GROK             = auto()
     GPT2             = auto()
@@ -662,7 +662,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
     MODEL_ARCH.DOTS1:        "dots1",
     MODEL_ARCH.ARCEE:        "arcee",
     MODEL_ARCH.ERNIE4_5:     "ernie4_5",
-    MODEL_ARCH.FALCON_H1: "falcon_h1",
+    MODEL_ARCH.FALCON_H1:    "falcon_h1",
 }
 
 VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {

From a846d02327b08e58528d5967ad5cfe3f30f2087a Mon Sep 17 00:00:00 2001
From: younesbelkada
Date: Tue, 8 Jul 2025 10:44:59 +0400
Subject: [PATCH 2/2] remove todo

---
 gguf-py/gguf/tensor_mapping.py | 5 +----
 src/llama-arch.cpp             | 3 +--
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index ff3f273bd5..b8b081a90b 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -583,6 +583,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.SSM_NORM: (
+            "model.layers.{bid}.mamba.norm", # falcon-h1
             "backbone.layers.{bid}.mixer.norm", # mamba2
         ),
 
@@ -1177,10 +1178,6 @@ class TensorNameMap:
             "resampler.attn.out_proj",
         ),
 
-        MODEL_TENSOR.SSM_NORM: (
-            "model.layers.{bid}.mamba.norm",
-        ),
-
         MODEL_TENSOR.V_RESMPL_KV: (
             "resampler.kv_proj",
         ),

diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index b6bd815090..f6c092cf20 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -1955,8 +1955,7 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
 }
 
 bool llm_arch_is_hybrid(const llm_arch & arch) {
-    // TODO: There are currently no hybrid models! Once there are, this will be
-    //       the place to identify them
+    // List all mamba-attention hybrid models here
    switch (arch) {
        case LLM_ARCH_FALCON_H1: return true;
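
Note (not part of the patches): gguf-py resolves checkpoint tensor names through TensorNameMap, which expands "{bid}" templates once per block index. The second patch moves the falcon-h1 source name "model.layers.{bid}.mamba.norm" out of a stray duplicate SSM_NORM block (which sat among the vision-resampler entries) into the canonical SSM_NORM entry next to the mamba2 name, so each tensor type keeps a single list of candidate source names. The following is a minimal, self-contained sketch of that template-expansion idea, not the gguf-py implementation; the helper build_ssm_norm_map and the GGUF-side target name "blk.{bid}.ssm_norm" are illustrative assumptions.

# Minimal sketch of TensorNameMap-style "{bid}" template expansion.
# NOT the gguf-py implementation; the helper name and the GGUF-side
# target "blk.{bid}.ssm_norm" are illustrative assumptions.

SSM_NORM_SOURCES = (
    "model.layers.{bid}.mamba.norm",    # falcon-h1 (added by PATCH 2/2)
    "backbone.layers.{bid}.mixer.norm", # mamba2
)

def build_ssm_norm_map(n_blocks: int) -> dict[str, str]:
    """Expand each source template for every block index, mapping the
    checkpoint-side name to one GGUF-side name per block."""
    mapping: dict[str, str] = {}
    for bid in range(n_blocks):
        for template in SSM_NORM_SOURCES:
            mapping[template.format(bid=bid)] = f"blk.{bid}.ssm_norm"
    return mapping

if __name__ == "__main__":
    m = build_ssm_norm_map(n_blocks=4)
    # Both spellings resolve to the same GGUF tensor name for block 2:
    print(m["model.layers.2.mamba.norm"])    # blk.2.ssm_norm
    print(m["backbone.layers.2.mixer.norm"]) # blk.2.ssm_norm

With a single mapping entry per tensor type, falcon-h1 and mamba2 checkpoints share the same lookup path instead of relying on duplicate, order-dependent entries.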