diff --git a/gguf-py/gguf/tensor_mapping.py b/gguf-py/gguf/tensor_mapping.py
index ff3f273bd5..b8b081a90b 100644
--- a/gguf-py/gguf/tensor_mapping.py
+++ b/gguf-py/gguf/tensor_mapping.py
@@ -583,6 +583,7 @@ class TensorNameMap:
         ),
 
         MODEL_TENSOR.SSM_NORM: (
+            "model.layers.{bid}.mamba.norm", # falcon-h1
             "backbone.layers.{bid}.mixer.norm", # mamba2
         ),
 
@@ -1177,10 +1178,6 @@ class TensorNameMap:
             "resampler.attn.out_proj",
         ),
 
-        MODEL_TENSOR.SSM_NORM: (
-            "model.layers.{bid}.mamba.norm",
-        ),
-
         MODEL_TENSOR.V_RESMPL_KV: (
             "resampler.kv_proj",
         ),
diff --git a/src/llama-arch.cpp b/src/llama-arch.cpp
index b6bd815090..f6c092cf20 100644
--- a/src/llama-arch.cpp
+++ b/src/llama-arch.cpp
@@ -1955,8 +1955,7 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
 }
 
 bool llm_arch_is_hybrid(const llm_arch & arch) {
-    // TODO: There are currently no hybrid models! Once there are, this will be
-    // the place to identify them
+    // List all mamba-attention hybrid models here
     switch (arch) {
         case LLM_ARCH_FALCON_H1:
             return true;