Merge branch 'add-fh1-rebased' of https://github.com/tiiuae/llama.cpp-public into add-fh1-rebased

This commit is contained in:
ibrahimkhadraoui
2025-07-08 10:48:07 +04:00
3 changed files with 4 additions and 8 deletions

View File

@@ -289,7 +289,7 @@ class MODEL_ARCH(IntEnum):
LLAMA4 = auto() LLAMA4 = auto()
DECI = auto() DECI = auto()
FALCON = auto() FALCON = auto()
FALCON_H1 = auto() FALCON_H1 = auto()
BAICHUAN = auto() BAICHUAN = auto()
GROK = auto() GROK = auto()
GPT2 = auto() GPT2 = auto()
@@ -662,7 +662,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.DOTS1: "dots1", MODEL_ARCH.DOTS1: "dots1",
MODEL_ARCH.ARCEE: "arcee", MODEL_ARCH.ARCEE: "arcee",
MODEL_ARCH.ERNIE4_5: "ernie4_5", MODEL_ARCH.ERNIE4_5: "ernie4_5",
MODEL_ARCH.FALCON_H1: "falcon_h1", MODEL_ARCH.FALCON_H1: "falcon_h1",
} }
VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = { VISION_PROJECTOR_TYPE_NAMES: dict[VISION_PROJECTOR_TYPE, str] = {

View File

@@ -583,6 +583,7 @@ class TensorNameMap:
), ),
MODEL_TENSOR.SSM_NORM: ( MODEL_TENSOR.SSM_NORM: (
"model.layers.{bid}.mamba.norm", # falcon-h1
"backbone.layers.{bid}.mixer.norm", # mamba2 "backbone.layers.{bid}.mixer.norm", # mamba2
), ),
@@ -1177,10 +1178,6 @@ class TensorNameMap:
"resampler.attn.out_proj", "resampler.attn.out_proj",
), ),
MODEL_TENSOR.SSM_NORM: (
"model.layers.{bid}.mamba.norm",
),
MODEL_TENSOR.V_RESMPL_KV: ( MODEL_TENSOR.V_RESMPL_KV: (
"resampler.kv_proj", "resampler.kv_proj",
), ),

View File

@@ -1951,8 +1951,7 @@ bool llm_arch_is_recurrent(const llm_arch & arch) {
} }
bool llm_arch_is_hybrid(const llm_arch & arch) { bool llm_arch_is_hybrid(const llm_arch & arch) {
// TODO: There are currently no hybrid models! Once there are, this will be // List all mamba-attention hybrid models here
// the place to identify them
switch (arch) { switch (arch) {
case LLM_ARCH_FALCON_H1: case LLM_ARCH_FALCON_H1:
return true; return true;