mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-07 09:57:00 +00:00
Merge branch 'master' into compilade/refactor-kv-cache
This commit is contained in:
@@ -288,6 +288,7 @@ class MODEL_ARCH(IntEnum):
|
||||
LLAMA4 = auto()
|
||||
DECI = auto()
|
||||
FALCON = auto()
|
||||
FALCON_H1 = auto()
|
||||
BAICHUAN = auto()
|
||||
GROK = auto()
|
||||
GPT2 = auto()
|
||||
@@ -667,6 +668,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
MODEL_ARCH.DOTS1: "dots1",
|
||||
MODEL_ARCH.ARCEE: "arcee",
|
||||
MODEL_ARCH.ERNIE4_5: "ernie4_5",
|
||||
MODEL_ARCH.FALCON_H1: "falcon-h1",
|
||||
MODEL_ARCH.HUNYUAN_MOE: "hunyuan-moe",
|
||||
MODEL_ARCH.SMOLLM3: "smollm3",
|
||||
}
|
||||
@@ -2251,6 +2253,40 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
],
|
||||
MODEL_ARCH.FALCON_H1: [
|
||||
# Token embedding
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
|
||||
# Input layernorm
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
|
||||
# Attention components
|
||||
MODEL_TENSOR.ATTN_Q, # Query projection
|
||||
MODEL_TENSOR.ATTN_K, # Key projection
|
||||
MODEL_TENSOR.ATTN_V, # Value projection
|
||||
MODEL_TENSOR.ATTN_OUT, # Output projection
|
||||
|
||||
# SSM components (Mamba2 specific)
|
||||
MODEL_TENSOR.SSM_IN, # Input projection for SSM
|
||||
MODEL_TENSOR.SSM_CONV1D, # Convolution layer
|
||||
MODEL_TENSOR.SSM_DT, # Delta time projection
|
||||
MODEL_TENSOR.SSM_A, # A parameter (log form)
|
||||
MODEL_TENSOR.SSM_D, # D parameter
|
||||
MODEL_TENSOR.SSM_NORM, # Normalization in SSM
|
||||
MODEL_TENSOR.SSM_OUT, # Output projection
|
||||
|
||||
# Pre-feedforward layernorm
|
||||
MODEL_TENSOR.FFN_PRE_NORM,
|
||||
|
||||
# Feed-forward network components
|
||||
MODEL_TENSOR.FFN_GATE, # Gate projection (SwiGLU)
|
||||
MODEL_TENSOR.FFN_DOWN, # Down projection
|
||||
MODEL_TENSOR.FFN_UP, # Up projection
|
||||
|
||||
# Final layer norm and output projection (lm_head)
|
||||
MODEL_TENSOR.OUTPUT_NORM, # Final layer norm
|
||||
MODEL_TENSOR.OUTPUT, # Output projection (lm_head)
|
||||
],
|
||||
MODEL_ARCH.HUNYUAN_MOE: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
|
||||
Reference in New Issue
Block a user