Add LLaDA-7b-MoE diffusion model (#16003)

This commit is contained in:
Aman Gupta
2025-09-16 10:38:28 +08:00
committed by GitHub
parent 3d4053f77f
commit 6d758839ff
9 changed files with 315 additions and 9 deletions

View File

@@ -399,6 +399,7 @@ class MODEL_ARCH(IntEnum):
DREAM = auto()
SMALLTHINKER = auto()
LLADA = auto()
LLADA_MOE = auto()
SEED_OSS = auto()
@@ -735,6 +736,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
MODEL_ARCH.DREAM: "dream",
MODEL_ARCH.SMALLTHINKER: "smallthinker",
MODEL_ARCH.LLADA: "llada",
MODEL_ARCH.LLADA_MOE: "llada-moe",
MODEL_ARCH.SEED_OSS: "seed_oss",
}
@@ -2693,6 +2695,23 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
MODEL_TENSOR.FFN_DOWN_EXP,
MODEL_TENSOR.FFN_UP_EXP,
],
# Tensor kinds registered for the LLADA_MOE architecture in MODEL_TENSORS.
# NOTE(review): the *_EXP entries presumably denote per-expert FFN weights and
# FFN_GATE_INP the expert-routing input -- confirm against the MODEL_TENSOR
# definitions, which are not visible in this hunk.
MODEL_ARCH.LLADA_MOE: [
MODEL_TENSOR.TOKEN_EMBD,
MODEL_TENSOR.OUTPUT_NORM,
MODEL_TENSOR.OUTPUT,
# Attention tensors, including separate Q and K norm entries.
MODEL_TENSOR.ATTN_OUT,
MODEL_TENSOR.ATTN_Q,
MODEL_TENSOR.ATTN_K,
MODEL_TENSOR.ATTN_V,
MODEL_TENSOR.ATTN_NORM,
MODEL_TENSOR.ATTN_Q_NORM,
MODEL_TENSOR.ATTN_K_NORM,
# Feed-forward tensors: only the gate-input and *_EXP variants are listed
# here (no plain FFN_GATE / FFN_UP / FFN_DOWN entries).
MODEL_TENSOR.FFN_NORM,
MODEL_TENSOR.FFN_GATE_INP,
MODEL_TENSOR.FFN_GATE_EXP,
MODEL_TENSOR.FFN_UP_EXP,
MODEL_TENSOR.FFN_DOWN_EXP,
],
# TODO
}