model: Add support for CogVLM model (#15002)

* Added GGUF mappings for CogVLM model

* Add tensor mapping for CogVLM visual encoder

* Add CogVLM to conversion script, no vision part yet

* Added CogVLM vision model to conversion script

* Add graph for CogVLM CLIP model

* Add graph for CogVLM

* Fixes for CogVLM. Now compiles.

* Model now runs

* Fixes for cogvlm graph

* Account for graph context change after rebase

* Changes for whitespace

* Changes in convert script according to comments

* Switch CogVLM LLM graph to merged QKV tensor

* Use rope_type variable instead of direct definition

* Change CogVLM CLIP encoder to use SWIGLU

* Switch CogVLM CLIP to use merged QKV

* Apply rebase edits and remove ggml_cont call that is now unnecessary

* clean up

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
Tianyue-Zhao
2025-10-30 07:18:50 -04:00
committed by GitHub
parent 229bf68628
commit bacddc049a
9 changed files with 501 additions and 26 deletions

View File

@@ -103,6 +103,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
{ LLM_ARCH_SEED_OSS, "seed_oss" },
{ LLM_ARCH_GROVEMOE, "grovemoe" },
{ LLM_ARCH_APERTUS, "apertus" },
{ LLM_ARCH_COGVLM, "cogvlm" },
{ LLM_ARCH_UNKNOWN, "(unknown)" },
};
@@ -2312,6 +2313,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
{ LLM_TENSOR_FFN_UP_CHEXPS, "blk.%d.ffn_up_chexps" },
},
},
{
LLM_ARCH_COGVLM,
{
{ LLM_TENSOR_TOKEN_EMBD, "token_embd" },
{ LLM_TENSOR_OUTPUT_NORM, "output_norm" },
{ LLM_TENSOR_OUTPUT, "output" },
{ LLM_TENSOR_ATTN_NORM, "blk.%d.attn_norm" },
{ LLM_TENSOR_ATTN_QKV, "blk.%d.attn_qkv" },
{ LLM_TENSOR_ATTN_OUT, "blk.%d.attn_output" },
{ LLM_TENSOR_FFN_NORM, "blk.%d.ffn_norm" },
{ LLM_TENSOR_FFN_GATE, "blk.%d.ffn_gate" },
{ LLM_TENSOR_FFN_DOWN, "blk.%d.ffn_down" },
{ LLM_TENSOR_FFN_UP, "blk.%d.ffn_up" },
{ LLM_TENSOR_VISEXP_ATTN_QKV, "blk.%d.vis_attn_qkv" },
{ LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
{ LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
{ LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
{ LLM_TENSOR_VISEXP_FFN_UP, "blk.%d.vis_up" },
},
},
{
LLM_ARCH_UNKNOWN,
{
@@ -2488,6 +2509,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
{LLM_TENSOR_SHORTCONV_CONV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
{LLM_TENSOR_SHORTCONV_INPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_SHORTCONV_OUTPROJ, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_VISEXP_ATTN_QKV, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_VISEXP_ATTN_OUT, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_VISEXP_FFN_GATE, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_VISEXP_FFN_DOWN, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
{LLM_TENSOR_VISEXP_FFN_UP, {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
// NextN/MTP tensors are currently ignored (reserved for future MTP support)
// These tensors only exist in the last layer(s) and are treated as output tensors
{LLM_TENSOR_NEXTN_EH_PROJ, {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},