model: Add support for CogVLM model (#15002)

* Added GGUF mappings for CogVLM model
* Add tensor mapping for CogVLM visual encoder
* Add CogVLM to conversion script, no vision part yet
* Added CogVLM vision model to conversion script
* Add graph for CogVLM CLIP model
* Add graph for CogVLM
* Fixes for CogVLM. Now compiles.
* Model now runs
* Fixes for cogvlm graph
* Account for graph context change after rebase
* Changes for whitespace
* Changes in convert script according to comments
* Switch CogVLM LLM graph to merged QKV tensor
* Use rope_type variable instead of direct definition
* Change CogVLM CLIP encoder to use SWIGLU
* Switch CogVLM CLIP to use merged QKV
* Apply rebase edits and remove ggml_cont call that is now unnecessary
* clean up

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2025-11-14 11:07:10 +00:00 · 2025-10-30 07:18:50 -04:00
parent 229bf68628
commit bacddc049a
9 changed files with 501 additions and 26 deletions
--- a/src/llama-arch.h
+++ b/src/llama-arch.h
@@ -107,6 +107,7 @@ enum llm_arch {
    LLM_ARCH_SEED_OSS,
    LLM_ARCH_GROVEMOE,
    LLM_ARCH_APERTUS,
+    LLM_ARCH_COGVLM,
    LLM_ARCH_UNKNOWN,
 };

@@ -455,6 +456,11 @@ enum llm_tensor {
    LLM_TENSOR_SHORTCONV_CONV,
    LLM_TENSOR_SHORTCONV_INPROJ,
    LLM_TENSOR_SHORTCONV_OUTPROJ,
+    LLM_TENSOR_VISEXP_ATTN_QKV,
+    LLM_TENSOR_VISEXP_ATTN_OUT,
+    LLM_TENSOR_VISEXP_FFN_GATE,
+    LLM_TENSOR_VISEXP_FFN_DOWN,
+    LLM_TENSOR_VISEXP_FFN_UP,
    LLM_TENSOR_NEXTN_EH_PROJ,
    LLM_TENSOR_NEXTN_EMBED_TOKENS,
    LLM_TENSOR_NEXTN_ENORM,