Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-11-04 09:32:00 +00:00
			
		
		
		
	model: Add support for CogVLM model (#15002)
* Added GGUF mappings for CogVLM model
* Add tensor mapping for CogVLM visual encoder
* Add CogVLM to conversion script, no vision part yet
* Added CogVLM vision model to conversion script
* Add graph for CogVLM CLIP model
* Add graph for CogVLM
* Fixes for CogVLM. Now compiles.
* Model now runs
* Fixes for cogvlm graph
* Account for graph context change after rebase
* Changes for whitespace
* Changes in convert script according to comments
* Switch CogVLM LLM graph to merged QKV tensor
* Use rope_type variable instead of direct definition
* Change CogVLM CLIP encoder to use SWIGLU
* Switch CogVLM CLIP to use merged QKV
* Apply rebase edits and remove ggml_cont call that is now unnecessary
* clean up

---------

Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
This commit is contained in:
		@@ -103,6 +103,7 @@ static const std::map<llm_arch, const char *> LLM_ARCH_NAMES = {
 | 
			
		||||
    { LLM_ARCH_SEED_OSS,         "seed_oss"         },
 | 
			
		||||
    { LLM_ARCH_GROVEMOE,         "grovemoe"         },
 | 
			
		||||
    { LLM_ARCH_APERTUS,          "apertus"          },
 | 
			
		||||
    { LLM_ARCH_COGVLM,           "cogvlm"           },
 | 
			
		||||
    { LLM_ARCH_UNKNOWN,          "(unknown)"        },
 | 
			
		||||
};
 | 
			
		||||
 | 
			
		||||
@@ -2312,6 +2313,26 @@ static const std::map<llm_arch, std::map<llm_tensor, const char *>> LLM_TENSOR_N
 | 
			
		||||
            { LLM_TENSOR_FFN_UP_CHEXPS,      "blk.%d.ffn_up_chexps" },
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        LLM_ARCH_COGVLM,
 | 
			
		||||
        {
 | 
			
		||||
            { LLM_TENSOR_TOKEN_EMBD,      "token_embd" },
 | 
			
		||||
            { LLM_TENSOR_OUTPUT_NORM,     "output_norm" },
 | 
			
		||||
            { LLM_TENSOR_OUTPUT,          "output" },
 | 
			
		||||
            { LLM_TENSOR_ATTN_NORM,       "blk.%d.attn_norm" },
 | 
			
		||||
            { LLM_TENSOR_ATTN_QKV,        "blk.%d.attn_qkv" },
 | 
			
		||||
            { LLM_TENSOR_ATTN_OUT,        "blk.%d.attn_output" },
 | 
			
		||||
            { LLM_TENSOR_FFN_NORM,        "blk.%d.ffn_norm" },
 | 
			
		||||
            { LLM_TENSOR_FFN_GATE,        "blk.%d.ffn_gate" },
 | 
			
		||||
            { LLM_TENSOR_FFN_DOWN,        "blk.%d.ffn_down" },
 | 
			
		||||
            { LLM_TENSOR_FFN_UP,          "blk.%d.ffn_up" },
 | 
			
		||||
            { LLM_TENSOR_VISEXP_ATTN_QKV, "blk.%d.vis_attn_qkv" },
 | 
			
		||||
            { LLM_TENSOR_VISEXP_ATTN_OUT, "blk.%d.vis_attn_output" },
 | 
			
		||||
            { LLM_TENSOR_VISEXP_FFN_GATE, "blk.%d.vis_gate" },
 | 
			
		||||
            { LLM_TENSOR_VISEXP_FFN_DOWN, "blk.%d.vis_down" },
 | 
			
		||||
            { LLM_TENSOR_VISEXP_FFN_UP,   "blk.%d.vis_up" },
 | 
			
		||||
        },
 | 
			
		||||
    },
 | 
			
		||||
    {
 | 
			
		||||
        LLM_ARCH_UNKNOWN,
 | 
			
		||||
        {
 | 
			
		||||
@@ -2488,6 +2509,11 @@ static const std::map<llm_tensor, llm_tensor_info> LLM_TENSOR_INFOS = {
 | 
			
		||||
    {LLM_TENSOR_SHORTCONV_CONV,             {LLM_TENSOR_LAYER_REPEATING, GGML_OP_SSM_CONV}},
 | 
			
		||||
    {LLM_TENSOR_SHORTCONV_INPROJ,           {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 | 
			
		||||
    {LLM_TENSOR_SHORTCONV_OUTPROJ,          {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 | 
			
		||||
    {LLM_TENSOR_VISEXP_ATTN_QKV,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 | 
			
		||||
    {LLM_TENSOR_VISEXP_ATTN_OUT,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 | 
			
		||||
    {LLM_TENSOR_VISEXP_FFN_GATE,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 | 
			
		||||
    {LLM_TENSOR_VISEXP_FFN_DOWN,            {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 | 
			
		||||
    {LLM_TENSOR_VISEXP_FFN_UP,              {LLM_TENSOR_LAYER_REPEATING, GGML_OP_MUL_MAT}},
 | 
			
		||||
    // NextN/MTP tensors are currently ignored (reserved for future MTP support)
 | 
			
		||||
    // These tensors only exist in the last layer(s) and are treated as output tensors
 | 
			
		||||
    {LLM_TENSOR_NEXTN_EH_PROJ,              {LLM_TENSOR_LAYER_OUTPUT, GGML_OP_MUL_MAT}},
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user