mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
mtmd : support InternVL 2.5 and 3 (#13422)
* convert : internvl support * InternVL3-1B working * fix regression * rm mobilevlm from test * fix conversion * add test for internvl * add to list of pre-quant * restore boi/eoi check * add clarify comment for norm eps
This commit is contained in:
@@ -33,9 +33,6 @@
|
||||
#define KEY_PROJ_TYPE "clip.projector_type"
|
||||
#define KEY_SPATIAL_MERGE_SIZE "clip.vision.spatial_merge_size"
|
||||
|
||||
#define KEY_USE_GLU_MLP "clip.use_glu_mlp" // for qwen2.5vl
|
||||
#define KEY_USE_RMS_NORM "clip.use_rms_norm" // for qwen2.5vl
|
||||
|
||||
#define KEY_MM_PATCH_MERGE_TYPE "clip.vision.mm_patch_merge_type"
|
||||
#define KEY_IMAGE_GRID_PINPOINTS "clip.vision.image_grid_pinpoints"
|
||||
#define KEY_IMAGE_CROP_RESOLUTION "clip.vision.image_crop_resolution"
|
||||
@@ -60,8 +57,10 @@
|
||||
#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
|
||||
#define TN_FFN_UP "%s.blk.%d.ffn_up.%s"
|
||||
#define TN_FFN_GATE "%s.blk.%d.ffn_gate.%s"
|
||||
#define TN_LN_1 "%s.blk.%d.ln1.%s"
|
||||
#define TN_LN_2 "%s.blk.%d.ln2.%s"
|
||||
#define TN_LN_1 "%s.blk.%d.ln1.%s" // layer norm
|
||||
#define TN_LN_2 "%s.blk.%d.ln2.%s" // layer norm
|
||||
#define TN_LS_1 "%s.blk.%d.ls1.%s" // layer scale
|
||||
#define TN_LS_2 "%s.blk.%d.ls2.%s" // layer scale
|
||||
#define TN_LN_PRE "%s.pre_ln.%s"
|
||||
#define TN_LN_POST "%s.post_ln.%s"
|
||||
#define TN_LLAVA_PROJ "mm.%d.%s"
|
||||
@@ -105,6 +104,7 @@ enum projector_type {
|
||||
PROJECTOR_TYPE_IDEFICS3,
|
||||
PROJECTOR_TYPE_PIXTRAL,
|
||||
PROJECTOR_TYPE_QWEN25VL,
|
||||
PROJECTOR_TYPE_INTERNVL,
|
||||
PROJECTOR_TYPE_UNKNOWN,
|
||||
};
|
||||
|
||||
@@ -119,6 +119,7 @@ static std::map<projector_type, std::string> PROJECTOR_TYPE_NAMES = {
|
||||
{ PROJECTOR_TYPE_GEMMA3, "gemma3"},
|
||||
{ PROJECTOR_TYPE_IDEFICS3, "idefics3"},
|
||||
{ PROJECTOR_TYPE_PIXTRAL, "pixtral"},
|
||||
{ PROJECTOR_TYPE_INTERNVL, "internvl"},
|
||||
};
|
||||
|
||||
static projector_type clip_projector_type_from_string(const std::string & str) {
|
||||
|
||||
Reference in New Issue
Block a user