Mirror of https://github.com/ggml-org/llama.cpp.git
(last synced 2025-11-07 09:57:00 +00:00)
mtmd: improve struct initialization (#16981)
This commit is contained in:
@@ -2761,6 +2761,7 @@ struct clip_model_loader {
|
|||||||
{
|
{
|
||||||
// ref: https://huggingface.co/mistral-community/pixtral-12b/blob/main/preprocessor_config.json
|
// ref: https://huggingface.co/mistral-community/pixtral-12b/blob/main/preprocessor_config.json
|
||||||
// TODO: verify the image_min_tokens
|
// TODO: verify the image_min_tokens
|
||||||
|
hparams.n_merge = 1; // the original pixtral does not use patch merging
|
||||||
hparams.rope_theta = 10000.0f;
|
hparams.rope_theta = 10000.0f;
|
||||||
get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.n_merge, false);
|
get_u32(KEY_SPATIAL_MERGE_SIZE, hparams.n_merge, false);
|
||||||
hparams.set_limit_image_tokens(8, 1024);
|
hparams.set_limit_image_tokens(8, 1024);
|
||||||
|
|||||||
@@ -101,16 +101,17 @@ static clip_flash_attn_type mtmd_get_clip_flash_attn_type(enum llama_flash_attn_
|
|||||||
}
|
}
|
||||||
|
|
||||||
mtmd_context_params mtmd_context_params_default() {
|
mtmd_context_params mtmd_context_params_default() {
|
||||||
mtmd_context_params params;
|
mtmd_context_params params {
|
||||||
params.use_gpu = true;
|
/* use_gpu */ true,
|
||||||
params.print_timings = true;
|
/* print_timings */ true,
|
||||||
params.n_threads = 4;
|
/* n_threads */ 4,
|
||||||
params.verbosity = GGML_LOG_LEVEL_INFO;
|
/* verbosity */ GGML_LOG_LEVEL_INFO,
|
||||||
params.image_marker = MTMD_DEFAULT_IMAGE_MARKER;
|
/* image_marker */ MTMD_DEFAULT_IMAGE_MARKER,
|
||||||
params.media_marker = mtmd_default_marker();
|
/* media_marker */ mtmd_default_marker(),
|
||||||
params.flash_attn_type = LLAMA_FLASH_ATTN_TYPE_AUTO;
|
/* flash_attn_type */ LLAMA_FLASH_ATTN_TYPE_AUTO,
|
||||||
params.image_min_tokens = -1;
|
/* image_min_tokens */ -1,
|
||||||
params.image_max_tokens = -1;
|
/* image_max_tokens */ -1,
|
||||||
|
};
|
||||||
return params;
|
return params;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -172,13 +173,13 @@ struct mtmd_context {
|
|||||||
throw std::runtime_error("media_marker must not be empty");
|
throw std::runtime_error("media_marker must not be empty");
|
||||||
}
|
}
|
||||||
|
|
||||||
clip_context_params ctx_clip_params;
|
clip_context_params ctx_clip_params {
|
||||||
ctx_clip_params.use_gpu = ctx_params.use_gpu;
|
/* use_gpu */ ctx_params.use_gpu,
|
||||||
ctx_clip_params.verbosity = ctx_params.verbosity;
|
/* verbosity */ ctx_params.verbosity,
|
||||||
ctx_clip_params.flash_attn_type = mtmd_get_clip_flash_attn_type(ctx_params.flash_attn_type);
|
/* flash_attn_type */ CLIP_FLASH_ATTN_TYPE_AUTO,
|
||||||
// custom image token limits
|
/* image_min_tokens */ ctx_params.image_min_tokens,
|
||||||
ctx_clip_params.image_min_tokens = ctx_params.image_min_tokens;
|
/* image_max_tokens */ ctx_params.image_max_tokens,
|
||||||
ctx_clip_params.image_max_tokens = ctx_params.image_max_tokens;
|
};
|
||||||
|
|
||||||
auto res = clip_init(mmproj_fname, ctx_clip_params);
|
auto res = clip_init(mmproj_fname, ctx_clip_params);
|
||||||
ctx_v = res.ctx_v;
|
ctx_v = res.ctx_v;
|
||||||
|
|||||||
Reference in New Issue
Block a user