mtmd: pad mask for qwen2.5vl (#16954)
* mtmd: pad mask for qwen2.5vl

* improve
@@ -761,6 +761,15 @@ struct clip_graph {
         ggml_set_name(window_mask, "window_mask");
         ggml_set_input(window_mask);
 
+        // if flash attn is used, we need to pad the mask and cast to f16
+        if (ctx->flash_attn_type == CLIP_FLASH_ATTN_TYPE_ENABLED) {
+            int n_pad = GGML_PAD(window_mask->ne[1], GGML_KQ_MASK_PAD) - window_mask->ne[1];
+            if (n_pad > 0) {
+                window_mask = ggml_pad(ctx0, window_mask, 0, n_pad, 0, 0);
+            }
+            window_mask = ggml_cast(ctx0, window_mask, GGML_TYPE_F16);
+        }
+
         // inpL shape: [n_embd, n_patches_x * n_patches_y, batch_size]
         GGML_ASSERT(batch_size == 1);
         inpL = ggml_reshape_2d(ctx0, inpL, n_embd * 4, n_patches_x * n_patches_y * batch_size / 4);
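For intuition, here is a standalone sketch (not part of the patch) of how the pad amount is derived: GGML_PAD rounds the mask's row count up to the next multiple of GGML_KQ_MASK_PAD, and n_pad is the difference. The concrete numbers (a 1034-row mask, a pad multiple of 64) are illustrative assumptions, and ROUND_UP is a local stand-in for GGML_PAD.

// Standalone sketch (illustrative, not from the patch): computing the pad amount.
// ROUND_UP stands in for GGML_PAD; the row count and pad multiple are assumed values.
#include <cstdio>
#include <cstdint>

#define ROUND_UP(x, n) ((((x) + (n) - 1) / (n)) * (n))

int main() {
    int64_t n_rows   = 1034; // assumed window_mask->ne[1]
    int64_t multiple = 64;   // assumed value of GGML_KQ_MASK_PAD
    int64_t n_pad    = ROUND_UP(n_rows, multiple) - n_rows;
    // prints: pad 54 rows -> 1088 total rows
    std::printf("pad %lld rows -> %lld total rows\n",
                (long long) n_pad, (long long) (n_rows + n_pad));
    return 0;
}

As the added comment in the diff notes, this padding and the cast to GGML_TYPE_F16 are only applied when flash attention is enabled (CLIP_FLASH_ATTN_TYPE_ENABLED), so the non-flash-attention path is unchanged.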