From 4b13a684c595ab9323be8dfa6266d9d0fb13c232 Mon Sep 17 00:00:00 2001 From: Xuan-Son Nguyen Date: Mon, 10 Nov 2025 11:41:05 +0100 Subject: [PATCH] mtmd: fix patch_size initialized to random value in audio models (#17128) * mtmd: fix patch_size initialized to random value in audio models * add default hparams --- tools/mtmd/clip.cpp | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/tools/mtmd/clip.cpp b/tools/mtmd/clip.cpp index d1423b67f9..1d78f5954e 100644 --- a/tools/mtmd/clip.cpp +++ b/tools/mtmd/clip.cpp @@ -160,13 +160,13 @@ enum patch_merge_type { }; struct clip_hparams { - int32_t image_size; - int32_t patch_size; - int32_t n_embd; - int32_t n_ff; - int32_t projection_dim; - int32_t n_head; - int32_t n_layer; + int32_t image_size = 0; + int32_t patch_size = 0; + int32_t n_embd = 0; + int32_t n_ff = 0; + int32_t projection_dim = 0; + int32_t n_head = 0; + int32_t n_layer = 0; // idefics3 int32_t image_longest_edge = 0; int32_t image_min_pixels = -1; @@ -2683,6 +2683,9 @@ struct clip_model_loader { } } else if (is_audio) { get_u32(KEY_A_NUM_MEL_BINS, hparams.n_mel_bins); + // some hparams are unused, but still need to set to avoid issues + hparams.image_size = 0; + hparams.patch_size = 1; } else { GGML_ASSERT(false && "unknown modality");