From 86f5623d904cfd392fdeb14a143097b4074660f6 Mon Sep 17 00:00:00 2001 From: yummy <57988893+jk3456a@users.noreply.github.com> Date: Thu, 24 Jul 2025 17:50:51 +0800 Subject: [PATCH] llama : fix MiniCPM inference after Granite Four changes (#14850) MiniCPM models use the llm_build_granite constructor, which was changed in the Granite Four PR to use hparams.rope_finetuned instead of a use_rope parameter. MiniCPM models need rope enabled by default. This fixes inference, which was producing gibberish instead of correct responses. --- src/llama-model.cpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/llama-model.cpp b/src/llama-model.cpp index 35e718aa98..a997a1e80f 100644 --- a/src/llama-model.cpp +++ b/src/llama-model.cpp @@ -646,6 +646,9 @@ void llama_model::load_hparams(llama_model_loader & ml) { ml.get_key(LLM_KV_RESIDUAL_SCALE, hparams.f_residual_scale); ml.get_key(LLM_KV_LOGIT_SCALE, hparams.f_logit_scale); + // MiniCPM uses rope by default, unlike Granite which uses it as a switch + hparams.rope_finetuned = true; + switch (hparams.n_layer) { case 52: type = LLM_TYPE_1B; break; case 40: type = LLM_TYPE_2B; break;