hparams : add n_embd_inp() to support extended embed (#16928)

* add n_embd_full to support extended embed

* don't change output

* rename to n_embd_inp

* restore n_embd where applicable
Sigbjørn Skjæret
2025-11-07 19:27:58 +01:00
committed by GitHub
parent 16bcc1259d
commit 9008027aa3
9 changed files with 29 additions and 28 deletions
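For context on the commit message above: the new accessor distinguishes the input embedding width from the model width. A minimal sketch of how such an accessor could look, assuming a hypothetical n_deepstack_layers hparams field as the source of the extra width (neither the field nor the exact widening rule is shown in this diff):

// Sketch only, not the verbatim patch: the input width is the model width
// plus any per-token extras concatenated onto the token embedding.
// n_deepstack_layers is an assumed field, not confirmed by this diff.
uint32_t llama_hparams::n_embd_inp() const {
    uint32_t n_embd_inp = n_embd;

    if (n_deepstack_layers > 0) {
        // each extra stacked layer contributes another n_embd-wide slice
        n_embd_inp += n_embd * n_deepstack_layers;
    }

    return n_embd_inp;
}

With no extended embedding present this would simply return n_embd, consistent with the "don't change output" note: only input-side tensors pick up the wider size.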


@@ -1142,7 +1142,7 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
 // input embeddings with optional lora
 ggml_tensor * llm_graph_context::build_inp_embd(ggml_tensor * tok_embd) const {
-    const int64_t n_embd = hparams.n_embd;
+    const int64_t n_embd = hparams.n_embd_inp();
 
     auto inp = std::make_unique<llm_graph_input_embd>();
@@ -1279,7 +1279,7 @@ ggml_tensor * llm_graph_context::build_inp_cross_embd() const {
     //    return cur;
     //}
 
-    const auto n_embd = !cross->v_embd.empty() ? cross->n_embd : hparams.n_embd;
+    const auto n_embd = !cross->v_embd.empty() ? cross->n_embd : hparams.n_embd_inp();
     const auto n_enc  = !cross->v_embd.empty() ? cross->n_enc  : hparams.n_ctx_train;
 
     cur = ggml_new_tensor_2d(ctx0, GGML_TYPE_F32, n_embd, n_enc);
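
Net effect of the two hunks: the token-embedding input and the cross-embedding fallback are now sized by hparams.n_embd_inp() rather than hparams.n_embd, so graph inputs are allocated at the full (possibly extended) input width, while output-side code keeps n_embd, per the "restore n_embd where applicable" note.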