mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
context : pass embeddings tensor from encoder to decoder
ggml-ci
This commit is contained in:
@@ -748,11 +748,12 @@ private:
|
||||
llama_kv_cache_recurrent kv_self;
|
||||
};
|
||||
|
||||
// TODO: tmp - need something better to pass the data from the encoder to the decoder
// Holds the encoder's output ("cross" data) so the decoder can consume it,
// e.g. for cross-attention in encoder-decoder models.
struct llama_cross {
    // number of encoder outputs
    // NOTE(review): presumably matches the encoder batch's output count — confirm against the caller
    int32_t n_outputs;

    // raw pointer to the encoder output embeddings
    // NOTE(review): ownership/lifetime not visible here — presumably points into
    // context-owned storage; verify it stays valid for the decoder pass
    float * embd_enc;

    // the output embeddings from the encoder
    ggml_tensor * t_embd = nullptr;

    // needed to construct the cross-attention mask in the decoder
    // assumes one set of sequence ids per encoder output — TODO confirm
    std::vector<std::set<llama_seq_id>> seq_ids_enc;
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user