mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
context : pass embeddings tensor from encoder to decoder
ggml-ci
This commit is contained in:
@@ -748,11 +748,12 @@ private:
|
||||
llama_kv_cache_recurrent kv_self;
|
||||
};
|
||||
|
||||
// TODO: tmp - need something better to pass the data from the encoder to the decoder
// Holds the encoder's output ("cross" data) so the decoder can consume it,
// e.g. for cross-attention in encoder-decoder models.
struct llama_cross {
    // number of encoder outputs
    // NOTE(review): presumably matches the encoder batch's output count — confirm against the caller
    int32_t n_outputs;

    // raw pointer to the encoder output embeddings
    // NOTE(review): ownership/lifetime not visible here — presumably points into
    // context-owned storage; verify it stays valid for the decoder pass
    float * embd_enc;

    // the output embeddings from the encoder
    ggml_tensor * t_embd = nullptr;

    // needed to construct the cross-attention mask in the decoder
    // assumes one set of sequence ids per encoder output — TODO confirm
    std::vector<std::set<llama_seq_id>> seq_ids_enc;
};
|
||||
|
||||
|
||||
Reference in New Issue
Block a user