context : simplify output counting logic during decode (#14142)

* batch : remove logits_all flag

ggml-ci

* context : simplify output counting logic during decode

ggml-ci

* cont : fix comments
This commit is contained in:
Georgi Gerganov
2025-06-12 11:50:01 +03:00
committed by GitHub
parent c3ee46fab4
commit f6e1a7aa87
3 changed files with 28 additions and 23 deletions

View File

@@ -85,7 +85,7 @@ struct llama_batch_allocr {
std::vector<llama_pos> pos;
std::vector<int32_t> n_seq_id;
std::vector<llama_seq_id *> seq_id;
- std::vector<int8_t> logits;
+ std::vector<int8_t> output;
// optionally fulfill the batch returned by llama_batch_get_one
llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);