context : simplify output counting logic during decode (#14142)

* batch : remove logits_all flag

ggml-ci

* context : simplify output counting logic during decode

ggml-ci

* cont : fix comments
2025-11-04 09:32:00 +00:00 · 2025-06-12 11:50:01 +03:00
parent c3ee46fab4
commit f6e1a7aa87
3 changed files with 28 additions and 23 deletions
--- a/src/llama-batch.h
+++ b/src/llama-batch.h
@@ -85,7 +85,7 @@ struct llama_batch_allocr {
    std::vector<llama_pos>      pos;
    std::vector<int32_t>        n_seq_id;
    std::vector<llama_seq_id *> seq_id;
-    std::vector<int8_t>         logits;
+    std::vector<int8_t>         output;

    // optionally fulfill the batch returned by llama_batch_get_one
    llama_batch_allocr(struct llama_batch in_batch, llama_pos p0);