llama : extend batch API to select which logits to output

2025-11-06 09:46:50 +00:00 · 2023-09-19 00:24:13 +03:00
parent 897caccdf4
commit fa0e677820
4 changed files with 46 additions and 6 deletions
--- a/llama.h
+++ b/llama.h
@@ -70,11 +70,11 @@ extern "C" {
    typedef struct llama_batch {
        uint32_t n_tokens;

-        // TODO: not sure about these consts - might just get in the way all the time with no benefit
        const llama_token  * token;
        const float        * embd;
        const llama_pos    * pos;
        const llama_seq_id * seq_id;
+        const int8_t       * logits; // if 0, do not extract logits for that token

        // NOTE: helpers for smooth API transition - can be deprecated in the future
        //       for future-proof code, use the above fields instead and ignore everything below