llama : improve llama_batch API + simplify parallel example
@@ -419,7 +419,7 @@ static results_perplexity perplexity(llama_context * ctx, const gpt_params & par
 }
 
 static std::vector<float> hellaswag_evaluate_tokens(
-    llama_context * ctx, const std::vector<int>& tokens, int n_past, int n_batch, int n_vocab, int n_thread
+    llama_context * ctx, std::vector<int> & tokens, int n_past, int n_batch, int n_vocab, int n_thread
 ) {
     std::vector<float> result;
     result.reserve(tokens.size() * n_vocab);
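The only functional change in this hunk is dropping `const` from the `tokens` parameter, so the function may now mutate the caller's vector. A minimal sketch of the kind of mutation a non-const reference enables (the padding helper below is an illustrative assumption, not the actual body of `hellaswag_evaluate_tokens`):

```cpp
#include <vector>

// Hypothetical helper: pad the caller's token buffer to a multiple of
// n_batch so it can be split into equal-sized decode batches. Mutating
// the vector in place is only possible because it is taken by non-const
// reference -- the same signature change made in the hunk above.
static void pad_tokens(std::vector<int> & tokens, int n_batch, int pad_id) {
    const size_t rem = tokens.size() % (size_t) n_batch;
    if (rem != 0) {
        tokens.resize(tokens.size() + ((size_t) n_batch - rem), pad_id);
    }
}
```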