From fe12a5d47e422009ca49c534e94c95cec917dcd7 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 27 May 2025 11:24:40 +0300
Subject: [PATCH] sampling : min-p should always return at least one token

ggml-ci
---
 src/llama-sampling.cpp  | 2 +-
 tests/test-sampling.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 804b11e0a9..bbc31d9af4 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -798,7 +798,7 @@ static void llama_sampler_min_p_apply(struct llama_sampler * smpl, llama_token_d
         }
 
         // if we have enough values the operation was a success
-        if (filtered_tokens.size() >= ctx->min_keep) {
+        if (!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep) {
             memcpy(cur_p->data, filtered_tokens.data(), filtered_tokens.size()*sizeof(llama_token_data));
             cur_p->size = filtered_tokens.size();
             min_p_applied = true;
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 60ac62b385..8642aeefdb 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -109,7 +109,7 @@ static void test_min_p(const std::vector & probs, const std::vector
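
For context, here is a minimal standalone sketch of the min-p rule this patch hardens. It is not the llama.cpp implementation; the candidate struct and the min_p_filter helper below are hypothetical simplifications working in probability space rather than on logits. The point it illustrates is the patched condition: the filtered set is committed only when it is non-empty and satisfies min_keep, so the sampler always leaves at least one token for the caller.

// Hypothetical, simplified illustration of the min-p guard added by this patch.
// Names such as `candidate` and `min_p_filter` are made up for this sketch and
// do not exist in llama.cpp.
#include <algorithm>
#include <cstdio>
#include <vector>

struct candidate {
    int   id;
    float p; // already-normalized probability
};

// Keep candidates with p >= min_p * max_p. Commit the filtered set only when it
// is non-empty and has at least `min_keep` entries, mirroring the patched check
// `!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep`.
static void min_p_filter(std::vector<candidate> & cands, float min_p, size_t min_keep) {
    if (cands.empty() || min_p <= 0.0f) {
        return;
    }

    float max_p = 0.0f;
    for (const auto & c : cands) {
        max_p = std::max(max_p, c.p);
    }
    const float threshold = min_p * max_p;

    std::vector<candidate> filtered;
    for (const auto & c : cands) {
        if (c.p >= threshold) {
            filtered.push_back(c);
        }
    }

    // Without the `!filtered.empty()` guard, min_keep == 0 would allow an empty
    // result to be committed, leaving the caller with zero tokens to sample from.
    if (!filtered.empty() && filtered.size() >= min_keep) {
        cands = std::move(filtered);
    }
}

int main() {
    std::vector<candidate> cands = {{0, 0.70f}, {1, 0.20f}, {2, 0.09f}, {3, 0.01f}};
    min_p_filter(cands, /*min_p=*/0.2f, /*min_keep=*/0);
    for (const auto & c : cands) {
        std::printf("id=%d p=%.2f\n", c.id, c.p);
    }
    return 0;
}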