From fe12a5d47e422009ca49c534e94c95cec917dcd7 Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Tue, 27 May 2025 11:24:40 +0300
Subject: [PATCH] sampling : min-p should always return at least one token

ggml-ci
---
 src/llama-sampling.cpp  | 2 +-
 tests/test-sampling.cpp | 3 ++-
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp
index 804b11e0a9..bbc31d9af4 100644
--- a/src/llama-sampling.cpp
+++ b/src/llama-sampling.cpp
@@ -798,7 +798,7 @@ static void llama_sampler_min_p_apply(struct llama_sampler * smpl, llama_token_d
         }
 
         // if we have enough values the operation was a success
-        if (filtered_tokens.size() >= ctx->min_keep) {
+        if (!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep) {
             memcpy(cur_p->data, filtered_tokens.data(), filtered_tokens.size()*sizeof(llama_token_data));
             cur_p->size = filtered_tokens.size();
             min_p_applied = true;
diff --git a/tests/test-sampling.cpp b/tests/test-sampling.cpp
index 60ac62b385..8642aeefdb 100644
--- a/tests/test-sampling.cpp
+++ b/tests/test-sampling.cpp
@@ -109,7 +109,7 @@ static void test_min_p(const std::vector & probs, const std::vector
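
For context, here is a minimal standalone sketch of the min-p rule this patch hardens. It is not the llama.cpp implementation; the candidate struct and the min_p_filter helper below are hypothetical simplifications working in probability space rather than on logits. The point it illustrates is the patched condition: the filtered set is committed only when it is non-empty and satisfies min_keep, so the sampler always leaves at least one token for the caller.

// Hypothetical, simplified illustration of the min-p guard added by this patch.
// Names such as `candidate` and `min_p_filter` are made up for this sketch and
// do not exist in llama.cpp.
#include <algorithm>
#include <cstdio>
#include <vector>

struct candidate {
    int   id;
    float p; // already-normalized probability
};

// Keep candidates with p >= min_p * max_p. Commit the filtered set only when it
// is non-empty and has at least `min_keep` entries, mirroring the patched check
// `!filtered_tokens.empty() && filtered_tokens.size() >= ctx->min_keep`.
static void min_p_filter(std::vector<candidate> & cands, float min_p, size_t min_keep) {
    if (cands.empty() || min_p <= 0.0f) {
        return;
    }

    float max_p = 0.0f;
    for (const auto & c : cands) {
        max_p = std::max(max_p, c.p);
    }
    const float threshold = min_p * max_p;

    std::vector<candidate> filtered;
    for (const auto & c : cands) {
        if (c.p >= threshold) {
            filtered.push_back(c);
        }
    }

    // Without the `!filtered.empty()` guard, min_keep == 0 would allow an empty
    // result to be committed, leaving the caller with zero tokens to sample from.
    if (!filtered.empty() && filtered.size() >= min_keep) {
        cands = std::move(filtered);
    }
}

int main() {
    std::vector<candidate> cands = {{0, 0.70f}, {1, 0.20f}, {2, 0.09f}, {3, 0.01f}};
    min_p_filter(cands, /*min_p=*/0.2f, /*min_keep=*/0);
    for (const auto & c : cands) {
        std::printf("id=%d p=%.2f\n", c.id, c.p);
    }
    return 0;
}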