mirror of https://github.com/ggml-org/llama.cpp.git
Tests for min_p, sampling queue (#5147)
@@ -8133,6 +8133,11 @@ void llama_sample_softmax(struct llama_context * ctx, llama_token_data_array * c
 }
 
 void llama_sample_top_k(struct llama_context * ctx, llama_token_data_array * candidates, int32_t k, size_t min_keep) {
+    // TODO: move bucket sort to separate function so that top_p/tail_free/typical/softmax first is equally fast
+    // if (k >= (int32_t)candidates->size) {
+    //     return;
+    // }
+
     const int64_t t_start_sample_us = ggml_time_us();
 
     k = std::max(k, (int) min_keep);
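For context, top-k truncation keeps only the k highest-scoring candidates, with min_keep acting as a lower bound on how many survive. Below is a minimal, self-contained C++ sketch of that idea using a simplified token struct and std::partial_sort; it is an illustration only, not the llama_sample_top_k implementation touched by the diff above (which works on a llama_token_data_array in place, records sampling time, and per the TODO is meant to move to a bucket sort).

// Sketch of top-k truncation over a candidate list.
// token_data and sample_top_k are simplified stand-ins, not llama.cpp API.
#include <algorithm>
#include <cstdint>
#include <vector>

struct token_data {
    int32_t id;     // token id
    float   logit;  // raw score from the model
};

// Keep only the k highest-logit candidates, but never fewer than min_keep.
static void sample_top_k(std::vector<token_data> & candidates, int32_t k, size_t min_keep) {
    k = std::max(k, (int32_t) min_keep);
    k = std::min(k, (int32_t) candidates.size());

    // Partially sort so the k best candidates come first, then truncate.
    std::partial_sort(candidates.begin(), candidates.begin() + k, candidates.end(),
                      [](const token_data & a, const token_data & b) {
                          return a.logit > b.logit;
                      });
    candidates.resize(k);
}

int main() {
    std::vector<token_data> candidates = {
        {0, 1.5f}, {1, -0.2f}, {2, 3.1f}, {3, 0.7f}, {4, 2.4f},
    };
    sample_top_k(candidates, /*k=*/3, /*min_keep=*/1);
    // candidates now holds token ids 2, 4, 0 (the three largest logits)
    return 0;
}

After the call, candidates holds the three tokens with the largest logits, which is the pool a subsequent sampler stage (top-p, min-p, temperature) would then draw from.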