diff --git a/src/llama-sampling.cpp b/src/llama-sampling.cpp index bbc31d9af4..bfbf5fa230 100644 --- a/src/llama-sampling.cpp +++ b/src/llama-sampling.cpp @@ -909,7 +909,7 @@ static void llama_sampler_typical_apply(struct llama_sampler * smpl, llama_token cum_sum += cur_p->data[idx].p; // Check if the running sum is greater than typical or if we have kept at least min_keep tokens - if (cum_sum > ctx->p && i >= ctx->min_keep - 1) { + if (cum_sum > ctx->p && (ctx->min_keep == 0 || i >= ctx->min_keep - 1)) { last_idx = i + 1; break; }