speculative : add infill mode

ggml-ci
2025-11-01 09:01:57 +00:00 · 2024-11-26 11:14:17 +02:00
parent 0eb4e12bee
commit b83cae088c
3 changed files with 26 additions and 22 deletions
--- a/common/speculative.h
+++ b/common/speculative.h
@@ -10,6 +10,8 @@ struct common_speculative_params {
    int n_reuse = 256;

    float p_min = 0.9f; // min probability required to accept a token in the draft
+
+    bool infill = false; // use infill sampling (useful for FIM)
 };

 struct common_speculative * common_speculative_init(struct llama_context * ctx_dft);