llama : more robust cell_max heuristic + wip shift

2025-11-01 09:01:57 +00:00 · 2023-09-18 17:15:25 +03:00
parent 4d76d762ef
commit f015b26689
3 changed files with 39 additions and 52 deletions
--- a/examples/llama-bench/llama-bench.cpp
+++ b/examples/llama-bench/llama-bench.cpp
@@ -977,6 +977,8 @@ int main(int argc, char ** argv) {

        test t(inst, lmodel, ctx);

+        llama_kv_cache_keep_seq(ctx, -1);
+
        // warmup run
        if (t.n_prompt > 0) {
            test_prompt(ctx, std::min(2, t.n_batch), 0, t.n_batch, t.n_threads);
@@ -986,6 +988,8 @@ int main(int argc, char ** argv) {
        }

        for (int i = 0; i < params.reps; i++) {
+            llama_kv_cache_keep_seq(ctx, -1);
+
            uint64_t t_start = get_time_ns();
            if (t.n_prompt > 0) {
                test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);