llama : unified KV cache + batch inference API
@@ -198,15 +198,6 @@ int main(int argc, char ** argv) {
             params.n_threads, std::thread::hardware_concurrency(), llama_print_system_info());
     }
 
-    // export the cgraph and exit
-    if (params.export_cgraph) {
-        llama_eval_export(ctx, "llama.ggml");
-        llama_free(ctx);
-        llama_free_model(model);
-
-        return 0;
-    }
-
     std::string path_session = params.path_prompt_cache;
     std::vector<llama_token> session_tokens;
 
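For context, this commit moves the examples from the old llama_eval() call to the new batch-based decode API built around the unified KV cache. The sketch below is not taken from this commit: it shows one plausible way to fill a llama_batch by hand and submit it with llama_decode(), assuming a recent llama.h. The model path and token values are placeholders, and several signatures (llama_backend_init, llama_batch_init, the seq_id layout) have changed across llama.cpp versions, so treat this as an illustration rather than the commit's exact code.

// minimal sketch of the batch decode API (signatures vary between llama.cpp versions)
#include "llama.h"

#include <vector>

int main() {
    llama_backend_init();  // NOTE: older versions take a bool numa argument

    llama_model_params mparams = llama_model_default_params();
    llama_model * model = llama_load_model_from_file("model.gguf", mparams);  // placeholder path

    llama_context_params cparams = llama_context_default_params();
    llama_context * ctx = llama_new_context_with_model(model, cparams);

    // tokens assumed to come from llama_tokenize(); placeholder content here
    std::vector<llama_token> tokens = { 1 /* BOS */ };

    // build a batch for a single sequence and submit it with llama_decode(),
    // which replaces the old llama_eval() call
    llama_batch batch = llama_batch_init(/*n_tokens*/ 512, /*embd*/ 0, /*n_seq_max*/ 1);
    for (size_t i = 0; i < tokens.size(); ++i) {
        batch.token   [batch.n_tokens]    = tokens[i];
        batch.pos     [batch.n_tokens]    = (llama_pos) i;
        batch.n_seq_id[batch.n_tokens]    = 1;
        batch.seq_id  [batch.n_tokens][0] = 0;
        batch.logits  [batch.n_tokens]    = (i == tokens.size() - 1);  // logits only for the last token
        batch.n_tokens++;
    }

    if (llama_decode(ctx, batch) != 0) {
        // decode failed, e.g. because the KV cache has no free slot
    }

    llama_batch_free(batch);
    llama_free(ctx);
    llama_free_model(model);
    llama_backend_free();
    return 0;
}

The per-token seq_id assignment is what lets a single batch carry tokens from multiple independent sequences, which is the point of the unified KV cache: sequences share one cache and are told apart by their sequence IDs rather than by separate contexts.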