metal : reduce command encoding overhead (#9698)

* metal : reduce command encoding overhead

ggml-ci

* metal : add comments
2025-11-03 09:22:01 +00:00 · 2024-10-01 16:00:25 +03:00
parent a90484c6d9
commit cad341d889
5 changed files with 2000 additions and 1912 deletions
--- a/src/llama.cpp
+++ b/src/llama.cpp
@@ -17025,12 +17025,6 @@ static void llama_graph_compute(
            ggml_cgraph * gf,
                    int   n_threads,
        ggml_threadpool * threadpool) {
-#ifdef GGML_USE_METAL
-    if (ggml_backend_is_metal(lctx.backend_metal)) {
-        ggml_backend_metal_set_n_cb(lctx.backend_metal, n_threads);
-    }
-#endif
-
    if (lctx.backend_cpu != nullptr) {
        ggml_backend_cpu_set_n_threads(lctx.backend_cpu, n_threads);
        ggml_backend_cpu_set_threadpool(lctx.backend_cpu, threadpool);