metal : replace loop of dispatch_async with dispatch_apply (#4934)

* Replace loop of dispatch_async with dispatch_apply
* Update ggml-metal.m

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-10-30 08:42:00 +00:00 · 2024-01-16 14:41:27 +01:00
parent 7c8d3abd1a
commit 3a48d558a6
1 changed files with 1439 additions and 1443 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -737,10 +737,10 @@ static bool ggml_metal_graph_compute(
        ctx->command_encoders[i] = [ctx->command_buffers[i] computeCommandEncoderWithDescriptor: edesc];
    }

-    for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) {
    const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb;
+    dispatch_apply(n_cb, ctx->d_queue, ^(size_t iter) {
+        const int cb_idx = iter;

-        dispatch_async(ctx->d_queue, ^{
        size_t offs_src0 = 0;
        size_t offs_src1 = 0;
        size_t offs_dst  = 0;
@@ -2248,10 +2248,6 @@ static bool ggml_metal_graph_compute(

        [command_buffer commit];
    });
-    }
-
-    // wait for all threads to finish
-    dispatch_barrier_sync(ctx->d_queue, ^{});

    // check status of command buffers
    // needed to detect if the device ran out-of-memory for example (#1881)