mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	metal : replace loop of dispatch_async with dispatch_apply (#4934)
* Replace loop of dispatch_async with dispatch_apply
* Update ggml-metal.m

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
		| @@ -737,10 +737,10 @@ static bool ggml_metal_graph_compute( | ||||
|         ctx->command_encoders[i] = [ctx->command_buffers[i] computeCommandEncoderWithDescriptor: edesc]; | ||||
|     } | ||||
|  | ||||
|     for (int cb_idx = 0; cb_idx < n_cb; ++cb_idx) { | ||||
|     const int n_nodes_per_cb = (n_nodes + n_cb - 1) / n_cb; | ||||
|     dispatch_apply(n_cb, ctx->d_queue, ^(size_t iter) { | ||||
|         const int cb_idx = iter; | ||||
|  | ||||
|         dispatch_async(ctx->d_queue, ^{ | ||||
|         size_t offs_src0 = 0; | ||||
|         size_t offs_src1 = 0; | ||||
|         size_t offs_dst  = 0; | ||||
| @@ -2248,10 +2248,6 @@ static bool ggml_metal_graph_compute( | ||||
|  | ||||
|         [command_buffer commit]; | ||||
|     }); | ||||
|     } | ||||
|  | ||||
|     // wait for all threads to finish | ||||
|     dispatch_barrier_sync(ctx->d_queue, ^{}); | ||||
|  | ||||
|     // check status of command buffers | ||||
|     // needed to detect if the device ran out-of-memory for example (#1881) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Alex Azarov
					Alex Azarov