Very minor speedup via simd-group synchronization in f16 x f32

2025-11-16 11:27:03 +00:00 · 2023-09-01 13:21:50 +03:00
parent 69fdbb9abc
commit 2cb47e0e16
2 changed files with 8 additions and 37 deletions
--- a/ggml-metal.m
+++ b/ggml-metal.m
@@ -971,7 +971,7 @@ void ggml_metal_graph_compute(
                                else if (src0t == GGML_TYPE_Q6_K) {
                                    [encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                } else {
-                                    [encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
+                                    //[encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
                                    [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
                                }
                            }