Very minor speedup via simd-group synchronization in f16 x f32

This commit is contained in:
Iwan Kawrakow
2023-09-01 13:21:50 +03:00
parent 69fdbb9abc
commit 2cb47e0e16
2 changed files with 8 additions and 37 deletions

View File

@@ -971,7 +971,7 @@ void ggml_metal_graph_compute(
else if (src0t == GGML_TYPE_Q6_K) {
[encoder dispatchThreadgroups:MTLSizeMake((ne01 + 1)/2, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
} else {
-[encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
+//[encoder setThreadgroupMemoryLength:nth0*sizeof(float) atIndex:0];
[encoder dispatchThreadgroups:MTLSizeMake(ne01, ne11, ne12) threadsPerThreadgroup:MTLSizeMake(nth0, nth1, 1)];
}
}