metal : optimize multi-sequence flash attention (FA) vec kernel

ggml-ci
This commit is contained in:
Georgi Gerganov
2025-05-13 08:03:27 +03:00
parent f078c79865
commit fdfc7de7fc

View File

@@ -3887,6 +3887,11 @@ kernel void kernel_flash_attn_ext_vec(
sm[tiisg] = pm[ic + tiisg];
}
// skip -INF blocks
if (simd_max(sm[tiisg]) == -INFINITY) {
continue;
}
// Q*K^T
{
// each simdgroup processes 1 query and NE (NW/NL) head elements