vulkan: fix rms_norm_mul to handle broadcasting dim0 (#14817)

This commit is contained in:
Jeff Bolz
2025-07-22 10:35:21 -05:00
committed by GitHub
parent d4d1522b20
commit 84712b6043
2 changed files with 9 additions and 3 deletions

View File

@@ -50,8 +50,14 @@ void main() {
const FLOAT_TYPE scale = inversesqrt(mean + FLOAT_TYPE(p.param1));
if (do_multiply) {
[[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {
data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col]) * FLOAT_TYPE(data_b[b_offset + col]));
if (ncols > p.ne10) {
[[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {
data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col]) * FLOAT_TYPE(data_b[b_offset + fastmod(col, p.ne10)]));
}
} else {
[[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {
data_d[d_offset + col] = D_TYPE(scale * FLOAT_TYPE(data_a[a_offset + col]) * FLOAT_TYPE(data_b[b_offset + col]));
}
}
} else {
[[unroll]] for (uint col = tid; col < ncols; col += BLOCK_SIZE) {