ggml webgpu: add support for soft_max, optimize rms_norm (#16357)

* Add inplace softmax

* Move rms_norm to split row approach

* Update debug for supports_op

* clean up debug statements

* Update tests/test-backend-ops.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Reese Levine
2025-10-02 11:00:31 -07:00
committed by GitHub
parent 34fcc5a4ac
commit ef07a40906
6 changed files with 566 additions and 48 deletions

View File

@@ -3852,6 +3852,15 @@ struct ggml_tensor * ggml_soft_max_ext(
return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
}
struct ggml_tensor * ggml_soft_max_ext_inplace(
struct ggml_context * ctx,
struct ggml_tensor * a,
struct ggml_tensor * mask,
float scale,
float max_bias) {
return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
}
void ggml_soft_max_add_sinks(
struct ggml_tensor * a,
struct ggml_tensor * sinks) {