ggml webgpu: add support for soft_max, optimize rms_norm (#16357)

* Add inplace softmax * Move rms_norm to split row approach * Update debug for supports_op * clean up debug statements * Update tests/test-backend-ops.cpp Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-11-09 10:17:06 +00:00 · 2025-10-02 11:00:31 -07:00
parent 34fcc5a4ac
commit ef07a40906
6 changed files with 566 additions and 48 deletions
--- a/ggml/src/ggml.c
+++ b/ggml/src/ggml.c
@@ -3852,6 +3852,15 @@ struct ggml_tensor * ggml_soft_max_ext(
    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, false);
 }

+struct ggml_tensor * ggml_soft_max_ext_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        struct ggml_tensor  * mask,
+        float                 scale,
+        float                 max_bias) {
+    return ggml_soft_max_impl(ctx, a, mask, scale, max_bias, true);
+}
+
 void ggml_soft_max_add_sinks(
        struct ggml_tensor * a,
        struct ggml_tensor * sinks) {