CANN: Optimize RMS_NORM using cache (#15419)

* [CANN] Optimize RMS_NORM using cache
  Signed-off-by: noemotiovon <757486878@qq.com>
* fix typo
  Signed-off-by: noemotiovon <757486878@qq.com>
* fix review comment
  Signed-off-by: noemotiovon <757486878@qq.com>
* codestyle adjustment
  Signed-off-by: noemotiovon <757486878@qq.com>
---------
Signed-off-by: noemotiovon <757486878@qq.com>
2025-11-17 11:37:10 +00:00 · 2025-08-22 14:12:07 +08:00
parent 54a241f505
commit a0f98dd604
2 changed files with 121 additions and 36 deletions
--- a/ggml/src/ggml-cann/common.h
+++ b/ggml/src/ggml-cann/common.h
@@ -379,6 +379,10 @@ struct ggml_backend_cann_context {
    cann_task_queue task_queue;
    bool async_mode;
    bool support_set_rows;
+    void* f32_zero_cache = nullptr;
+    void* f32_one_cache = nullptr;
+    int64_t f32_zero_cache_element = 0;
+    int64_t f32_one_cache_element = 0;

    aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */