llama : avoid redundant state copy for Mamba 1 and 2

2025-11-04 09:32:00 +00:00 · 2024-09-30 15:52:42 -04:00
parent 0e601cafe9
commit 273e7a495a
4 changed files with 142 additions and 119 deletions
--- a/ggml/include/ggml.h
+++ b/ggml/include/ggml.h
@@ -1833,7 +1833,8 @@ extern "C" {
            struct ggml_tensor  * A,
            struct ggml_tensor  * B,
            struct ggml_tensor  * C,
-            struct ggml_tensor  * D);
+            struct ggml_tensor  * D,
+            struct ggml_tensor  * ids);

    // partition into non-overlapping windows with padding if needed
    // example: