Mirror of https://github.com/ggml-org/llama.cpp.git
First draft
@@ -539,7 +539,8 @@ extern "C" {
         GGML_OP_RWKV_WKV6,
         GGML_OP_GATED_LINEAR_ATTN,
         GGML_OP_RWKV_WKV7,
+        GGML_OP_DELTA_NET,
 
         GGML_OP_UNARY,
 
         GGML_OP_MAP_CUSTOM1,
@@ -2278,6 +2279,31 @@ extern "C" {
             struct ggml_tensor * state,
             float scale);
 
+    // Delta-Net linear layer activation
+    // Implements the complete Delta-Net gated linear attention mechanism.
+    // This includes causal convolution preprocessing and gated delta rule computation.
+    // k, v, q, g:    [S, H, n_tokens, n_seqs] - key, value, query, gate tensors
+    // conv_weight:   [conv_dim, 1, conv_kernel_size] - convolution kernel weights
+    // conv_bias:     [conv_dim] - convolution bias (optional, may be NULL)
+    // beta:          [H, n_tokens, n_seqs] - beta parameter for the delta rule
+    // state:         [S, S, H, n_seqs] - recurrent state tensor
+    // chunk_size:    chunk size for chunked computation (0 for recurrent mode)
+    // use_qk_l2norm: whether to apply L2 normalization to the query and key
+    // scale:         attention scaling factor
+    GGML_API struct ggml_tensor * ggml_delta_net(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * k,
+            struct ggml_tensor  * v,
+            struct ggml_tensor  * q,
+            struct ggml_tensor  * g,
+            struct ggml_tensor  * conv_weight,
+            struct ggml_tensor  * conv_bias,
+            struct ggml_tensor  * beta,
+            struct ggml_tensor  * state,
+            int                   chunk_size,
+            bool                  use_qk_l2norm,
+            float                 scale);
+
     GGML_API struct ggml_tensor * ggml_rwkv_wkv7(
             struct ggml_context * ctx,
             struct ggml_tensor  * r,
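For orientation, a minimal usage sketch of the new API follows; it is not part of this commit. It builds a single ggml_delta_net node and computes it on the CPU. The tensor shapes are taken from the header comments above, but the concrete dimensions (S, H, conv_dim, conv_kernel_size), the chunk_size and scale choices, and the availability of a CPU implementation of GGML_OP_DELTA_NET are all illustrative assumptions.

// Hypothetical usage sketch for ggml_delta_net (assumptions noted inline).
#include "ggml.h"
#include "ggml-cpu.h" // for ggml_graph_compute_with_ctx
#include <math.h>

int main(void) {
    // Illustrative dimensions (assumptions, not values from this commit).
    const int S        = 64;      // per-head state size
    const int H        = 8;       // number of heads
    const int n_tokens = 32;
    const int n_seqs   = 1;
    const int conv_dim = 2*S*H;   // assumed convolution channel count
    const int conv_ks  = 4;       // assumed convolution kernel size

    struct ggml_init_params params = {
        /*.mem_size   =*/ 256u*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // Shapes follow the header comments; ne[0] is listed first.
    struct ggml_tensor * k = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);
    struct ggml_tensor * v = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);
    struct ggml_tensor * q = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);
    struct ggml_tensor * g = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, H, n_tokens, n_seqs);

    struct ggml_tensor * conv_w = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, conv_dim, 1, conv_ks);
    struct ggml_tensor * conv_b = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, conv_dim); // optional, may be NULL

    struct ggml_tensor * beta  = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, H, n_tokens, n_seqs);
    struct ggml_tensor * state = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, S, S, H, n_seqs);

    // chunk_size = 0 selects recurrent mode per the header comment;
    // scale = 1/sqrt(S) is a conventional choice, not mandated by the API.
    struct ggml_tensor * out = ggml_delta_net(ctx, k, v, q, g,
            conv_w, conv_b, beta, state,
            /*chunk_size    =*/ 0,
            /*use_qk_l2norm =*/ true,
            /*scale         =*/ 1.0f/sqrtf((float) S));

    // Computing the graph assumes a backend that implements GGML_OP_DELTA_NET.
    struct ggml_cgraph * gf = ggml_new_graph(ctx);
    ggml_build_forward_expand(gf, out);
    ggml_graph_compute_with_ctx(ctx, gf, /*n_threads =*/ 4);

    ggml_free(ctx);
    return 0;
}

In real use the input tensors would be filled with model activations before computing; here they are left uninitialized since the sketch only exercises graph construction and the call signature.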