Merge branch 'master' into compilade/refactor-kv-cache

Francis Couture-Harpin
2024-06-08 17:30:49 -04:00
242 changed files with 83873 additions and 22119 deletions

ggml.h (37 changed lines)

@@ -1465,7 +1465,6 @@ extern "C" {
     // rotary position embedding
     // if mode & 1 == 1, skip n_past elements (NOT SUPPORTED)
     // if mode & 2 == 1, GPT-NeoX style
-    // if mode & 4 == 1, ChatGLM style
     //
     // b is an int32 vector with size a->ne[2], it contains the positions
     // c is freq factors (e.g. phi3-128k), (optional)
@@ -1474,8 +1473,7 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
             int                   n_dims,
-            int                   mode,
-            int                   n_ctx);
+            int                   mode);
 
     // in-place, returns view(a)
     GGML_API struct ggml_tensor * ggml_rope_inplace(
@@ -1483,8 +1481,7 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b,
             int                   n_dims,
-            int                   mode,
-            int                   n_ctx);
+            int                   mode);
 
     // custom RoPE
     GGML_API struct ggml_tensor * ggml_rope_ext(
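For reference, a minimal sketch of building a RoPE node against the reduced signature above (no n_ctx argument). The helper name apply_rope and the tensor names cur/pos are illustrative, not part of this commit:

#include "ggml.h"

// Sketch only: mirrors the new 5-argument ggml_rope declaration shown above.
// `cur` is assumed to be an activation tensor whose third dimension (ne[2])
// equals the number of tokens; `n_rot` is the number of rotated dimensions.
static struct ggml_tensor * apply_rope(struct ggml_context * ctx,
                                       struct ggml_tensor  * cur,
                                       int n_rot, int n_tokens) {
    // b must be an int32 vector of size cur->ne[2] holding the token positions
    struct ggml_tensor * pos = ggml_new_tensor_1d(ctx, GGML_TYPE_I32, n_tokens);

    // mode 0 = original RoPE; mode 2 = GPT-NeoX style (per the comment block above)
    return ggml_rope(ctx, cur, pos, n_rot, /*mode =*/ 0);
}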
@@ -1494,8 +1491,7 @@ extern "C" {
             struct ggml_tensor  * c,
             int                   n_dims,
             int                   mode,
-            int                   n_ctx,
-            int                   n_orig_ctx,
+            int                   n_ctx_orig,
             float                 freq_base,
             float                 freq_scale,
             float                 ext_factor,
@@ -1511,8 +1507,7 @@ extern "C" {
             struct ggml_tensor  * c,
             int                   n_dims,
             int                   mode,
-            int                   n_ctx,
-            int                   n_orig_ctx,
+            int                   n_ctx_orig,
             float                 freq_base,
             float                 freq_scale,
             float                 ext_factor,
@@ -1526,8 +1521,7 @@ extern "C" {
             struct ggml_tensor  * b,
             int                   n_dims,
             int                   mode,
-            int                   n_ctx,
-            int                   n_orig_ctx,
+            int                   n_ctx_orig,
             float                 freq_base,
             float                 freq_scale,
             float                 ext_factor,
@@ -1542,8 +1536,7 @@ extern "C" {
             struct ggml_tensor  * b,
             int                   n_dims,
             int                   mode,
-            int                   n_ctx,
-            int                   n_orig_ctx,
+            int                   n_ctx_orig,
             float                 freq_base,
             float                 freq_scale,
             float                 ext_factor,
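A hedged usage sketch of ggml_rope_ext after the rename. The trailing attn_factor/beta_fast/beta_slow parameters follow the same pattern visible in the ggml_rope_back hunk below, and all hyperparameter values here are illustrative defaults rather than values taken from this commit; cur, pos, and n_rot are assumed to exist as in the previous sketch:

// Sketch only: `c` carries the optional frequency factors (e.g. phi3-128k)
// and may be NULL.
struct ggml_tensor * rotated = ggml_rope_ext(
        ctx, cur, pos, /*c =*/ NULL,
        /*n_dims      =*/ n_rot,
        /*mode        =*/ 0,
        /*n_ctx_orig  =*/ 4096,      // original training context (replaces the n_ctx / n_orig_ctx pair)
        /*freq_base   =*/ 10000.0f,
        /*freq_scale  =*/ 1.0f,
        /*ext_factor  =*/ 0.0f,
        /*attn_factor =*/ 1.0f,
        /*beta_fast   =*/ 32.0f,
        /*beta_slow   =*/ 1.0f);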
@@ -1552,17 +1545,9 @@ extern "C" {
             float                 beta_slow),
         "use ggml_rope_ext_inplace instead");
 
-    struct ggml_tensor * ggml_rope_xpos_inplace(
-            struct ggml_context * ctx,
-            struct ggml_tensor  * a,
-            struct ggml_tensor  * b,
-            int                   n_dims,
-            float                 base,
-            bool                  down);
-
     // compute correction dims for YaRN RoPE scaling
     GGML_CALL void ggml_rope_yarn_corr_dims(
-        int n_dims, int n_orig_ctx, float freq_base, float beta_fast, float beta_slow, float dims[2]);
+        int n_dims, int n_ctx_orig, float freq_base, float beta_fast, float beta_slow, float dims[2]);
 
     // rotary position embedding backward, i.e compute dx from dy
     // a - dy
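The only change to ggml_rope_yarn_corr_dims is the parameter rename, so callers just pass the original training context under the new name; a sketch with illustrative values (128 rope dims, 4096 original context):

// Sketch only: compute the YaRN ramp correction dimensions.
float corr_dims[2];
ggml_rope_yarn_corr_dims(
    /*n_dims     =*/ 128,
    /*n_ctx_orig =*/ 4096,      // renamed from n_orig_ctx in this commit
    /*freq_base  =*/ 10000.0f,
    /*beta_fast  =*/ 32.0f,
    /*beta_slow  =*/ 1.0f,
    corr_dims);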
@@ -1573,16 +1558,13 @@ extern "C" {
             struct ggml_tensor  * c,
             int                   n_dims,
             int                   mode,
-            int                   n_ctx,
-            int                   n_orig_ctx,
+            int                   n_ctx_orig,
             float                 freq_base,
             float                 freq_scale,
             float                 ext_factor,
             float                 attn_factor,
             float                 beta_fast,
-            float                 beta_slow,
-            float                 xpos_base,
-            bool                  xpos_down);
+            float                 beta_slow);
 
     // clamp
     // in-place, returns view(a)
@@ -2422,7 +2404,6 @@ extern "C" {
     GGML_API int ggml_cpu_has_wasm_simd  (void);
     GGML_API int ggml_cpu_has_blas       (void);
     GGML_API int ggml_cpu_has_cuda       (void);
-    GGML_API int ggml_cpu_has_clblast    (void);
     GGML_API int ggml_cpu_has_vulkan     (void);
     GGML_API int ggml_cpu_has_kompute    (void);
     GGML_API int ggml_cpu_has_gpublas    (void);
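Since ggml_cpu_has_clblast() is removed here, any feature probe that still references it will no longer compile. A minimal sketch of probing the remaining backends (the output formatting is illustrative):

#include <stdio.h>
#include "ggml.h"

// Sketch only: report which backends this build of ggml was compiled with.
int main(void) {
    printf("CUDA:     %d\n", ggml_cpu_has_cuda());
    printf("Vulkan:   %d\n", ggml_cpu_has_vulkan());
    printf("Kompute:  %d\n", ggml_cpu_has_kompute());
    printf("BLAS:     %d\n", ggml_cpu_has_blas());
    printf("GPU BLAS: %d\n", ggml_cpu_has_gpublas());
    return 0;
}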