llama: store mrope data in KV cell (#16825)

* llama: store mrope data in KV cell
* correct x,y ordering
* address review comments
* add consistency checks
* Update src/llama-kv-cache.cpp
  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* add TODO
* fix asan error
* kv-cells : improve ext handling
* cont : fix headers

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
2025-11-14 11:07:10 +00:00 · 2025-10-29 18:09:18 +01:00
parent 10fcc41290
commit e3af5563bd
6 changed files with 144 additions and 33 deletions
--- a/src/llama-kv-cells.h
+++ b/src/llama-kv-cells.h
@@ -5,9 +5,27 @@

 #include <bitset>
 #include <cassert>
-#include <vector>
-#include <set>
+#include <cstring>
 #include <map>
+#include <set>
+#include <vector>
+
+struct llama_kv_cell_ext {
+    // 2D spatial position of the cell, typically used for M-RoPE
+    llama_pos x = 0;
+    llama_pos y = 0;
+
+    // true if this position is ordered after (ox, oy): y is compared first, x breaks ties
+    bool is_2d_gt(llama_pos ox, llama_pos oy) const {
+        return y != oy ? y > oy : x > ox;
+    }
+
+    // zero every byte of the object; the static_assert guards the raw memset
+    void reset() {
+        static_assert(std::is_trivially_copyable_v<llama_kv_cell_ext>);
+        memset(this, 0, sizeof(llama_kv_cell_ext));
+    }
+};

 // meta information about KV cells that can be part of multiple sequences at the same time
 // TODO: add unit tests
@@ -16,6 +34,7 @@ public:
    void reset() {
        for (uint32_t i = 0; i < pos.size(); ++i) {
            pos[i]   = -1;
+            ext[i].reset();
            shift[i] =  0;
            seq[i].reset();
        }
@@ -43,6 +62,7 @@ public:

    void resize(uint32_t n) {
        pos.resize(n);
+        ext.resize(n);
        shift.resize(n);
        seq.resize(n);

@@ -108,6 +128,7 @@ public:
            const auto idx = i + j;

            res.pos[j] = pos[idx];
+            res.ext[j] = ext[idx];
            res.seq[j] = seq[idx];

            assert(shift[idx] == 0);
@@ -126,6 +147,7 @@ public:
            const auto idx = idxs[j];

            res.pos[j] = pos[idx];
+            res.ext[j] = ext[idx];
            res.seq[j] = seq[idx];

            assert(shift[idx] == 0);
@@ -154,6 +176,7 @@ public:
            }

            pos[idx] = other.pos[j];
+            ext[idx] = other.ext[j];
            seq[idx] = other.seq[j];

            if (pos[idx] != -1) {
@@ -184,6 +207,7 @@ public:
            }

            pos[idx] = other.pos[j];
+            ext[idx] = other.ext[j];
            seq[idx] = other.seq[j];

            if (pos[idx] != -1) {
@@ -203,6 +227,7 @@ public:
        seq[i].reset();

        pos[i] = -1;
+        ext[i].reset();
        shift[i] = 0;

        used.erase(i);
@@ -221,6 +246,7 @@ public:

        if (seq[i].none()) {
            pos[i] = -1;
+            ext[i].reset();
            shift[i] = 0;

            used.erase(i);
@@ -250,6 +276,7 @@ public:
            seq[i].reset();

            pos[i] = -1;
+            ext[i].reset();
            shift[i] = 0;

            used.erase(i);
@@ -340,6 +367,13 @@ public:
        return pos[i];
    }

+    // note: call only if the cell is not empty
+    const llama_kv_cell_ext & ext_get(uint32_t i) const {
+        assert(i < pos.size());
+        assert(pos[i] != -1);
+        return ext[i];
+    }
+
    // note: call only if the cell is not empty
    llama_pos get_shift(uint32_t i) const {
        assert(i < pos.size());
@@ -368,6 +402,11 @@ public:
        used.insert(i);
    }

+    void ext_set(uint32_t i, llama_kv_cell_ext p) {
+        assert(i < ext.size()); // bounds check only: the cell is not required to be non-empty
+        ext[i] = p;             // overwrite the extra info stored for cell i
+    }
+
    // pos[i] = pos[i] + d
    // sets "has_shift" to true
    // note: call only if the cell is not empty
@@ -424,6 +463,9 @@ private:

    std::vector<llama_pos> pos;

+    // stores extra info per cell
+    std::vector<llama_kv_cell_ext> ext;
+
    // this array accumulates any applied shifts to the pos array since the last reset_shift() call
    // this is used to queue multiple updates to the pos array, which in the end can be applied in one go:
    //