llama: store mrope data in KV cell (#16825)

* llama: store mrope data in KV cell

* correct x,y ordering

* address review comments

* add consistency checks

* Update src/llama-kv-cache.cpp

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* add TODO

* fix asan error

* kv-cells : improve ext handling

* cont : fix headers

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
Xuan-Son Nguyen
2025-10-29 18:09:18 +01:00
committed by GitHub
parent 10fcc41290
commit e3af5563bd
6 changed files with 144 additions and 33 deletions

View File

@@ -5,6 +5,15 @@
#include "llama.h"
// fix problem with std::min and std::max
#if defined(_WIN32)
#define WIN32_LEAN_AND_MEAN
#ifndef NOMINMAX
# define NOMINMAX
#endif
#include <windows.h>
#endif
#include <algorithm>
#include <cerrno>
#include <cstdio>
@@ -1031,7 +1040,9 @@ const char * mtmd_image_tokens_get_id(const mtmd_image_tokens * image_tokens) {
llama_pos mtmd_image_tokens_get_n_pos(const mtmd_image_tokens * image_tokens) {
if (image_tokens->use_mrope_pos) {
return 1; // for M-RoPE, the whole image is 1 in temporal dimension
// for M-RoPE, temporal dimension = max(t,h,w)
// t is omitted as we don't support video input
return std::max(image_tokens->nx, image_tokens->ny);
}
return image_tokens->n_tokens();
}