mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-07 09:57:00 +00:00
llama: store mrope data in KV cell (#16825)
* llama: store mrope data in KV cell
* correct x,y ordering
* address review comments
* add consistency checks
* Update src/llama-kv-cache.cpp

  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
* add TODO
* fix asan error
* kv-cells : improve ext handling
* cont : fix headers

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@@ -5,6 +5,15 @@
|
||||
|
||||
#include "llama.h"
|
||||
|
||||
// fix problem with std::min and std::max
|
||||
#if defined(_WIN32)
|
||||
#define WIN32_LEAN_AND_MEAN
|
||||
#ifndef NOMINMAX
|
||||
# define NOMINMAX
|
||||
#endif
|
||||
#include <windows.h>
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
#include <cerrno>
|
||||
#include <cstdio>
|
||||
@@ -1031,7 +1040,9 @@ const char * mtmd_image_tokens_get_id(const mtmd_image_tokens * image_tokens) {
|
||||
|
||||
// Returns how many positions the given image tokens occupy.
//
// - When use_mrope_pos is set, the result is max(nx, ny). The advance along
//   the temporal dimension is max(t, h, w), and t is left out because video
//   input is not supported.
//   NOTE(review): nx/ny are presumably the width/height of the image token
//   grid - confirm against the mtmd_image_tokens definition.
// - Otherwise the result is n_tokens().
llama_pos mtmd_image_tokens_get_n_pos(const mtmd_image_tokens * image_tokens) {
    if (image_tokens->use_mrope_pos) {
        // for M-RoPE, temporal dimension = max(t,h,w)
        // t is omitted as we don't support video input
        return std::max(image_tokens->nx, image_tokens->ny);
    }
    return image_tokens->n_tokens();
}
|
||||
|
||||
Reference in New Issue
Block a user