batch : fix consistency checks for the input positions (#16890)

commit 8da3c0e200 (parent c22473b580)
Author: Georgi Gerganov
Date:   2025-10-31 13:50:33 +02:00 (committed by GitHub)


@@ -261,6 +261,7 @@ bool llama_batch_allocr::init(
             const llama_pos p0 = memory ? memory->seq_pos_max(s) : -1;
+            if (batch.token) {
                 if (p0 >= 0 && p0 >= seq_pos_min(s)) {
                     LLAMA_LOG_ERROR(
                             "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
@@ -271,6 +272,19 @@ bool llama_batch_allocr::init(
                     return false;
                 }
+            } else {
+                // embedding inputs can have overlapping positions
+                if (p0 >= 0 && p0 > seq_pos_min(s)) {
+                    LLAMA_LOG_ERROR(
+                            "%s: the tokens of sequence %d in the input batch have inconsistent sequence positions:\n"
+                            " - the last position stored in the memory module of the context (i.e. the KV cache) for sequence %d is X = %d\n"
+                            " - the tokens for sequence %d in the input batch have a starting position of Y = %d\n"
+                            " for M-RoPE, it is required that the position satisfies: X <= Y\n",
+                            __func__, s, s, p0, s, seq_pos_min(s));
+                    return false;
+                }
+            }
         }
     } else {
         for (uint32_t s = 0; s < n_seq_max; ++s) {
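For context, the rule the changed hunk enforces can be summarized as: token inputs must start strictly after the last position already stored for the sequence in the memory module (X < Y), while embedding inputs (the M-RoPE case) may overlap the last cached position (X <= Y). Below is a minimal standalone sketch of that rule, not the llama.cpp implementation; the helper name positions_consistent and its parameters are assumptions made for illustration only.

// Minimal illustrative sketch (not the actual llama.cpp code).
// p0 stands for the last position X already stored in the KV cache for a
// sequence; seq_pos_min stands for the smallest position Y that the sequence
// uses in the incoming batch.
#include <cstdio>

static bool positions_consistent(int p0, int seq_pos_min, bool is_token_batch) {
    if (p0 < 0) {
        return true; // nothing cached yet for this sequence - any start is fine
    }
    if (is_token_batch) {
        // token inputs must continue strictly after the cached positions: X < Y
        return p0 < seq_pos_min;
    }
    // embedding inputs (e.g. M-RoPE vision embeddings) may overlap the last
    // cached position, so only X <= Y is required
    return p0 <= seq_pos_min;
}

int main() {
    // last cached position X = 10, batch starts at Y = 10
    std::printf("token batch,     X=10, Y=10 -> %s\n", positions_consistent(10, 10, true)  ? "ok" : "error");
    std::printf("embedding batch, X=10, Y=10 -> %s\n", positions_consistent(10, 10, false) ? "ok" : "error");
    // batch starts at Y = 11 - consistent for both kinds of input
    std::printf("token batch,     X=10, Y=11 -> %s\n", positions_consistent(10, 11, true)  ? "ok" : "error");
    return 0;
}

With these inputs the sketch reports an error only for the token batch that starts at the already-cached position, which matches the distinction the patch introduces between the token and embedding branches.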