server : fix "can batch with" bug (#17263)

This commit is contained in:
Georgi Gerganov
2025-11-14 14:03:45 +02:00
committed by GitHub
parent 45c6ef7307
commit d396b43748

View File

@@ -3591,13 +3591,13 @@ struct server_context {
 // next, batch any pending prompts without exceeding n_batch
 if (params_base.cont_batching || batch.n_tokens == 0) {
 for (auto & slot : slots) {
+if (!slot.is_processing()) {
+continue;
+}
+
 // check if we can batch this slot with the previous one
-if (slot.is_processing()) {
-if (!slot_batched) {
-slot_batched = &slot;
-} else if (!slot_batched->can_batch_with(slot)) {
-continue;
-}
+if (slot_batched && !slot_batched->can_batch_with(slot)) {
+continue;
 }
 
 // this slot still has a prompt to be processed
@@ -4028,6 +4028,10 @@ struct server_context {
 }
 }
 
+if (!slot_batched) {
+slot_batched = &slot;
+}
+
 if (batch.n_tokens >= n_batch) {
 break;
 }