Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-16 11:27:03 +00:00)
server : fix "can batch with" bug (#17263)
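Previously, `slot_batched` was assigned to the first processing slot the loop encountered, before the remaining checks in the loop body had a chance to `continue` past it. A slot that ended up contributing nothing to the batch could therefore become the reference slot, and later slots would be rejected by `can_batch_with()` against it. The fix moves the assignment to the end of the loop body, so only a slot that made it through the whole iteration becomes the reference for subsequent compatibility checks.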
@@ -3591,13 +3591,13 @@ struct server_context {
         // next, batch any pending prompts without exceeding n_batch
         if (params_base.cont_batching || batch.n_tokens == 0) {
             for (auto & slot : slots) {
+                if (!slot.is_processing()) {
+                    continue;
+                }
+
                 // check if we can batch this slot with the previous one
-                if (slot.is_processing()) {
-                    if (!slot_batched) {
-                        slot_batched = &slot;
-                    } else if (!slot_batched->can_batch_with(slot)) {
-                        continue;
-                    }
+                if (slot_batched && !slot_batched->can_batch_with(slot)) {
+                    continue;
                 }
 
                 // this slot still has a prompt to be processed
@@ -4028,6 +4028,10 @@ struct server_context {
                     }
                 }
 
+                if (!slot_batched) {
+                    slot_batched = &slot;
+                }
+
                 if (batch.n_tokens >= n_batch) {
                     break;
                 }
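To make the behavioral change concrete, below is a minimal, self-contained C++ sketch of the scheduling pattern the fix establishes. Everything in it (`Slot`, `seq_group`, `can_batch_with` as a free function, the `pending_tokens` skip) is a hypothetical stand-in for the server's real types and checks, not the actual API:

#include <cstdio>
#include <vector>

// Hypothetical stand-ins for the server's slot/batch state.
struct Slot {
    int  id;
    int  seq_group;      // slots are assumed batchable only within the same group
    bool processing;
    int  pending_tokens;
};

static bool can_batch_with(const Slot & a, const Slot & b) {
    return a.seq_group == b.seq_group;
}

int main() {
    std::vector<Slot> slots = {
        { 0, /*seq_group=*/1, /*processing=*/true, /*pending_tokens=*/0 }, // nothing to add
        { 1, /*seq_group=*/2, true, 8 },
        { 2, /*seq_group=*/2, true, 8 },
    };

    const int n_batch = 16;
    int n_tokens = 0;

    const Slot * slot_batched = nullptr;

    for (auto & slot : slots) {
        if (!slot.processing) {
            continue;
        }

        // compare against the reference slot, but only if one has actually
        // contributed to this batch (the fixed behavior)
        if (slot_batched && !can_batch_with(*slot_batched, slot)) {
            continue;
        }

        if (slot.pending_tokens == 0) {
            continue; // stand-in for the elided mid-loop skip conditions
        }

        n_tokens += slot.pending_tokens;
        printf("batched slot %d (%d tokens)\n", slot.id, slot.pending_tokens);

        // record the reference slot only after the slot really joined the batch
        if (!slot_batched) {
            slot_batched = &slot;
        }

        if (n_tokens >= n_batch) {
            break;
        }
    }
}

Under the old logic, slot 0 would have become the reference slot despite contributing nothing, and the group-2 slots would then be skipped by the compatibility check; with the assignment deferred, slots 1 and 2 batch together as intended.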