server : improve context checkpoint logic (#16440)

This commit is contained in:
Georgi Gerganov
2025-10-08 10:57:29 +03:00
committed by GitHub
parent 74b8fc17f9
commit 7fdd16b432
2 changed files with 56 additions and 35 deletions

View File

@@ -861,9 +861,12 @@ void llama_memory_recurrent::state_write_data(llama_io_write_i & io, const std::
bool llama_memory_recurrent::state_read_meta(llama_io_read_i & io, uint32_t cell_count, llama_seq_id dest_seq_id) {
if (dest_seq_id != -1) {
// single sequence
seq_rm(dest_seq_id, -1, -1);
if (cell_count == 0) {
return true;
}
llama_batch_allocr balloc(hparams.n_pos_per_embd());
llama_ubatch ubatch = balloc.ubatch_reserve(cell_count, 1);