mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
server : improve context checkpoint logic (#16440)
This commit is contained in:
@@ -861,9 +861,12 @@ void llama_memory_recurrent::state_write_data(llama_io_write_i & io, const std::
|
||||
bool llama_memory_recurrent::state_read_meta(llama_io_read_i & io, uint32_t cell_count, llama_seq_id dest_seq_id) {
|
||||
if (dest_seq_id != -1) {
|
||||
// single sequence
|
||||
|
||||
seq_rm(dest_seq_id, -1, -1);
|
||||
|
||||
if (cell_count == 0) {
|
||||
return true;
|
||||
}
|
||||
|
||||
llama_batch_allocr balloc(hparams.n_pos_per_embd());
|
||||
|
||||
llama_ubatch ubatch = balloc.ubatch_reserve(cell_count, 1);
|
||||
|
||||
Reference in New Issue
Block a user