Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-05 09:36:52 +00:00)
server : add SWA checkpoints (#15293)
* server : add SWA checkpoints (ggml-ci)
* cont : server clean-up
* server : handle state restore fails
* llama : add extended llama_state_seq_ API
* server : do not make checkpoints if --swa-full (ggml-ci)
* llama : remove flags value for NONE
* server : configure number of SWA checkpoints with CLI arg (ggml-ci)
* args : fix scope of new argument
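The message above references an extended `llama_state_seq_` API for saving and restoring only part of a sequence's state. Below is a minimal sketch of how a caller (such as the server's checkpoint logic) might use it. The `_ext` function names, the `LLAMA_STATE_SEQ_FLAGS_SWA_ONLY` flag, and the `swa_checkpoint` helper are assumptions based on this PR's description, not code taken from the patch; verify them against the `llama.h` shipped with this commit.

```cpp
#include <cstdint>
#include <vector>

#include "llama.h"

// Hypothetical container for one checkpoint; not part of the patch.
struct swa_checkpoint {
    std::vector<uint8_t> data;
};

// Serialize only the SWA portion of a sequence's state.
static swa_checkpoint swa_checkpoint_save(llama_context * ctx, llama_seq_id seq_id) {
    swa_checkpoint cp;
    const size_t n = llama_state_seq_get_size_ext(ctx, seq_id, LLAMA_STATE_SEQ_FLAGS_SWA_ONLY);
    cp.data.resize(n);
    llama_state_seq_get_data_ext(ctx, cp.data.data(), cp.data.size(), seq_id,
                                 LLAMA_STATE_SEQ_FLAGS_SWA_ONLY);
    return cp;
}

// Restore a previously saved checkpoint. Returns false on failure, in which
// case the caller has to rebuild the sequence state by re-processing tokens
// (the "handle state restore fails" item in the commit message).
static bool swa_checkpoint_restore(llama_context * ctx, llama_seq_id seq_id,
                                   const swa_checkpoint & cp) {
    const size_t n = llama_state_seq_set_data_ext(ctx, cp.data.data(), cp.data.size(), seq_id,
                                                  LLAMA_STATE_SEQ_FLAGS_SWA_ONLY);
    return n > 0;
}
```

Per the message, the server skips such checkpoints entirely when --swa-full is set (the full window is retained, so rollback needs no snapshot), and the number of checkpoints kept is configurable through a new CLI argument.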
@@ -680,7 +680,9 @@ size_t llama_memory_recurrent::size_s_bytes() const {
     return size_s_bytes;
 }
 
-void llama_memory_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id) const {
+void llama_memory_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) const {
+    GGML_UNUSED(flags);
+
     std::vector<std::pair<uint32_t, uint32_t>> cell_ranges; // ranges, from inclusive, to exclusive
     uint32_t cell_count = 0;
 
@@ -718,7 +720,9 @@ void llama_memory_recurrent::state_write(llama_io_write_i & io, llama_seq_id seq
     state_write_data(io, cell_ranges);
 }
 
-void llama_memory_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq_id) {
+void llama_memory_recurrent::state_read(llama_io_read_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) {
+    GGML_UNUSED(flags);
+
     uint32_t cell_count;
     io.read_to(&cell_count, sizeof(cell_count));
 
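For reference, here is a sketch of the virtual interface these overrides implement, reconstructed from the signatures in the hunks above; the real declaration lives in the memory module headers and has more members, so treat this shape as an assumption. Recurrent memory carries no sliding-window attention state, which is why the overrides accept the new flags parameter but mark it unused with GGML_UNUSED.

```cpp
#include <cstdint>

// Types from the llama.cpp codebase, forward-declared for the sketch:
struct llama_io_write_i;
struct llama_io_read_i;
typedef int32_t  llama_seq_id;
typedef uint32_t llama_state_seq_flags; // assumed underlying type

// Assumed shape of the memory interface after this change: every memory
// implementation now receives flags so callers can request partial
// (e.g. SWA-only) serialization; llama_memory_recurrent simply ignores them.
struct llama_memory_i {
    virtual ~llama_memory_i() = default;

    virtual void state_write(llama_io_write_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) const = 0;
    virtual void state_read (llama_io_read_i  & io, llama_seq_id seq_id, llama_state_seq_flags flags) = 0;
};
```

Threading the flags through every implementation, rather than only the SWA-capable caches, keeps the call sites uniform: callers pass the same flags regardless of which memory module backs the context.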