server : context checkpointing for hybrid and recurrent models (#16382)
* initial commit for branch 3

* generalize `swa_checkpoint` to `ctx_checkpoint`

  this extends `llama-server`'s SWA checkpointing logic to include hybrid/recurrent models such as Jamba, Granite

* oops

* disable debug prints

* keep backwards compat with `--swa-checkpoints`

  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* update prompt re-processing message

* fix off-by-one error per GG

* keep `seq_rm` log per GG

  Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* server : fix checkpoint logic to support recurrent caches

* server : cleanup and fixes

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
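For context, here is a minimal sketch of how a context checkpoint can be saved and restored for one sequence through the public state API (the `_ext` functions are declared in `llama.h`; the helper names and error handling are hypothetical illustrations, not the actual `llama-server` code):

```cpp
#include <cstdint>
#include <vector>

#include "llama.h"

// Hypothetical helper: snapshot only the "partial" state of a sequence
// (SWA KV cells or recurrent state) into a byte buffer. This is what makes
// cheap rollback checkpoints possible for hybrid/recurrent models.
static std::vector<uint8_t> ctx_checkpoint_save(llama_context * ctx, llama_seq_id seq_id) {
    const size_t size = llama_state_seq_get_size_ext(ctx, seq_id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);

    std::vector<uint8_t> buf(size);
    llama_state_seq_get_data_ext(ctx, buf.data(), buf.size(), seq_id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);

    return buf;
}

// Hypothetical helper: restore a checkpoint into the same sequence so the
// server can roll back instead of re-processing the prompt from scratch.
// llama_state_seq_set_data_ext() returns 0 on failure.
static bool ctx_checkpoint_restore(llama_context * ctx, llama_seq_id seq_id, const std::vector<uint8_t> & buf) {
    return llama_state_seq_set_data_ext(ctx, buf.data(), buf.size(), seq_id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY) > 0;
}
```

The diff below makes the corresponding generalization inside the unified-SWA KV cache: the flag that used to be SWA-specific now selects any "partial" cache.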
```diff
@@ -220,7 +220,7 @@ bool llama_kv_cache_iswa::get_can_shift() const {
 }

 void llama_kv_cache_iswa::state_write(llama_io_write_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) const {
-    if ((flags & LLAMA_STATE_SEQ_FLAGS_SWA_ONLY) == 0) {
+    if ((flags & LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY) == 0) {
         kv_base->state_write(io, seq_id, flags);
     }

@@ -228,7 +228,7 @@ void llama_kv_cache_iswa::state_write(llama_io_write_i & io, llama_seq_id seq_id
 }

 void llama_kv_cache_iswa::state_read(llama_io_read_i & io, llama_seq_id seq_id, llama_state_seq_flags flags) {
-    if ((flags & LLAMA_STATE_SEQ_FLAGS_SWA_ONLY) == 0) {
+    if ((flags & LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY) == 0) {
         kv_base->state_read(io, seq_id, flags);
     }
```
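The guard above means a full snapshot (`flags == 0`) serializes both `kv_base` and the partial (SWA or recurrent) cache, while the partial-only flag skips `kv_base`. A caller would see that reflected in the reported state sizes; a sketch, assuming the same `_ext` API as above:

```cpp
#include <cassert>

#include "llama.h"

// Sketch: a partial checkpoint omits kv_base, so it should never be larger
// than a full snapshot of the same sequence.
static void check_checkpoint_sizes(llama_context * ctx, llama_seq_id seq_id) {
    const size_t full_size    = llama_state_seq_get_size_ext(ctx, seq_id, 0);
    const size_t partial_size = llama_state_seq_get_size_ext(ctx, seq_id, LLAMA_STATE_SEQ_FLAGS_PARTIAL_ONLY);

    assert(partial_size <= full_size);
}
```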