mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
server : context checkpointing for hybrid and recurrent models (#16382)
* initial commit for branch 3 * generalize `swa_checkpoint` to `ctx_checkpoint` this extends `llama-server`'s SWA checkpointing logic to include hybrid/recurrent models such as Jamba, Granite * oops * disable debug prints * keep backwards compat with `--swa-checkpoints` Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * update prompt re-processing message * fix off-by-one error per GG * keep `seq_rm` log per GG Co-authored-by: Georgi Gerganov <ggerganov@gmail.com> * server : fix checkpoint logic to support recurrent caches * server : cleanup and fixes --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
This commit is contained in:
@@ -136,6 +136,7 @@ void llama_memory_recurrent::clear(bool data) {
|
||||
}
|
||||
|
||||
bool llama_memory_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos p1) {
|
||||
//printf("[DEBUG] calling llama_memory_recurrent::seq_rm` with `seq_id=%d, p0=%d, p1=%d`\n", seq_id, p0, p1);
|
||||
uint32_t new_head = size;
|
||||
|
||||
if (p0 < 0) {
|
||||
@@ -156,7 +157,8 @@ bool llama_memory_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
|
||||
if (tail_id >= 0) {
|
||||
const auto & cell = cells[tail_id];
|
||||
// partial intersection is invalid
|
||||
if ((0 < p0 && p0 <= cell.pos) || (0 < p1 && p1 <= cell.pos)) {
|
||||
if ((0 < p0 && p0 < cell.pos) || (0 < p1 && p1 <= cell.pos)) {
|
||||
//printf("[DEBUG] inside `llama_memory_recurrent::seq_rm`: partial intersection is invalid, so returning false\n");
|
||||
return false;
|
||||
}
|
||||
// invalidate tails which will be cleared
|
||||
@@ -167,6 +169,7 @@ bool llama_memory_recurrent::seq_rm(llama_seq_id seq_id, llama_pos p0, llama_pos
|
||||
} else {
|
||||
// seq_id is negative, then the range should include everything or nothing
|
||||
if (p0 != p1 && (p0 != 0 || p1 != std::numeric_limits<llama_pos>::max())) {
|
||||
//printf("[DEBUG] inside `llama_memory_recurrent::seq_rm`: `seq_id` is negative, so returning false\n");
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user