From 93373cc54e625293d8faefe97392121e658574ec Mon Sep 17 00:00:00 2001
From: Georgi Gerganov
Date: Thu, 30 Oct 2025 20:39:26 +0200
Subject: [PATCH] cont : update todos [no ci]

---
 tools/server/server.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/server/server.cpp b/tools/server/server.cpp
index 763fead257..c9d26a1ea8 100644
--- a/tools/server/server.cpp
+++ b/tools/server/server.cpp
@@ -2699,7 +2699,7 @@ struct server_context {
 
     // return true if at least one slot has been purged
     // TODO: improve logic
-    // - smarter decision which slot to purge
+    // - smarter decision which slot to purge (LRU or longest prompt?)
     // - move slot to level 2 cache instead of removing?
     // - instead of purging, try to store and resume later?
     bool try_purge_idle_slots() {
@@ -4159,7 +4159,7 @@ struct server_context {
                     std::string err;
 
                     if (n_batch == 1 && ret == 1) {
-                        // TODO: try to terminate only the largest active slot and continue
+                        // TODO: try to terminate only the largest active slot/sequence and continue with the rest
                         // need to remove the tokens from the current batch too
                         err = "Context size has been exceeded.";
                     }