cont : update todos [no ci]
@@ -2699,7 +2699,7 @@ struct server_context {
 
     // return true if at least one slot has been purged
     // TODO: improve logic
-    //       - smarter decision which slot to purge
+    //       - smarter decision which slot to purge (LRU or longest prompt?)
     //       - move slot to level 2 cache instead of removing?
     //       - instead of purging, try to store and resume later?
     bool try_purge_idle_slots() {
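
The new TODO hints at an LRU-style purge policy, possibly tie-broken by prompt length. A minimal sketch of what that selection could look like, assuming hypothetical slot/slot_state types with a t_last_used timestamp and n_prompt_tokens counter (illustrative stand-ins, not the actual server_context members):

// Hedged sketch of one possible "smarter" purge policy from the TODO.
// slot, slot_state, t_last_used, n_prompt_tokens are illustrative
// stand-ins, not the real llama.cpp server types.
#include <cstdint>
#include <vector>

enum class slot_state { IDLE, PROCESSING };

struct slot {
    slot_state state           = slot_state::IDLE;
    int64_t    t_last_used     = -1; // timestamp of last activity
    int32_t    n_prompt_tokens = 0;  // size of the cached prompt
};

// Pick the idle slot that was used least recently; among ties, prefer
// the one holding the longest prompt, so purging frees the most
// KV-cache space.
static slot * pick_slot_to_purge(std::vector<slot> & slots) {
    slot * best = nullptr;
    for (auto & s : slots) {
        if (s.state != slot_state::IDLE) {
            continue;
        }
        if (best == nullptr ||
            s.t_last_used <  best->t_last_used ||
            (s.t_last_used == best->t_last_used &&
             s.n_prompt_tokens > best->n_prompt_tokens)) {
            best = &s;
        }
    }
    return best; // nullptr if no idle slot exists
}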
@@ -4159,7 +4159,7 @@ struct server_context {
         std::string err;
 
         if (n_batch == 1 && ret == 1) {
-            // TODO: try to terminate only the largest active slot and continue
+            // TODO: try to terminate only the largest active slot/sequence and continue with the rest
             //       need to remove the tokens from the current batch too
             err = "Context size has been exceeded.";
         }
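
The updated TODO suggests that on a context-size failure only the largest active slot/sequence should be terminated, with its tokens removed from the current batch so the remaining sequences can continue. A minimal sketch of that batch surgery, assuming a hypothetical batch_entry type rather than llama.cpp's real llama_batch API:

// Hedged sketch of the TODO: instead of failing the whole decode when
// the context is exceeded, drop only the sequence contributing the most
// tokens and retry the rest of the batch. batch_entry is illustrative.
#include <algorithm>
#include <cstdint>
#include <map>
#include <vector>

struct batch_entry {
    int32_t token;
    int32_t seq_id; // which slot/sequence this token belongs to
};

// Returns the id of the terminated sequence, or -1 if the batch is empty.
static int32_t drop_largest_sequence(std::vector<batch_entry> & batch) {
    std::map<int32_t, size_t> counts;
    for (const auto & e : batch) {
        counts[e.seq_id]++;
    }
    if (counts.empty()) {
        return -1;
    }
    const auto largest = std::max_element(counts.begin(), counts.end(),
        [](const auto & a, const auto & b) { return a.second < b.second; });
    const int32_t victim = largest->first;
    // remove the victim's tokens from the current batch, as the TODO notes
    batch.erase(std::remove_if(batch.begin(), batch.end(),
        [victim](const batch_entry & e) { return e.seq_id == victim; }),
        batch.end());
    return victim;
}

The caller would then mark the victim's slot as failed with the "Context size has been exceeded." error while continuing to decode the trimmed batch.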