diff --git a/tools/server/server.cpp b/tools/server/server.cpp index cb49254a1c..92d30664e4 100644 --- a/tools/server/server.cpp +++ b/tools/server/server.cpp @@ -3796,7 +3796,7 @@ struct server_context { // when the prompt prefix does not match, print the tokens around the mismatch // this is useful for debugging prompt caching - { + if (slots_debug) { const int np0 = std::max(n_past - 4, 0); const int np1 = std::min(n_past + 6, std::min(slot.prompt.tokens.size(), slot.task->tokens.size()));