mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
server : host-memory prompt caching (#16391)
* minor : code style * server : fix prompt similarity calculation * server : initial host-memory prompt caching * cont * server : refactor * cont * cont : make the server task of the slot const * cont : minor [no ci] * server : cache prompts and checkpoints only for completion tasks * server : improve prompt caching logic * cont : fix check for number of cached prompts [no ci] * server : improve caching logic, add -cram CLI arg * server : print prompt mismatch info * cont : better naming [no ci] * server : improve prompt cache loading logic * server : add option to debug the slot contents (#16482) * server : add option to debug the slot contents * Update tools/server/server.cpp --------- Co-authored-by: Xuan-Son Nguyen <son@huggingface.co> * server : add option to disable prompt cache --------- Co-authored-by: Xuan-Son Nguyen <son@huggingface.co>
This commit is contained in:
@@ -33,8 +33,8 @@ struct common_chat_msg_content_part {
|
||||
struct common_chat_msg {
|
||||
std::string role;
|
||||
std::string content;
|
||||
std::vector<common_chat_msg_content_part> content_parts = {};
|
||||
std::vector<common_chat_tool_call> tool_calls = {};
|
||||
std::vector<common_chat_msg_content_part> content_parts;
|
||||
std::vector<common_chat_tool_call> tool_calls;
|
||||
std::string reasoning_content;
|
||||
std::string tool_name;
|
||||
std::string tool_call_id;
|
||||
@@ -44,7 +44,7 @@ struct common_chat_msg {
|
||||
bool empty() const {
|
||||
return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
|
||||
}
|
||||
void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
|
||||
void set_tool_call_ids(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
|
||||
for (auto i = 0u; i < tool_calls.size(); i++) {
|
||||
if (ids_cache.size() <= i) {
|
||||
auto id = tool_calls[i].id;
|
||||
|
||||
Reference in New Issue
Block a user