Tool call support (generic + native for Llama, Functionary, Hermes, Mistral, Firefunction, DeepSeek) w/ lazy grammars (#9639)

---------

Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Xuan Son Nguyen <son@huggingface.co>
2025-10-31 08:51:55 +00:00 · 2025-01-30 19:13:58 +00:00
parent 27d135c970
commit 8b576b6c55
48 changed files with 3861 additions and 156 deletions
--- a/common/common.cpp
+++ b/common/common.cpp
@@ -12,6 +12,7 @@
 #include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
+#include "chat.hpp"
 #include "chat-template.hpp"

 #include <algorithm>
@@ -1774,11 +1775,13 @@ std::string common_detokenize(const struct llama_vocab * vocab, const std::vecto
 bool common_chat_verify_template(const std::string & tmpl, bool use_jinja) {
    if (use_jinja) {
        try {
-            auto chat_template = minja::chat_template(tmpl, "<s>", "</s>");
-            chat_template.apply({{
+            auto chat_template = common_chat_template(tmpl, "<s>", "</s>");
+            common_chat_inputs inputs;
+            inputs.messages = json::array({{
                {"role", "user"},
                {"content", "test"},
-            }}, json(), true);
+            }});
+            common_chat_params_init(chat_template, inputs);
            return true;
        } catch (const std::exception & e) {
            LOG_ERR("%s: failed to apply template: %s\n", __func__, e.what());
@@ -1800,7 +1803,10 @@ std::string common_chat_apply_template(
        for (const auto & msg : msgs) {
            messages.push_back({{"role", msg.role}, {"content", msg.content}});
        }
-        return tmpl.apply(messages, /* tools= */ json(), add_ass);
+        common_chat_inputs inputs;
+        inputs.messages = messages;
+        inputs.add_generation_prompt = add_ass;
+        return common_chat_params_init(tmpl, inputs).prompt;
    }

    int alloc_size = 0;
@@ -1855,10 +1861,10 @@ std::string common_chat_format_single(

 std::string common_chat_format_example(const common_chat_template & tmpl, bool use_jinja) {
    std::vector<common_chat_msg> msgs = {
-        {"system",    "You are a helpful assistant"},
-        {"user",      "Hello"},
-        {"assistant", "Hi there"},
-        {"user",      "How are you?"},
+        {"system",    "You are a helpful assistant", {}},
+        {"user",      "Hello", {}},
+        {"assistant", "Hi there", {}},
+        {"user",      "How are you?", {}},
    };
    return common_chat_apply_template(tmpl, msgs, true, use_jinja);
 }