	Forward decl minja::chat_template to avoid eager json dep
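As the diffs below show, the shared header now only forward-declares `minja::chat_template` and holds the parsed templates behind `std::unique_ptr`, so including that header no longer drags in `chat-template.hpp` (and, through it, the JSON dependency the commit title refers to). The translation units that actually construct or apply templates include `chat-template.hpp` themselves, dereference the pointers, and assert that the always-present default template is set.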
@@ -12,6 +12,7 @@
 #include "json.hpp"
 #include "json-schema-to-grammar.h"
 #include "llama.h"
+#include "chat-template.hpp"
 
 #include <algorithm>
 #include <cinttypes>
@@ -1827,11 +1828,18 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model *
     auto eos_token = common_token_to_piece(vocab, llama_vocab_eos(vocab), true);
     std::string default_template_src = chat_template_override;
     std::string tool_use_template_src = chat_template_override;
+    bool has_explicit_template = !chat_template_override.empty();
     if (chat_template_override.empty()) {
         auto str = llama_model_chat_template(model, /* name */ nullptr);
-        if (str) default_template_src = str;
+        if (str) {
+            default_template_src = str;
+            has_explicit_template = true;
+        }
         str = llama_model_chat_template(model, /* name */ "tool_use");
-        if (str) tool_use_template_src = str;
+        if (str) {
+            tool_use_template_src = str;
+            has_explicit_template = true;
+        }
     }
     if (default_template_src.empty() || default_template_src == "chatml") {
         if (!tool_use_template_src.empty()) {
@@ -1848,9 +1856,11 @@ llama_chat_templates llama_chat_templates_from_model(const struct llama_model *
         }
     }
     return {
-        /* .default_template = */  { default_template_src, bos_token, eos_token },
-        /* .tool_use_template = */ tool_use_template_src.empty() ? std::nullopt
-            : std::optional<minja::chat_template>({ tool_use_template_src, bos_token, eos_token }),
+        has_explicit_template,
+        std::move(std::make_unique<minja::chat_template>(default_template_src, bos_token, eos_token)),
+        tool_use_template_src.empty()
+            ? nullptr
+            : std::move(std::make_unique<minja::chat_template>(tool_use_template_src, bos_token, eos_token))
     };
 }

@@ -3,7 +3,6 @@
 #pragma once
 
 #include "llama-cpp.h"
-#include "chat-template.hpp"
 
 #include <optional>
 #include <string>
@@ -601,8 +600,18 @@ struct common_chat_msg {
 // Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
 bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);
 
+namespace minja {
+    class chat_template;
+}
+
 typedef minja::chat_template llama_chat_template;
 
+struct llama_chat_templates {
+    bool has_explicit_template; // Model had a builtin template or a template override was specified.
+    std::unique_ptr<llama_chat_template> default_template; // always set (defaults to chatml)
+    std::unique_ptr<llama_chat_template> tool_use_template;
+};
+
 // CPP wrapper for llama_chat_apply_template
 // If the built-in template is not supported, we default to chatml
 // If the custom "tmpl" is not supported, we throw an error
@@ -624,11 +633,6 @@ std::string common_chat_format_single(
 std::string common_chat_format_example(
     const llama_chat_template & tmpl, bool use_jinja);
 
-struct llama_chat_templates {
-    llama_chat_template default_template;
-    std::optional<llama_chat_template> tool_use_template;
-};
-
 llama_chat_templates llama_chat_templates_from_model(const struct llama_model * model, const std::string & chat_template_override);
 
 //

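The header hunk above is the heart of the change: instead of including `chat-template.hpp`, the header forward-declares `minja::chat_template`, and the relocated `llama_chat_templates` struct owns the templates through `std::unique_ptr`. Below is a minimal, self-contained sketch of that pattern using hypothetical names (`demo::chat_template`, `chat_templates_holder`), not the real llama.cpp/minja types; it illustrates why the full definition must still be visible wherever the holder is constructed or destroyed.

```cpp
// A minimal sketch of the forward-declaration + unique_ptr pattern.
// All names here (demo::chat_template, chat_templates_holder) are
// hypothetical stand-ins, not the real llama.cpp/minja types.
#include <cstdio>
#include <memory>
#include <string>
#include <utility>

// ----- "header" part: forward declaration only, no heavy includes -----
namespace demo { class chat_template; }

struct chat_templates_holder {
    bool has_explicit_template;
    std::unique_ptr<demo::chat_template> default_template;  // incomplete type is fine here
    std::unique_ptr<demo::chat_template> tool_use_template;
};

// ----- "implementation" part: the full definition, normally pulled in by
// the .cpp files that actually build or destroy the templates -----
namespace demo {
class chat_template {
  public:
    explicit chat_template(std::string src) : src_(std::move(src)) {}
    const std::string & source() const { return src_; }
  private:
    std::string src_;
};
} // namespace demo

int main() {
    // Constructing and (implicitly) destroying the holder requires the complete
    // type, which is why each .cpp in the diff adds #include "chat-template.hpp".
    chat_templates_holder templates {
        /* has_explicit_template */ true,
        std::make_unique<demo::chat_template>("chatml"),
        nullptr,
    };
    std::printf("default template source: %s\n", templates.default_template->source().c_str());
    return 0;
}
```

The remaining hunks below are the mechanical fallout of that move: each affected translation unit includes `chat-template.hpp` directly, asserts `default_template`, and dereferences the smart pointers where it previously used the templates by value.
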
@@ -4,6 +4,7 @@
 #include "log.h"
 #include "sampling.h"
 #include "llama.h"
+#include "chat-template.hpp"
 
 #include <cstdio>
 #include <cstring>
@@ -200,7 +201,7 @@ int main(int argc, char ** argv) {
     }
 
     // auto enable conversation mode if chat template is available
-    const bool has_chat_template = !chat_templates.default_template.source().empty();
+    const bool has_chat_template = chat_templates.has_explicit_template && chat_templates.default_template;
     if (params.conversation_mode == COMMON_CONVERSATION_MODE_AUTO) {
         if (has_chat_template) {
             LOG_INF("%s: chat template is available, enabling conversation mode (disable it with -no-cnv)\n", __func__);
@@ -218,7 +219,7 @@ int main(int argc, char ** argv) {
     // print chat template example in conversation mode
     if (params.conversation_mode) {
         if (params.enable_chat_template) {
-            LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(chat_templates.default_template, params.use_jinja).c_str());
+            LOG_INF("%s: chat template example:\n%s\n", __func__, common_chat_format_example(*chat_templates.default_template, params.use_jinja).c_str());
         } else {
             LOG_INF("%s: in-suffix/prefix is specified, chat template will be disabled\n", __func__);
         }
@@ -264,7 +265,7 @@ int main(int argc, char ** argv) {
 
     auto chat_add_and_format = [&chat_msgs, &chat_templates](const std::string & role, const std::string & content) {
         common_chat_msg new_msg{role, content};
-        auto formatted = common_chat_format_single(chat_templates.default_template, chat_msgs, new_msg, role == "user", g_params->use_jinja);
+        auto formatted = common_chat_format_single(*chat_templates.default_template, chat_msgs, new_msg, role == "user", g_params->use_jinja);
         chat_msgs.push_back({role, content});
         LOG_DBG("formatted: '%s'\n", formatted.c_str());
         return formatted;

@@ -26,6 +26,7 @@
 #include "common.h"
 #include "json.hpp"
 #include "llama-cpp.h"
+#include "chat-template.hpp"
 
 #if defined(__unix__) || (defined(__APPLE__) && defined(__MACH__)) || defined(_WIN32)
 [[noreturn]] static void sigint_handler(int) {
@@ -936,6 +937,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
     int prev_len = 0;
     llama_data.fmtted.resize(llama_n_ctx(llama_data.context.get()));
     auto chat_templates = llama_chat_templates_from_model(llama_data.model.get(), "");
+    GGML_ASSERT(chat_templates.default_template);
     static const bool stdout_a_terminal = is_stdout_a_terminal();
     while (true) {
         // Get user input
@@ -946,7 +948,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
 
         add_message("user", user.empty() ? user_input : user, llama_data);
         int new_len;
-        if (apply_chat_template_with_error_handling(chat_templates.default_template, llama_data, true, new_len, use_jinja) < 0) {
+        if (apply_chat_template_with_error_handling(*chat_templates.default_template, llama_data, true, new_len, use_jinja) < 0) {
             return 1;
         }
 
@@ -961,7 +963,7 @@ static int chat_loop(LlamaData & llama_data, const std::string & user, bool use_
         }
 
         add_message("assistant", response, llama_data);
-        if (apply_chat_template_with_error_handling(chat_templates.default_template, llama_data, false, prev_len, use_jinja) < 0) {
+        if (apply_chat_template_with_error_handling(*chat_templates.default_template, llama_data, false, prev_len, use_jinja) < 0) {
             return 1;
         }
     }

@@ -1745,8 +1745,9 @@ struct server_context {
 
         if (use_jinja) {
             auto templates = llama_chat_templates_from_model(model, "");
+            GGML_ASSERT(templates.default_template);
             try {
-                templates.default_template.apply({{
+                templates.default_template->apply({{
                     {"role", "user"},
                     {"content", "test"},
                 }}, json(), true);
@@ -3630,6 +3631,7 @@ int main(int argc, char ** argv) {
         std::lock_guard<std::mutex> lock(chat_templates_mutex);
         if (!chat_templates) {
             chat_templates = llama_chat_templates_from_model(ctx_server.model, ctx_server.params_base.chat_template);
+            GGML_ASSERT(chat_templates->default_template);
         }
         return *chat_templates;
     };
@@ -3641,7 +3643,7 @@ int main(int argc, char ** argv) {
             { "default_generation_settings", ctx_server.default_generation_settings_for_props },
             { "total_slots",                 ctx_server.params_base.n_parallel },
             { "model_path",                  ctx_server.params_base.model },
-            { "chat_template",               templates.default_template.source() },
+            { "chat_template",               templates.default_template->source() },
             { "build_info",                  build_info },
         };
         if (ctx_server.params_base.use_jinja && templates.tool_use_template) {
@@ -3868,7 +3870,7 @@ int main(int argc, char ** argv) {
 
         auto body = json::parse(req.body);
         const auto & templates = get_chat_templates();
-        const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : templates.default_template;
+        const auto & chat_template = body.contains("tools") && templates.tool_use_template ? *templates.tool_use_template : *templates.default_template;
         json data = oaicompat_completion_params_parse(body, chat_template, params.use_jinja);
 
         return handle_completions_impl(
@@ -4287,8 +4289,8 @@ int main(int argc, char ** argv) {
 
     // print sample chat example to make it clear which template is used
     LOG_INF("%s: chat template, chat_template: %s, example_format: '%s'\n", __func__,
-        get_chat_templates().default_template.source().c_str(),
-        common_chat_format_example(get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
+        get_chat_templates().default_template->source().c_str(),
+        common_chat_format_example(*get_chat_templates().default_template, ctx_server.params_base.use_jinja).c_str());
 
     ctx_server.queue_tasks.on_new_task(std::bind(
                 &server_context::process_single_task, &ctx_server, std::placeholders::_1));