mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-28 08:31:25 +00:00 
			
		
		
		
	 88021565f0
			
		
	
	88021565f0
	
	
	
		
			
			* Add DeepSeek V3.1 thinking mode support
- Added COMMON_CHAT_FORMAT_DEEPSEEK_V3_1 enum value
- Created common_chat_params_init_deepseek_v3_1() function (currently uses R1 implementation)
- Created common_chat_parse_deepseek_v3_1() function that handles V3.1 thinking format:
  - Extracts reasoning content before '</think>' tag into reasoning_content
  - Extracts regular content after '</think>' tag into content
  - No opening '<think>' tag in V3.1 format
- Added detection logic for V3.1 templates based on pattern: 'message['prefix'] is defined and message['prefix'] and thinking'
- Added V3.1 case to parsing switch statement
This addresses the issue where V3.1 outputs reasoning content followed by '</think>' and then regular content without the opening '<think>' tag.
* Another attempt at V3.1 non-thinking
* Fix test, but it's not asserting anything.
* Ignore vim swap files in tests dir
* Update the test
* Try using try_find_literal instead of regex
* passing test
* Revert "Try using try_find_literal instead of regex"
This reverts commit c50d887ec2.
* Remove unnecessary change
* Remove comment
* Add code to handle non-thinking mode.
* Try to set message['prefix'] when thinking is enabled.
* This fixes reasoning, but breaks normal content. We need state in the
chat parser.
* DeepSeek V3.1 thinking is now the default. Disable with `--reasoning-budget 0`.
* Simplify (DeepSeek V3.1 reasoning)
* Fix sign inversion bug
* Add some tool calling code (not working).
* Tool calls working in non-reasoning mode.
* Attempt a unit test for tool call parsing.
* Passing test
* Add tests for both happy path and broken fenced DeepSeek V3.1 tool call variants.
* Passing DeepSeek V3.1 tool call tests, but model is not working.
* Revert assistant response prefill change. Not my monkeys.
* Add fenced_thinking unit test variant. Passes, but thinking tool calling
still isn't working for some reason.
* Tests pass in reasoning mode. Also e2e tool test passes.
* Make a copy of the parse_json_tool_calls function for deepseek-v3.1 so
as to not accidentally introduce regressions.
* Fix thinking_forced_open logic. tool calling broken. Need to add another
test case.
* That's what I get for cargo culting a newline.
* Add multi tool call test for deepseek v3.1 non-reasoning
* Move test, remove .gitignore change
* Place deepseek-v3.1 reasoning test directly into existing reasoning
function per CISC's request.
* Address whitespace CI failure.
* Merge two assert_equals per CISC's request.
* Add DeepSeek-V3.1 tests to tests/test-chat.cpp per CISC's request.
* Merge deepseek V3.1 and regular parse_json_tool_calls() function
behaviors by adding optional update_cursor argument.
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* DeepSeek V3.1 fix reasoning_format none
* Strip grammar down to strictly what we expect based on model card. Throw
out parts we cargo culted from R1 that don't make sense.
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* DeepSeek V3.1 - Add edge case where thinking is forced open, there is
tool calling in the reasoning content, but then the model just stops the
output without closing the </think> tag, so it's not a partial. In this
case, use the tool call in the reasoning content.
* DeepSeek V3.1 - simplify update_cursor
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Fix indent
---------
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
		
	
		
			
				
	
	
		
			216 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			216 lines
		
	
	
		
			8.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| // Chat support (incl. tool call grammar constraining & output parsing) w/ generic & custom template handlers.
 | |
| 
 | |
| #pragma once
 | |
| 
 | |
| #include "common.h"
 | |
| #include <functional>
 | |
| #include <chrono>
 | |
| #include <string>
 | |
| #include <vector>
 | |
| #include <map>
 | |
| 
 | |
| struct common_chat_templates;
 | |
| 
 | |
// A single tool call emitted by the model: function name, its arguments
// (carried as a raw string), and an optional call id.
struct common_chat_tool_call {
    std::string name;
    std::string arguments;
    std::string id;

    bool operator==(const common_chat_tool_call & other) const {
        if (name != other.name) {
            return false;
        }
        if (arguments != other.arguments) {
            return false;
        }
        return id == other.id;
    }
};
 | |
| 
 | |
// One typed part of a multi-part message payload (a "type" tag plus its text).
struct common_chat_msg_content_part {
    std::string type;
    std::string text;

    bool operator==(const common_chat_msg_content_part & other) const {
        const bool same_type = (type == other.type);
        const bool same_text = (text == other.text);
        return same_type && same_text;
    }
};
 | |
| 
 | |
| struct common_chat_msg {
 | |
|     std::string role;
 | |
|     std::string content;
 | |
|     std::vector<common_chat_msg_content_part> content_parts = {};
 | |
|     std::vector<common_chat_tool_call> tool_calls = {};
 | |
|     std::string reasoning_content;
 | |
|     std::string tool_name;
 | |
|     std::string tool_call_id;
 | |
| 
 | |
|     template <class T> T to_json_oaicompat() const;
 | |
| 
 | |
|     bool empty() const {
 | |
|         return content.empty() && content_parts.empty() && tool_calls.empty() && reasoning_content.empty() && tool_name.empty() && tool_call_id.empty();
 | |
|     }
 | |
|     void ensure_tool_call_ids_set(std::vector<std::string> & ids_cache, const std::function<std::string()> & gen_tool_call_id) {
 | |
|         for (auto i = 0u; i < tool_calls.size(); i++) {
 | |
|             if (ids_cache.size() <= i) {
 | |
|                 auto id = tool_calls[i].id;
 | |
|                 if (id.empty()) {
 | |
|                     id = gen_tool_call_id();
 | |
|                 }
 | |
|                 ids_cache.push_back(id);
 | |
|             }
 | |
|             tool_calls[i].id = ids_cache[i];
 | |
|         }
 | |
|     }
 | |
|     bool operator==(const common_chat_msg & other) const {
 | |
|         return role == other.role
 | |
|             && content == other.content
 | |
|             && content_parts == other.content_parts
 | |
|             && tool_calls == other.tool_calls
 | |
|             && reasoning_content == other.reasoning_content
 | |
|             && tool_name == other.tool_name
 | |
|             && tool_call_id == other.tool_call_id;
 | |
|     }
 | |
|     bool operator!=(const common_chat_msg & other) const {
 | |
|         return !(*this == other);
 | |
|     }
 | |
| };
 | |
| 
 | |
| struct common_chat_msg_diff {
 | |
|     std::string reasoning_content_delta;
 | |
|     std::string content_delta;
 | |
|     size_t tool_call_index = std::string::npos;
 | |
|     common_chat_tool_call tool_call_delta;
 | |
| 
 | |
|     static std::vector<common_chat_msg_diff> compute_diffs(const common_chat_msg & previous_msg, const common_chat_msg & new_msg);
 | |
| 
 | |
|     bool operator==(const common_chat_msg_diff & other) const {
 | |
|         return content_delta == other.content_delta
 | |
|         && tool_call_index == other.tool_call_index
 | |
|         && tool_call_delta == other.tool_call_delta;
 | |
|     }
 | |
| };
 | |
| 
 | |
// A callable tool exposed to the model (OpenAI-style function tool): its name,
// a human-readable description, and its parameters — presumably a JSON schema
// carried as a string; confirm against the oaicompat tool-parsing code.
struct common_chat_tool {
    std::string name;
    std::string description;
    std::string parameters;
};
 | |
| 
 | |
// Tool-choice modes, mirroring OpenAI's "tool_choice" request field
// (see common_chat_tool_choice_parse_oaicompat below).
enum common_chat_tool_choice {
    COMMON_CHAT_TOOL_CHOICE_AUTO,     // model decides whether to call a tool
    COMMON_CHAT_TOOL_CHOICE_REQUIRED, // model must produce a tool call
    COMMON_CHAT_TOOL_CHOICE_NONE,     // tool calling disabled
};
 | |
| 
 | |
// Known chat formats — one per family of template/output syntax the
// chat parser understands.
enum common_chat_format {
    COMMON_CHAT_FORMAT_CONTENT_ONLY,
    COMMON_CHAT_FORMAT_GENERIC,
    COMMON_CHAT_FORMAT_MISTRAL_NEMO,
    COMMON_CHAT_FORMAT_LLAMA_3_X,
    COMMON_CHAT_FORMAT_LLAMA_3_X_WITH_BUILTIN_TOOLS,
    COMMON_CHAT_FORMAT_DEEPSEEK_R1,
    COMMON_CHAT_FORMAT_FIREFUNCTION_V2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2,
    COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1,
    // DeepSeek V3.1: reasoning ends with "</think>" but has no opening "<think>" tag.
    COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
    COMMON_CHAT_FORMAT_HERMES_2_PRO,
    COMMON_CHAT_FORMAT_COMMAND_R7B,
    COMMON_CHAT_FORMAT_GRANITE,
    COMMON_CHAT_FORMAT_GPT_OSS,
    COMMON_CHAT_FORMAT_SEED_OSS,
    COMMON_CHAT_FORMAT_NEMOTRON_V2,

    COMMON_CHAT_FORMAT_COUNT, // Not a format, just the # formats
};
 | |
| 
 | |
// Inputs for rendering a chat template into a prompt (plus the grammar /
// tool-calling configuration derived alongside it).
struct common_chat_templates_inputs {
    std::vector<common_chat_msg> messages;
    std::string grammar;
    std::string json_schema;
    bool add_generation_prompt = true;
    bool use_jinja = true;
    // Parameters below only supported when use_jinja is true
    std::vector<common_chat_tool> tools;
    common_chat_tool_choice tool_choice = COMMON_CHAT_TOOL_CHOICE_AUTO;
    bool parallel_tool_calls = false;
    common_reasoning_format reasoning_format = COMMON_REASONING_FORMAT_NONE;
    bool enable_thinking = true;
    // Reference timestamp — presumably consumed by templates that interpolate
    // the current date/time; confirm in chat.cpp.
    std::chrono::system_clock::time_point now = std::chrono::system_clock::now();
    // Extra key/value arguments forwarded to the (jinja) template.
    std::map<std::string, std::string> chat_template_kwargs;
    bool add_bos = false;
    bool add_eos = false;
};
 | |
| 
 | |
// Result of applying a chat template: the rendered prompt plus the grammar /
// trigger configuration used to constrain and later parse the model's output.
struct common_chat_params {
    common_chat_format                  format = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    std::string                         prompt;
    std::string                         grammar;
    // NOTE(review): presumably "lazy" means the grammar is enforced only once a
    // grammar trigger fires — confirm against the sampling code.
    bool                                grammar_lazy = false;
    // The prompt ends inside an already-open thinking section, so generation
    // starts in reasoning mode (e.g. DeepSeek V3.1's missing "<think>" tag).
    bool                                thinking_forced_open = false;
    std::vector<common_grammar_trigger> grammar_triggers;
    std::vector<std::string>            preserved_tokens;
    std::vector<std::string>            additional_stops;
};
 | |
| 
 | |
// Controls how common_chat_parse() interprets raw model output.
struct common_chat_syntax {
    common_chat_format       format                = COMMON_CHAT_FORMAT_CONTENT_ONLY;
    common_reasoning_format  reasoning_format      = COMMON_REASONING_FORMAT_NONE;
    // Whether reasoning_content should be inlined in the content (e.g. for reasoning_format=deepseek in stream mode)
    bool                     reasoning_in_content  = false;
    // The prompt left a thinking section open, so parsing starts inside reasoning.
    bool                     thinking_forced_open  = false;
    // When false, tool-call markup is presumably left in the content rather than
    // parsed into tool_calls — confirm in chat.cpp.
    bool                     parse_tool_calls      = true;
};
 | |
| 
 | |
// Check if the template supplied via "--chat-template" is supported or not. Returns true if it's valid
bool common_chat_verify_template(const std::string & tmpl, bool use_jinja);

void common_chat_templates_free(struct common_chat_templates * tmpls);

// Deleter so the opaque common_chat_templates handle can live in a unique_ptr.
struct common_chat_templates_deleter { void operator()(common_chat_templates * tmpls) { common_chat_templates_free(tmpls); } };

// Owning smart pointer for the opaque common_chat_templates handle.
typedef std::unique_ptr<struct common_chat_templates, common_chat_templates_deleter> common_chat_templates_ptr;

// Builds the chat templates from the model and/or the override string; the
// BOS/EOS overrides presumably replace the model's tokens when non-empty —
// confirm in chat.cpp.
common_chat_templates_ptr common_chat_templates_init(
                                    const struct llama_model * model,
                                           const std::string & chat_template_override,
                                           const std::string & bos_token_override = "",
                                           const std::string & eos_token_override = "");

// NOTE(review): name suggests "template came from an explicit override rather
// than the model" — confirm in chat.cpp.
bool         common_chat_templates_was_explicit(const struct common_chat_templates * tmpls);
// Returns the template source text; `variant` selects a named variant (nullptr = default).
const char * common_chat_templates_source(const struct common_chat_templates * tmpls, const char * variant = nullptr);
 | |
| 
 | |
| 
 | |
// Renders `inputs` through the selected template, producing the prompt plus
// the format/grammar/trigger configuration (see common_chat_params).
struct common_chat_params      common_chat_templates_apply(
    const struct common_chat_templates * tmpls,
    const struct common_chat_templates_inputs & inputs);

// Format single message, while taking into account the position of that message in chat history
std::string common_chat_format_single(
        const struct common_chat_templates * tmpls,
        const std::vector<common_chat_msg> & past_msg,
        const common_chat_msg & new_msg,
        bool add_ass,
        bool use_jinja);

// Returns an example of formatted chat
std::string common_chat_format_example(
    const struct common_chat_templates * tmpls,
    bool use_jinja,
    const std::map<std::string, std::string> & chat_template_kwargs);

// Human-readable names for the format/reasoning-format enums, and the reverse lookup.
const char*               common_chat_format_name(common_chat_format format);
const char*               common_reasoning_format_name(common_reasoning_format format);
common_reasoning_format   common_reasoning_format_from_name(const std::string & format);
// Parses raw model output into a message according to `syntax`; `is_partial`
// marks input that is still streaming in.
common_chat_msg           common_chat_parse(const std::string & input, bool is_partial, const common_chat_syntax & syntax);

// Maps an OpenAI-style "tool_choice" request value onto the enum.
common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice);

// Whether the loaded template honors the enable_thinking input.
// NOTE(review): inferred from the name — confirm against chat.cpp.
bool common_chat_templates_support_enable_thinking(const common_chat_templates * chat_templates);
 | |
| 
 | |
// Parses a JSON array of messages in OpenAI's chat completion API format.
// T can be std::string containing JSON or nlohmann::ordered_json
template <class T> std::vector<common_chat_msg> common_chat_msgs_parse_oaicompat(const T & messages);
// Inverse of the above; concat_typed_text presumably merges "text"-typed
// content parts into one content string — confirm in chat.cpp.
template <class T> T common_chat_msgs_to_json_oaicompat(const std::vector<common_chat_msg> & msgs, bool concat_typed_text = false);

// Parses a JSON array of tools in OpenAI's chat completion tool call API format.
// T can be std::string containing JSON or nlohmann::ordered_json
template <class T> std::vector<common_chat_tool> common_chat_tools_parse_oaicompat(const T & tools);
template <class T> T common_chat_tools_to_json_oaicompat(const std::vector<common_chat_tool> & tools);

// Serializes a streaming message diff to an OpenAI-compatible JSON delta object.
template <class T> T common_chat_msg_diff_to_json_oaicompat(const common_chat_msg_diff & diff);
 |