mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-28 08:31:25 +00:00
chat : Deepseek V3.1 reasoning and tool calling support (OpenAI Style) (#15533)
* Add DeepSeek V3.1 thinking mode support
- Added COMMON_CHAT_FORMAT_DEEPSEEK_V3_1 enum value
- Created common_chat_params_init_deepseek_v3_1() function (currently uses R1 implementation)
- Created common_chat_parse_deepseek_v3_1() function that handles V3.1 thinking format:
- Extracts reasoning content before '</think>' tag into reasoning_content
- Extracts regular content after '</think>' tag into content
- No opening '<think>' tag in V3.1 format
- Added detection logic for V3.1 templates based on pattern: 'message['prefix'] is defined and message['prefix'] and thinking'
- Added V3.1 case to parsing switch statement
This addresses the issue where V3.1 outputs reasoning content followed by '</think>' and then regular content without the opening '<think>' tag.
* Another attempt by V3.1 non-thinking
* Fix test, but it's not asserting anything.
* Ignore vim swap files in tests dir
* Update the test
* Try using try_find_literal instead of regex
* passing test
* Revert "Try using try_find_literal instead of regex"
This reverts commit c50d887ec2.
* Remove unnecessary change
* Remove comment
* Add code to handle non-thinking mode.
* Try to set message['prefix'] when thinking is enabled.
* This fixes reasoning, but breaks normal content. We need state in the
chat parser.
* DeepSeek V3.1 thinking is now the default. Disable with `--reasoning-budget 0`.
* Simplify (DeepSeek V3.1 reasoning)
* Fix sign inversion bug
* Add some tool calling code (not working).
* Tool calls working in non-reasoning mode.
* Attempt a unit test for tool call parsing.
* Passing test
* Add tests for both happy path and broken fenced DeepSeek V3.1 tool call variants.
* Passing DeepSeek V3.1 tool call tests, but model is not working.
* Revert assistance response prefill change. Not my monkeys.
* Add fenced_thinking unit test variant. Passes, but thinking tool calling
still isn't working for some reason.
* Tests pass in reasoning mode. Also e2e tool test passes.
* Make a copy of the parse_json_tool_calls function for deepseek-v3.1 so
as to not accidentally introduce regressions.
* Fix thinking_forced_open logic. tool calling broken. Need to add another
test case.
* That's what I get for cargo culting a newline.
* Add multi tool call test for deepseek v3.1 non-reasoning
* Move test, remove .gitignore change
* Place deepseek-v3.1 reasoning test directly into existing reasoning
function per CISC's request.
* Address whitespace CI failure.
* Merge two assert_equals per CISC's request.
* Add DeepSeek-V3.1 tests to tests/test-chat.cpp per CISC's request.
* Merge deepseek V3.1 and regular parse_json_tool_calls() function
behaviors by adding optional update_cursor argument.
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* DeepSeek V3.1 fix reasoning_format none
* Strip grammar down to strictly what we expect based on model card. Throw
out parts we cargo culted from R1 that don't make sense.
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* DeepSeek V3.1 - Add edge case where thinking is forced open, there is
tool calling in the reasoning content, but then the model just stops the
output without closing the </think> tag, so it's not a partial. In this
case, use the tool call in the reasoning content.
* DeepSeek V3.1 - simplify update_cursor
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Fix indent
---------
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
139
common/chat.cpp
139
common/chat.cpp
@@ -631,6 +631,7 @@ const char * common_chat_format_name(common_chat_format format) {
|
||||
case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2";
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2";
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1";
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1";
|
||||
case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro";
|
||||
case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B";
|
||||
case COMMON_CHAT_FORMAT_GRANITE: return "Granite";
|
||||
@@ -698,11 +699,13 @@ static void parse_json_tool_calls(
|
||||
size_t from = std::string::npos;
|
||||
auto first = true;
|
||||
while (true) {
|
||||
auto start_pos = builder.pos();
|
||||
auto res = function_regex_start_only && first
|
||||
? builder.try_consume_regex(*function_regex_start_only)
|
||||
: function_regex
|
||||
? builder.try_find_regex(*function_regex, from)
|
||||
: std::nullopt;
|
||||
|
||||
if (res) {
|
||||
std::string name;
|
||||
if (get_function_name) {
|
||||
@@ -737,6 +740,8 @@ static void parse_json_tool_calls(
|
||||
return;
|
||||
}
|
||||
throw common_chat_msg_partial_exception("incomplete tool call");
|
||||
} else {
|
||||
builder.move_to(start_pos);
|
||||
}
|
||||
break;
|
||||
}
|
||||
@@ -1388,6 +1393,71 @@ static common_chat_params common_chat_params_init_deepseek_r1(const common_chat_
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_deepseek_v3_1(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
common_chat_params data;
|
||||
|
||||
// Pass thinking context for DeepSeek V3.1 template
|
||||
json additional_context = {
|
||||
{"thinking", inputs.enable_thinking},
|
||||
};
|
||||
|
||||
auto prompt = apply(tmpl, inputs,
|
||||
/* messages_override= */ inputs.messages,
|
||||
/* tools_override= */ std::nullopt,
|
||||
additional_context);
|
||||
data.prompt = prompt;
|
||||
data.format = COMMON_CHAT_FORMAT_DEEPSEEK_V3_1;
|
||||
if (string_ends_with(data.prompt, "<think>")) {
|
||||
if (!inputs.enable_thinking) {
|
||||
data.prompt += "</think>";
|
||||
} else {
|
||||
data.thinking_forced_open = true;
|
||||
}
|
||||
}
|
||||
if (inputs.tools.is_array() && !inputs.tools.empty()) {
|
||||
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED && inputs.json_schema.is_null();
|
||||
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
|
||||
std::vector<std::string> tool_rules;
|
||||
foreach_function(inputs.tools, [&](const json & tool) {
|
||||
const auto & function = tool.at("function");
|
||||
std::string name = function.at("name");
|
||||
auto parameters = function.at("parameters");
|
||||
builder.resolve_refs(parameters);
|
||||
tool_rules.push_back(builder.add_rule(name + "-call",
|
||||
"( \"<|tool▁call▁begin|>\" )? \"" + name + "<|tool▁sep|>"
|
||||
"\" " + builder.add_schema(name + "-args", parameters) + " "
|
||||
"\"<|tool▁call▁end|>\""));
|
||||
});
|
||||
// Distill Qwen 7B & 32B models seem confused re/ syntax of their tool call opening tag,
|
||||
// so we accept common variants (then it's all constrained)
|
||||
builder.add_rule("root",
|
||||
std::string(data.thinking_forced_open ? "( \"</think>\" space )? " : "") +
|
||||
"( \"<|tool▁calls▁begin|>\" | \"<|tool_calls_begin|>\" | \"<|tool calls begin|>\" | \"<|tool\\\\_calls\\\\_begin|>\" | \"<|tool▁calls|>\" ) "
|
||||
"(" + string_join(tool_rules, " | ") + ")" + (inputs.parallel_tool_calls ? "*" : "") + " "
|
||||
"\"<|tool▁calls▁end|>\""
|
||||
" space");
|
||||
data.grammar_triggers.push_back({
|
||||
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN_FULL,
|
||||
// If thinking_forced_open, then we capture the </think> tag in the grammar,
|
||||
// (important for required tool choice) and in the trigger's first capture (decides what is sent to the grammar)
|
||||
std::string(data.thinking_forced_open ? "[\\s\\S]*?(</think>\\s*)" : "(?:<think>[\\s\\S]*?</think>\\s*)?") +
|
||||
"(<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)[\\s\\S]*"
|
||||
});
|
||||
data.preserved_tokens = {
|
||||
"<think>",
|
||||
"</think>",
|
||||
"<|tool▁calls▁begin|>",
|
||||
"<|tool▁call▁begin|>",
|
||||
"<|tool▁sep|>",
|
||||
"<|tool▁call▁end|>",
|
||||
"<|tool▁calls▁end|>",
|
||||
};
|
||||
});
|
||||
}
|
||||
return data;
|
||||
}
|
||||
|
||||
static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
|
||||
builder.try_parse_reasoning("<think>", "</think>");
|
||||
if (!builder.syntax().parse_tool_calls) {
|
||||
@@ -1409,6 +1479,66 @@ static void common_chat_parse_deepseek_r1(common_chat_msg_parser & builder) {
|
||||
tool_calls_end);
|
||||
}
|
||||
|
||||
static void common_chat_parse_deepseek_v3_1_content(common_chat_msg_parser & builder) {
|
||||
static const common_regex function_regex("(?:<|tool▁call▁begin|>)?([^\\n<]+)(?:<|tool▁sep|>)");
|
||||
|
||||
static const common_regex close_regex("(?:[\\s]*)?<|tool▁call▁end|>");
|
||||
static const common_regex tool_calls_begin("(?:<|tool▁calls▁begin|>|<|tool_calls_begin|>|<|tool calls begin|>|<|tool\\\\_calls\\\\_begin|>|<|tool▁calls|>)");
|
||||
static const common_regex tool_calls_end("<|tool▁calls▁end|>");
|
||||
|
||||
if (!builder.syntax().parse_tool_calls) {
|
||||
LOG_DBG("%s: not parse_tool_calls\n", __func__);
|
||||
builder.add_content(builder.consume_rest());
|
||||
return;
|
||||
}
|
||||
|
||||
LOG_DBG("%s: parse_tool_calls\n", __func__);
|
||||
|
||||
parse_json_tool_calls(
|
||||
builder,
|
||||
/* block_open= */ tool_calls_begin,
|
||||
/* function_regex_start_only= */ std::nullopt,
|
||||
function_regex,
|
||||
close_regex,
|
||||
tool_calls_end);
|
||||
}
|
||||
|
||||
static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) {
|
||||
// DeepSeek V3.1 outputs reasoning content between "<think>" and "</think>" tags, followed by regular content
|
||||
// First try to parse using the standard reasoning parsing method
|
||||
LOG_DBG("%s: thinking_forced_open: %s\n", __func__, std::to_string(builder.syntax().thinking_forced_open).c_str());
|
||||
|
||||
auto start_pos = builder.pos();
|
||||
auto found_end_think = builder.try_find_literal("</think>");
|
||||
builder.move_to(start_pos);
|
||||
|
||||
if (builder.syntax().thinking_forced_open && !builder.is_partial() && !found_end_think) {
|
||||
LOG_DBG("%s: no end_think, not partial, adding content\n", __func__);
|
||||
common_chat_parse_deepseek_v3_1_content(builder);
|
||||
} else if (builder.try_parse_reasoning("<think>", "</think>")) {
|
||||
// If reasoning was parsed successfully, the remaining content is regular content
|
||||
LOG_DBG("%s: parsed reasoning, adding content\n", __func__);
|
||||
// </think><|tool▁calls▁begin|><|tool▁call▁begin|>function<|tool▁sep|>NAME\n```json\nJSON\n```<|tool▁call▁end|><|tool▁calls▁end|>
|
||||
common_chat_parse_deepseek_v3_1_content(builder);
|
||||
} else {
|
||||
if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE) {
|
||||
LOG_DBG("%s: reasoning_format none, adding content\n", __func__);
|
||||
common_chat_parse_deepseek_v3_1_content(builder);
|
||||
return;
|
||||
}
|
||||
// If no reasoning tags found, check if we should treat everything as reasoning
|
||||
if (builder.syntax().thinking_forced_open) {
|
||||
// If thinking is forced open but no tags found, treat everything as reasoning
|
||||
LOG_DBG("%s: thinking_forced_open, adding reasoning content\n", __func__);
|
||||
builder.add_reasoning_content(builder.consume_rest());
|
||||
} else {
|
||||
LOG_DBG("%s: no thinking_forced_open, adding content\n", __func__);
|
||||
// <|tool▁call▁begin|>NAME<|tool▁sep|>JSON<|tool▁call▁end|>
|
||||
common_chat_parse_deepseek_v3_1_content(builder);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
static common_chat_params common_chat_params_init_gpt_oss(const common_chat_template & tmpl, const struct templates_params & inputs) {
|
||||
common_chat_params data;
|
||||
auto prompt = apply(tmpl, inputs);
|
||||
@@ -2365,6 +2495,12 @@ static common_chat_params common_chat_templates_apply_jinja(
|
||||
}
|
||||
}
|
||||
|
||||
// DeepSeek V3.1: detect based on specific patterns in the template
|
||||
if (src.find("message['prefix'] is defined and message['prefix'] and thinking") != std::string::npos &&
|
||||
params.json_schema.is_null()) {
|
||||
return common_chat_params_init_deepseek_v3_1(tmpl, params);
|
||||
}
|
||||
|
||||
// DeepSeek R1: use handler in all cases except json schema (thinking / tools).
|
||||
if (src.find("<|tool▁calls▁begin|>") != std::string::npos && params.json_schema.is_null()) {
|
||||
return common_chat_params_init_deepseek_r1(tmpl, params);
|
||||
@@ -2537,6 +2673,9 @@ static void common_chat_parse(common_chat_msg_parser & builder) {
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_R1:
|
||||
common_chat_parse_deepseek_r1(builder);
|
||||
break;
|
||||
case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1:
|
||||
common_chat_parse_deepseek_v3_1(builder);
|
||||
break;
|
||||
case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2:
|
||||
common_chat_parse_functionary_v3_2(builder);
|
||||
break;
|
||||
|
||||
Reference in New Issue
Block a user