mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-28 08:31:25 +00:00
chat : Deepseek V3.1 reasoning and tool calling support (OpenAI Style) (#15533)
* Add DeepSeek V3.1 thinking mode support
- Added COMMON_CHAT_FORMAT_DEEPSEEK_V3_1 enum value
- Created common_chat_params_init_deepseek_v3_1() function (currently uses R1 implementation)
- Created common_chat_parse_deepseek_v3_1() function that handles V3.1 thinking format:
- Extracts reasoning content before '</think>' tag into reasoning_content
- Extracts regular content after '</think>' tag into content
- No opening '<think>' tag in V3.1 format
- Added detection logic for V3.1 templates based on pattern: 'message['prefix'] is defined and message['prefix'] and thinking'
- Added V3.1 case to parsing switch statement
This addresses the issue where V3.1 outputs reasoning content followed by '</think>' and then regular content without the opening '<think>' tag.
* Another attempt by V3.1 non-thinking
* Fix test, but it's not asserting anything.
* Ignore vim swap files in tests dir
* Update the test
* Try using try_find_literal instead of regex
* passing test
* Revert "Try using try_find_literal instead of regex"
This reverts commit c50d887ec2.
* Remove unnecessary change
* Remove comment
* Add code to handle non-thinking mode.
* Try to set message['prefix'] when thinking is enabled.
* This fixes reasoning, but breaks normal content. We need state in the
chat parser.
* DeepSeek V3.1 thinking is now the default. Disable with `--reasoning-budget 0`.
* Simplify (DeepSeek V3.1 reasoning)
* Fix sign inversion bug
* Add some tool calling code (not working).
* Tool calls working in non-reasoning mode.
* Attempt a unit test for tool call parsing.
* Passing test
* Add tests for both happy path and broken fenced DeepSeek V3.1 tool call variants.
* Passing DeepSeek V3.1 tool call tests, but model is not working.
* Revert assistance response prefill change. Not my monkeys.
* Add fenced_thinking unit test variant. Passes, but thinking tool calling
still isn't working for some reason.
* Tests pass in reasoning mode. Also e2e tool test passes.
* Make a copy of the parse_json_tool_calls function for deepseek-v3.1 so
as to not accidentally introduce regressions.
* Fix thinking_forced_open logic. tool calling broken. Need to add another
test case.
* That's what I get for cargo culting a newline.
* Add multi tool call test for deepseek v3.1 non-reasoning
* Move test, remove .gitignore change
* Place deepseek-v3.1 reasoning test directly into existing reasoning
function per CISC's request.
* Address whitespace CI failure.
* Merge two assert_equals per CISC's request.
* Add DeepSeek-V3.1 tests to tests/test-chat.cpp per CISC's request.
* Merge deepseek V3.1 and regular parse_json_tool_calls() function
behaviors by adding optional update_cursor argument.
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* DeepSeek V3.1 fix reasoning_format none
* Strip grammar down to strictly what we expect based on model card. Throw
out parts we cargo culted from R1 that don't make sense.
* Update tests/test-chat-parser.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* DeepSeek V3.1 - Add edge case where thinking is forced open, there is
tool calling in the reasoning content, but then the model just stops the
output without closing the </think> tag, so it's not a partial. In this
case, use the tool call in the reasoning content.
* DeepSeek V3.1 - simplify update_cursor
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Update common/chat.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* Fix indent
---------
Co-authored-by: openhands <openhands@all-hands.dev>
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
@@ -15,14 +15,20 @@
|
||||
#include "regex-partial.h"
|
||||
|
||||
template <class T>
|
||||
static void assert_equals(const T & expected, const T & actual) {
|
||||
static void assert_equals(const std::string_view label, const T & expected, const T & actual) {
|
||||
if (expected != actual) {
|
||||
std::cerr << label << std::endl;
|
||||
std::cerr << "Expected: " << expected << std::endl;
|
||||
std::cerr << "Actual: " << actual << std::endl;
|
||||
std::cerr << std::flush;
|
||||
throw std::runtime_error("Test failed");
|
||||
}
|
||||
}
|
||||
|
||||
template <class T>
|
||||
static void assert_equals(const T & expected, const T & actual) {
|
||||
assert_equals("", expected, actual);
|
||||
}
|
||||
static void assert_equals(const char * expected, const std::string & actual) {
|
||||
return assert_equals<std::string>(expected, actual);
|
||||
}
|
||||
@@ -46,6 +52,7 @@ static void assert_throws(const std::function<void()> & fn, const std::string &
|
||||
}
|
||||
|
||||
static void test_reasoning() {
|
||||
//common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
|
||||
{
|
||||
common_chat_msg_parser builder("<tnk>Cogito</tnk>Ergo sum", /* is_partial= */ false, {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_CONTENT_ONLY,
|
||||
@@ -99,6 +106,36 @@ static void test_reasoning() {
|
||||
assert_equals("<think>Cogito</think>", builder.result().content);
|
||||
assert_equals("Ergo sum", builder.consume_rest());
|
||||
}
|
||||
// Test DeepSeek V3.1 parsing - reasoning content followed by "</think>" and then regular content
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("deepseek_v3_1_reasoning_format_deepseek");
|
||||
common_chat_msg_parser builder("REASONING</think>ok", /* is_partial= */ false, syntax);
|
||||
assert_equals(variant, true, builder.try_parse_reasoning("<think>", "</think>"));
|
||||
assert_equals(variant, std::string("REASONING"), builder.result().reasoning_content);
|
||||
assert_equals(variant, std::string("ok"), builder.consume_rest());
|
||||
}
|
||||
// Test DeepSeek V3.1 parsing - reasoning_format none - reasoning content followed by "</think>" and then regular content
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_NONE,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("deepseek_v3_1_reasoning_format_none");
|
||||
const std::string input = "REASONING</think>ok";
|
||||
auto msg = common_chat_parse(input, false, syntax);
|
||||
assert_equals(variant, std::string("REASONING</think>ok"), msg.content);
|
||||
assert_equals(variant, std::string(""), msg.reasoning_content);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_regex() {
|
||||
@@ -186,6 +223,159 @@ static void test(const std::string & input, bool is_partial, const std::vector<s
|
||||
assert_equals(is_partial, js->is_partial);
|
||||
assert_equals(expected, args_paths.size() == 1 && args_paths[0].empty() ? js->value.get<std::string>() : js->value.dump());
|
||||
}
|
||||
|
||||
static void test_deepseek_v3_1_tool_calls() {
|
||||
//common_log_set_verbosity_thold(LOG_DEFAULT_DEBUG);
|
||||
// variant: happy path for when it works as the model card says it should
|
||||
const std::string variant("simple");
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string input = "<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto msg = common_chat_parse(input, false, syntax);
|
||||
assert_equals<std::size_t>(variant, 1, msg.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), msg.tool_calls[0].name);
|
||||
// JSON arguments are dumped without spaces
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), msg.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string(""), msg.content);
|
||||
assert_equals(variant, std::string(""), msg.reasoning_content);
|
||||
|
||||
// variant: simple + thinking open
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("simple_thinking");
|
||||
const std::string in = "REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, syntax);
|
||||
assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string(""), m.content);
|
||||
assert_equals(variant, std::string("REASONING"), m.reasoning_content);
|
||||
}
|
||||
// variant: simple + multiple tool calls
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("simple_multiple_tool_calls");
|
||||
const std::string in = "CONTENT<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁call▁begin|>get_weather<|tool▁sep|>{\"city\": \"Paris\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, syntax);
|
||||
assert_equals<std::size_t>(variant, 2, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string("get_weather"), m.tool_calls[1].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Paris\"}"), m.tool_calls[1].arguments);
|
||||
assert_equals(variant, std::string("CONTENT"), m.content);
|
||||
assert_equals(variant, std::string(""), m.reasoning_content);
|
||||
}
|
||||
|
||||
|
||||
// variant: thinking forced open + tool call in reasoning content
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("thinking_forced_open_tool_call_in_reasoning");
|
||||
const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING</think><|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, syntax);
|
||||
assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
|
||||
assert_equals(variant, std::string(""), m.content);
|
||||
assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time2<|tool▁sep|>{\"city\": \"Tokyo2\"}<|tool▁call▁end|><|tool▁calls▁end|>REASONING"), m.reasoning_content);
|
||||
}
|
||||
|
||||
// variant: thinking forced open + tool call in reasoning content + no closing think + not partial
|
||||
// This is a bit of a fine tuning issue on the model's part IMO. It really should not be attempting
|
||||
// to make tool calls in reasoning content according to the model card, but it does sometimes, so
|
||||
// add the reasoning content as regular content and parse the tool calls.
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_not_partial");
|
||||
const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, false, syntax);
|
||||
assert_equals(variant, std::string("REASONING"), m.content);
|
||||
assert_equals(variant, std::string(""), m.reasoning_content);
|
||||
assert_equals<std::size_t>(variant, 1, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("get_time"), m.tool_calls[0].name);
|
||||
assert_equals(variant, std::string("{\"city\":\"Tokyo\"}"), m.tool_calls[0].arguments);
|
||||
}
|
||||
|
||||
// variant: thinking forced open + tool call in reasoning content + no closing think + partial
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("thinking_forced_open_tool_call_in_reasoning_no_closing_think_partial");
|
||||
const std::string in = "REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>";
|
||||
auto m = common_chat_parse(in, /* is_partial= */ true, syntax);
|
||||
assert_equals(variant, std::string("REASONING<|tool▁calls▁begin|><|tool▁call▁begin|>get_time<|tool▁sep|>{\"city\": \"Tokyo\"}<|tool▁call▁end|><|tool▁calls▁end|>"), m.reasoning_content);
|
||||
assert_equals(variant, std::string(""), m.content);
|
||||
assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
|
||||
}
|
||||
|
||||
// variant: thinking not forced open + reasoning + regular content + no tool calls
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ true,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("thinking_forced_open_reasoning_regular_content_no_tool_calls");
|
||||
const std::string in = "REASONING</think>CONTENT";
|
||||
auto m = common_chat_parse(in, false, syntax);
|
||||
assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("CONTENT"), m.content);
|
||||
assert_equals(variant, std::string("REASONING"), m.reasoning_content);
|
||||
}
|
||||
// variant: thinking not forced open + missing reasoning + no tool calls
|
||||
{
|
||||
common_chat_syntax syntax = {
|
||||
/* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1,
|
||||
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
|
||||
/* .reasoning_in_content = */ false,
|
||||
/* .thinking_forced_open = */ false,
|
||||
/* .parse_tool_calls = */ true,
|
||||
};
|
||||
const std::string variant("thinking_not_forced_open_missing_reasoning_no_tool_calls");
|
||||
const std::string in = "CONTENT";
|
||||
auto m = common_chat_parse(in, false, syntax);
|
||||
assert_equals<std::size_t>(variant, 0, m.tool_calls.size());
|
||||
assert_equals(variant, std::string("CONTENT"), m.content);
|
||||
assert_equals(variant, std::string(""), m.reasoning_content);
|
||||
}
|
||||
}
|
||||
|
||||
static void test_with_args(const std::string & input, const std::string & expected, bool parse_as_partial = true, bool is_partial = true) {
|
||||
common_chat_msg_parser builder(input, parse_as_partial, {});
|
||||
auto js = builder.try_consume_json_with_dumped_args({{"args"}}, {});
|
||||
@@ -347,6 +537,7 @@ int main() {
|
||||
test_json_with_dumped_args();
|
||||
test_reasoning();
|
||||
test_regex();
|
||||
test_deepseek_v3_1_tool_calls();
|
||||
std::cout << "All tests passed!\n";
|
||||
return 0;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user