diff --git a/common/chat.cpp b/common/chat.cpp index 78ceecd02b..2150894459 100644 --- a/common/chat.cpp +++ b/common/chat.cpp @@ -618,6 +618,7 @@ const char * common_chat_format_name(common_chat_format format) { case COMMON_CHAT_FORMAT_FIREFUNCTION_V2: return "FireFunction v2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_2: return "Functionary v3.2"; case COMMON_CHAT_FORMAT_FUNCTIONARY_V3_1_LLAMA_3_1: return "Functionary v3.1 Llama 3.1"; + case COMMON_CHAT_FORMAT_DEEPSEEK_V3_1: return "DeepSeek V3.1"; case COMMON_CHAT_FORMAT_HERMES_2_PRO: return "Hermes 2 Pro"; case COMMON_CHAT_FORMAT_COMMAND_R7B: return "Command R7B"; case COMMON_CHAT_FORMAT_GRANITE: return "Granite"; @@ -1352,18 +1353,15 @@ static void common_chat_parse_deepseek_v3_1(common_chat_msg_parser & builder) { // First, try to find the "" tag that separates thinking from regular content static const common_regex thinking_end_regex(""); - if (auto res = builder.try_find_regex(thinking_end_regex)) { - // Extract everything before "" as reasoning content - auto reasoning_content = builder.str(common_string_range{0, res->groups[0].begin}); - auto stripped_reasoning = string_strip(reasoning_content); + if (auto res = builder.try_find_regex(thinking_end_regex, std::string::npos, false)) { + // The prelude contains everything before the "" tag + auto stripped_reasoning = string_strip(res->prelude); if (!stripped_reasoning.empty()) { builder.add_reasoning_content(stripped_reasoning); } - // Move past the "" tag - builder.move_to(res->groups[0].end); - + // The parser position is already advanced past the "" tag by try_find_regex // The rest is regular content builder.add_content(builder.consume_rest()); } else { diff --git a/tests/test-chat-parser.cpp b/tests/test-chat-parser.cpp index 5098bd6779..53421bf125 100644 --- a/tests/test-chat-parser.cpp +++ b/tests/test-chat-parser.cpp @@ -197,7 +197,51 @@ static void test_deepseek_v3_1() { /* .thinking_forced_open = */ false, /* .parse_tool_calls = */ true, }; - common_chat_msg_parser builder("REASONINGok", /* is_partial= */ false, {}); + common_chat_msg_parser builder("REASONINGok", /* is_partial= */ false, syntax); + assert_equals(std::string("REASONING"), builder.result().reasoning_content); + assert_equals(std::string("ok"), builder.result().content); + } + + // Test with whitespace around reasoning content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + common_chat_msg_parser builder(" REASONING WITH SPACES ok", /* is_partial= */ false, syntax); + assert_equals(std::string("REASONING WITH SPACES"), builder.result().reasoning_content); + assert_equals(std::string("ok"), builder.result().content); + } + + // Test without thinking tag (should be all regular content) + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + common_chat_msg_parser builder("just regular content", /* is_partial= */ false, syntax); + assert_equals(std::string(""), builder.result().reasoning_content); + assert_equals(std::string("just regular content"), builder.result().content); + } + + // Test with empty reasoning content + { + common_chat_syntax syntax = { + /* .format = */ COMMON_CHAT_FORMAT_DEEPSEEK_V3_1, + /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, + /* .reasoning_in_content = */ false, + /* .thinking_forced_open = */ false, + /* .parse_tool_calls = */ true, + }; + common_chat_msg_parser builder(" ok", /* is_partial= */ false, syntax); + assert_equals(std::string(""), builder.result().reasoning_content); + assert_equals(std::string("ok"), builder.result().content); } } @@ -362,6 +406,7 @@ int main() { test_json_with_dumped_args(); test_reasoning(); test_regex(); + test_deepseek_v3_1(); std::cout << "All tests passed!\n"; return 0; }