chat: Fix streaming parser for granite models (#15682)

* fix(chat): fix streaming parser for granite models

* tests: add test cases for Granite models chat parser
This commit is contained in:
shun095
2025-09-20 00:57:30 +09:00
committed by GitHub
parent 4067f07fc5
commit f432d8d83e
2 changed files with 114 additions and 13 deletions

View File

@@ -2232,15 +2232,28 @@ static common_chat_params common_chat_params_init_granite(const common_chat_temp
static void common_chat_parse_granite(common_chat_msg_parser & builder) { static void common_chat_parse_granite(common_chat_msg_parser & builder) {
// Parse thinking tags // Parse thinking tags
static const common_regex start_think_regex(regex_escape("<think>"));
static const common_regex end_think_regex(regex_escape("</think>"));
// Granite models output partial tokens such as "<" and "<think".
// By leveraging try_consume_regex()/try_find_regex() throwing
// common_chat_msg_partial_exception for these partial tokens,
// processing is interrupted and the tokens are not passed to add_content().
if (auto res = builder.try_consume_regex(start_think_regex)) {
// Restore position for try_parse_reasoning()
builder.move_to(res->groups[0].begin);
builder.try_find_regex(end_think_regex, std::string::npos, false);
// Restore position for try_parse_reasoning()
builder.move_to(res->groups[0].begin);
}
builder.try_parse_reasoning("<think>", "</think>"); builder.try_parse_reasoning("<think>", "</think>");
// Parse response tags using regex // Parse response tags
static const common_regex response_regex("<response>([\\s\\S]*?)</response>"); static const common_regex start_response_regex(regex_escape("<response>"));
if (auto res = builder.try_find_regex(response_regex)) { static const common_regex end_response_regex(regex_escape("</response>"));
// Extract the content between the tags (capture group 1) // Granite models output partial tokens such as "<" and "<response".
auto content = builder.str(res->groups[1]); // Same hack as reasoning parsing.
builder.add_content(content); if (builder.try_consume_regex(start_response_regex)) {
builder.move_to(res->groups[0].end); builder.try_find_regex(end_response_regex);
} }
if (!builder.syntax().parse_tool_calls) { if (!builder.syntax().parse_tool_calls) {
@@ -2254,13 +2267,10 @@ static void common_chat_parse_granite(common_chat_msg_parser & builder) {
builder.move_to(res->groups[0].end); builder.move_to(res->groups[0].end);
// Expect JSON array of tool calls // Expect JSON array of tool calls
auto tool_calls_data = builder.consume_json(); if (auto tool_call = builder.try_consume_json_with_dumped_args({{{"arguments"}}})) {
if (tool_calls_data.json.is_array()) { if (!builder.add_tool_calls(tool_call->value) || tool_call->is_partial) {
if (!builder.add_tool_calls(tool_calls_data.json)) { throw common_chat_msg_partial_exception("incomplete tool call");
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
} }
} else {
builder.add_content("<|tool_call|>" + tool_calls_data.json.dump());
} }
} else { } else {
builder.add_content(builder.consume_rest()); builder.add_content(builder.consume_rest());

View File

@@ -1402,6 +1402,12 @@ static void test_template_output_parsers() {
"Hello, world!\nWhat's up?", "Hello, world!\nWhat's up?",
/* is_partial= */ false, /* is_partial= */ false,
{COMMON_CHAT_FORMAT_GRANITE})); {COMMON_CHAT_FORMAT_GRANITE}));
assert_msg_equals(
message_assist,
common_chat_parse(
"Hello, world!\nWhat's up?",
/* is_partial= */ true,
{COMMON_CHAT_FORMAT_GRANITE}));
// Test parsing content with thinking // Test parsing content with thinking
assert_msg_equals(message_assist_thoughts, assert_msg_equals(message_assist_thoughts,
@@ -1412,6 +1418,59 @@ static void test_template_output_parsers() {
/* .format = */ COMMON_CHAT_FORMAT_GRANITE, /* .format = */ COMMON_CHAT_FORMAT_GRANITE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK, /* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
})); }));
assert_msg_equals(message_assist_thoughts_unparsed_deepseek,
common_chat_parse(
"<think>I'm\nthinking</think>Hello, world!\nWhat's up?",
/* is_partial= */ false,
{COMMON_CHAT_FORMAT_GRANITE}));
assert_msg_equals(message_assist_thoughts,
common_chat_parse(
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
assert_msg_equals(message_assist_thoughts,
common_chat_parse(
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
/* is_partial= */ false,
{
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
assert_msg_equals(simple_assist_msg("<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>"),
common_chat_parse(
"<think>I'm\nthinking</think><response>Hello, world!\nWhat's up?</response>",
/* is_partial= */ false,
{COMMON_CHAT_FORMAT_GRANITE}));
assert_msg_equals(message_assist_empty,
common_chat_parse(
"<think",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
assert_msg_equals(message_assist_empty,
common_chat_parse(
"<think",
/* is_partial= */ true,
{COMMON_CHAT_FORMAT_GRANITE}));
assert_msg_equals(message_assist_thoughts_no_content,
common_chat_parse(
"<think>I'm\nthinking",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
assert_msg_equals(
message_assist_empty,
common_chat_parse(
"<think>I'm\nthinking</think><response",
/* is_partial= */ true,
{COMMON_CHAT_FORMAT_GRANITE}));
// Test parsing tool calls // Test parsing tool calls
assert_msg_equals(message_assist_call, assert_msg_equals(message_assist_call,
@@ -1419,6 +1478,38 @@ static void test_template_output_parsers() {
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]", "<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}}]",
/* is_partial= */ false, /* is_partial= */ false,
{COMMON_CHAT_FORMAT_GRANITE})); {COMMON_CHAT_FORMAT_GRANITE}));
assert_msg_equals(
message_assist_call_empty_args,
common_chat_parse(
"<|tool_call|>[{\"name\": \"special_function\"",
/* is_partial= */ true,
{COMMON_CHAT_FORMAT_GRANITE}));
assert_msg_equals(
message_assist_call_cutoff_args,
common_chat_parse(
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
/* is_partial= */ true,
{COMMON_CHAT_FORMAT_GRANITE}));
assert_msg_equals(
message_assist_call_cutoff_args,
common_chat_parse(
"<|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
// Test parsing tool calls with thinking
assert_msg_equals(
message_assist_call_thoughts,
common_chat_parse(
"<think>I'm\nthinking</think><|tool_call|>[{\"name\": \"special_function\", \"arguments\": {\"arg1\": 1}, {",
/* is_partial= */ true,
{
/* .format = */ COMMON_CHAT_FORMAT_GRANITE,
/* .reasoning_format = */ COMMON_REASONING_FORMAT_DEEPSEEK,
}));
// Test template generation for regular content // Test template generation for regular content
test_templates(tmpls.get(), end_tokens, message_assist, tools, test_templates(tmpls.get(), end_tokens, message_assist, tools,