mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	 f5cd27b71d
			
		
	
	f5cd27b71d
	
	
	
		
			
			* add common_json w/ support for truncated json healing * add common_chat_msg_diff * partial common_chat_parse * refactor parser w/ optionals * server: wire chat diffs in stream mode * fix trigger of thinking models (must happen after thoughts are closed) * fix functionary v3.2 raw python! * rename: common_chat_syntax (now contains format) * rm common_regex.at_start * don't return empty <think></think> * accommodate yet another deepseek r1 distill fantasy syntax (`<|tool▁calls|>`) * fix QwQ 32B tool call parsing after thoughts (hermes2) * better logs for grammar triggers * consume spaces after parse_json_tool_calls * fix required tool calls w/ thinking models that have pre-opened thinking tags * fix thinking model's initial trigger + test qwq's template * run most test_tool_call tests in stream + non-stream modes * make functionary v3.2 parsing more strict (differentiate first match from others) * send final diff from server, to close off raw python arguments * support partial content streaming in Generic mode * tool-call: allow content prelude before hermes2 tool calls (for Qwen2.5) * Update function-calling.md * Update tool_bench.py * chat-parser: remove input from exception (llm output may contain PII) --------- Co-authored-by: ochafik <ochafik@google.com> Co-authored-by: Olivier Chafik <ochafik@users.noreply.github.com>
		
			
				
	
	
		
			238 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			238 lines
		
	
	
		
			4.5 KiB
		
	
	
	
		
			C++
		
	
	
	
	
	
| #include "common.h"
 | |
| #include "json-partial.h"
 | |
| #include <exception>
 | |
| #include <iostream>
 | |
| #include <stdexcept>
 | |
| 
 | |
/// Checks that `actual` equals `expected` (compared with `operator!=`).
///
/// On a mismatch both values are written to std::cerr and a
/// std::runtime_error("Test failed") is thrown; on a match nothing happens.
template <class T> static void assert_equals(const T & expected, const T & actual) {
    const bool mismatch = expected != actual;
    if (!mismatch) {
        return;
    }
    // Print both sides before throwing so the failing comparison is visible in the log.
    std::cerr << "Expected: " << expected << std::endl
              << "Actual: " << actual << std::endl
              << std::flush;
    throw std::runtime_error("Test failed");
}
 | |
| 
 | |
| static void test_json_healing() {
 | |
|   auto parse = [](const std::string & str) {
 | |
|       std::cerr << "# Parsing: " << str << '\n';
 | |
|       std::string::const_iterator it = str.begin();
 | |
|       const auto end = str.end();
 | |
|       common_json out;
 | |
|       std::string healing_marker = "$llama.cpp.json$";
 | |
|       if (common_json_parse(it, end, healing_marker, out)) {
 | |
|           auto dump = out.json.dump();
 | |
|           std::cerr << "Parsed: " << dump << '\n';
 | |
|           std::cerr << "Magic: " << out.healing_marker.json_dump_marker << '\n';
 | |
|           std::string result;
 | |
|           if (!out.healing_marker.json_dump_marker.empty()) {
 | |
|               auto i = dump.find(out.healing_marker.json_dump_marker);
 | |
|               if (i == std::string::npos) {
 | |
|                   throw std::runtime_error("Failed to find magic in dump " + dump + " (magic: " + out.healing_marker.json_dump_marker + ")");
 | |
|               }
 | |
|               result = dump.substr(0, i);
 | |
|           } else {
 | |
|             result = dump;
 | |
|           }
 | |
|           std::cerr << "Result: " << result << '\n';
 | |
|           if (string_starts_with(str, result)) {
 | |
|             std::cerr << "Failure!\n";
 | |
|           }
 | |
|         //   return dump;
 | |
|       } else {
 | |
|         throw std::runtime_error("Failed to parse: " + str);
 | |
|       }
 | |
| 
 | |
|   };
 | |
|   auto parse_all = [&](const std::string & str) {
 | |
|       for (size_t i = 1; i < str.size(); i++) {
 | |
|           parse(str.substr(0, i));
 | |
|       }
 | |
|   };
 | |
|   parse_all("{\"a\": \"b\"}");
 | |
|   parse_all("{\"hey\": 1, \"ho\\\"ha\": [1]}");
 | |
| 
 | |
|   parse_all("[{\"a\": \"b\"}]");
 | |
| 
 | |
|   auto test = [&](const std::vector<std::string> & inputs, const std::string & expected, const std::string & expected_marker) {
 | |
|       for (const auto & input : inputs) {
 | |
|         common_json out;
 | |
|         assert_equals(true, common_json_parse(input, "$foo", out));
 | |
|         assert_equals<std::string>(expected, out.json.dump());
 | |
|         assert_equals<std::string>(expected_marker, out.healing_marker.json_dump_marker);
 | |
|       }
 | |
|   };
 | |
|   // No healing needed:
 | |
|   test(
 | |
|     {
 | |
|       R"([{"a":"b"}, "y"])",
 | |
|     },
 | |
|     R"([{"a":"b"},"y"])",
 | |
|     ""
 | |
|   );
 | |
|   // Partial literals can't be healed:
 | |
|   test(
 | |
|     {
 | |
|       R"([1)",
 | |
|       R"([tru)",
 | |
|       R"([n)",
 | |
|       R"([nul)",
 | |
|       R"([23.2)",
 | |
|     },
 | |
|     R"(["$foo"])",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"({"a": 1)",
 | |
|       R"({"a": tru)",
 | |
|       R"({"a": n)",
 | |
|       R"({"a": nul)",
 | |
|       R"({"a": 23.2)",
 | |
|     },
 | |
|     R"({"a":"$foo"})",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"({)",
 | |
|     },
 | |
|     R"({"$foo":1})",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([)",
 | |
|     },
 | |
|     R"(["$foo"])",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   // Healing right after a full literal
 | |
|   test(
 | |
|     {
 | |
|       R"(1 )",
 | |
|     },
 | |
|     R"(1)",
 | |
|     ""
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"(true)",
 | |
|       R"(true )",
 | |
|     },
 | |
|     R"(true)",
 | |
|     ""
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"(null)",
 | |
|       R"(null )",
 | |
|     },
 | |
|     R"(null)",
 | |
|     ""
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([1 )",
 | |
|     },
 | |
|     R"([1,"$foo"])",
 | |
|     R"(,"$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([{})",
 | |
|       R"([{} )",
 | |
|     },
 | |
|     R"([{},"$foo"])",
 | |
|     R"(,"$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([true)",
 | |
|     },
 | |
|     // TODO: detect the true/false/null literal was complete
 | |
|     R"(["$foo"])",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([true )",
 | |
|     },
 | |
|     R"([true,"$foo"])",
 | |
|     R"(,"$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([true,)",
 | |
|     },
 | |
|     R"([true,"$foo"])",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   // Test nesting
 | |
|   test(
 | |
|     {
 | |
|       R"([{"a": [{"b": [{)",
 | |
|     },
 | |
|     R"([{"a":[{"b":[{"$foo":1}]}]}])",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([{"a": [{"b": [)",
 | |
|     },
 | |
|     R"([{"a":[{"b":["$foo"]}]}])",
 | |
|     R"("$foo)"
 | |
|   );
 | |
| 
 | |
|   test(
 | |
|     {
 | |
|       R"([{"a": "b"})",
 | |
|       R"([{"a": "b"} )",
 | |
|     },
 | |
|     R"([{"a":"b"},"$foo"])",
 | |
|     R"(,"$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"([{"a": "b"},)",
 | |
|       R"([{"a": "b"}, )",
 | |
|     },
 | |
|     R"([{"a":"b"},"$foo"])",
 | |
|     R"("$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"({ "code)",
 | |
|     },
 | |
|     R"({"code$foo":1})",
 | |
|     R"($foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"({ "code\)",
 | |
|     },
 | |
|     R"({"code\\$foo":1})",
 | |
|     R"(\$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"({ "code")",
 | |
|     },
 | |
|     R"({"code":"$foo"})",
 | |
|     R"(:"$foo)"
 | |
|   );
 | |
|   test(
 | |
|     {
 | |
|       R"({ "key")",
 | |
|     },
 | |
|     R"({"key":"$foo"})",
 | |
|     R"(:"$foo)"
 | |
|   );
 | |
| }
 | |
| 
 | |
| int main() {
 | |
|     test_json_healing();
 | |
|     std::cerr << "All tests passed.\n";
 | |
|     return 0;
 | |
| }
 |