mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	grammar : support array references in json schema (#16792)
* grammar : support array references in json schema
* Update json-schema-to-grammar.cpp
  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* grammar : improve regex when naming ref derived rules
* grammar : replace non-conformant definitions array with anyOf test case
---------
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
		| @@ -601,7 +601,10 @@ private: | |||||||
|     } |     } | ||||||
|  |  | ||||||
|     std::string _resolve_ref(const std::string & ref) { |     std::string _resolve_ref(const std::string & ref) { | ||||||
|         std::string ref_name = ref.substr(ref.find_last_of('/') + 1); |         auto it = ref.find('#'); | ||||||
|  |         std::string ref_fragment = it != std::string::npos ? ref.substr(it + 1) : ref; | ||||||
|  |         static const std::regex nonalphanumeric_regex(R"([^a-zA-Z0-9-]+)"); | ||||||
|  |         std::string ref_name = "ref" + std::regex_replace(ref_fragment, nonalphanumeric_regex, "-"); | ||||||
|         if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { |         if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { | ||||||
|             _refs_being_resolved.insert(ref); |             _refs_being_resolved.insert(ref); | ||||||
|             json resolved = _refs[ref]; |             json resolved = _refs[ref]; | ||||||
| @@ -774,11 +777,24 @@ public: | |||||||
|                         std::vector<std::string> tokens = string_split(pointer, "/"); |                         std::vector<std::string> tokens = string_split(pointer, "/"); | ||||||
|                         for (size_t i = 1; i < tokens.size(); ++i) { |                         for (size_t i = 1; i < tokens.size(); ++i) { | ||||||
|                             std::string sel = tokens[i]; |                             std::string sel = tokens[i]; | ||||||
|                             if (target.is_null() || !target.contains(sel)) { |                             if (target.is_object() && target.contains(sel)) { | ||||||
|  |                                 target = target[sel]; | ||||||
|  |                             } else if (target.is_array()) { | ||||||
|  |                                 size_t sel_index; | ||||||
|  |                                 try { | ||||||
|  |                                     sel_index = std::stoul(sel); | ||||||
|  |                                 } catch (const std::invalid_argument & e) { | ||||||
|  |                                     sel_index = target.size(); | ||||||
|  |                                 } | ||||||
|  |                                 if (sel_index >= target.size()) { | ||||||
|  |                                     _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); | ||||||
|  |                                     return; | ||||||
|  |                                 } | ||||||
|  |                                 target = target[sel_index]; | ||||||
|  |                             } else { | ||||||
|                                 _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); |                                 _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); | ||||||
|                                 return; |                                 return; | ||||||
|                             } |                             } | ||||||
|                             target = target[sel]; |  | ||||||
|                         } |                         } | ||||||
|                         _refs[ref] = target; |                         _refs[ref] = target; | ||||||
|                     } |                     } | ||||||
|   | |||||||
| @@ -371,8 +371,17 @@ class SchemaConverter: | |||||||
|                         raise ValueError(f'Unsupported ref {ref}') |                         raise ValueError(f'Unsupported ref {ref}') | ||||||
|  |  | ||||||
|                     for sel in ref.split('#')[-1].split('/')[1:]: |                     for sel in ref.split('#')[-1].split('/')[1:]: | ||||||
|                         assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}' |                         assert target is not None, f'Error resolving ref {ref}: {sel} not in {target}' | ||||||
|                         target = target[sel] |                         if isinstance(target, list): | ||||||
|  |                             try: | ||||||
|  |                                 sel_index = int(sel) | ||||||
|  |                             except ValueError: | ||||||
|  |                                 raise ValueError(f'Error resolving ref {ref}: {sel} not in {target}') | ||||||
|  |                             assert 0 <= sel_index < len(target), f'Error resolving ref {ref}: {sel} not in {target}' | ||||||
|  |                             target = target[sel_index] | ||||||
|  |                         else: | ||||||
|  |                             assert sel in target, f'Error resolving ref {ref}: {sel} not in {target}' | ||||||
|  |                             target = target[sel] | ||||||
|  |  | ||||||
|                     self._refs[ref] = target |                     self._refs[ref] = target | ||||||
|                 else: |                 else: | ||||||
| @@ -547,7 +556,8 @@ class SchemaConverter: | |||||||
|  |  | ||||||
|  |  | ||||||
|     def _resolve_ref(self, ref): |     def _resolve_ref(self, ref): | ||||||
|         ref_name = ref.split('/')[-1] |         ref_fragment = ref.split('#')[-1] | ||||||
|  |         ref_name = 'ref' + re.sub(r'[^a-zA-Z0-9-]+', '-', ref_fragment) | ||||||
|         if ref_name not in self._rules and ref not in self._refs_being_resolved: |         if ref_name not in self._rules and ref not in self._refs_being_resolved: | ||||||
|             self._refs_being_resolved.add(ref) |             self._refs_being_resolved.add(ref) | ||||||
|             resolved = self._refs[ref] |             resolved = self._refs[ref] | ||||||
|   | |||||||
| @@ -1124,9 +1124,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||||||
|         })""", |         })""", | ||||||
|         R"""( |         R"""( | ||||||
|             char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) |             char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) | ||||||
|             foo ::= "{" space foo-a-kv "}" space |             ref-definitions-foo ::= "{" space ref-definitions-foo-a-kv "}" space | ||||||
|             foo-a-kv ::= "\"a\"" space ":" space string |             ref-definitions-foo-a-kv ::= "\"a\"" space ":" space string | ||||||
|             root ::= foo |             root ::= ref-definitions-foo | ||||||
|             space ::= | " " | "\n"{1,2} [ \t]{0,20} |             space ::= | " " | "\n"{1,2} [ \t]{0,20} | ||||||
|             string ::= "\"" char* "\"" space |             string ::= "\"" char* "\"" space | ||||||
|         )""" |         )""" | ||||||
| @@ -1151,20 +1151,58 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | |||||||
|             "type": "object" |             "type": "object" | ||||||
|         })""", |         })""", | ||||||
|         R"""( |         R"""( | ||||||
|             alternative-0 ::= foo |             alternative-0 ::= ref-definitions-foo | ||||||
|             alternative-1 ::= bar |             alternative-1 ::= ref-definitions-bar | ||||||
|             bar ::= "{" space  (bar-b-kv )? "}" space |  | ||||||
|             bar-b-kv ::= "\"b\"" space ":" space number |  | ||||||
|             decimal-part ::= [0-9]{1,16} |             decimal-part ::= [0-9]{1,16} | ||||||
|             foo ::= "{" space  (foo-a-kv )? "}" space |  | ||||||
|             foo-a-kv ::= "\"a\"" space ":" space number |  | ||||||
|             integral-part ::= [0] | [1-9] [0-9]{0,15} |             integral-part ::= [0] | [1-9] [0-9]{0,15} | ||||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space |             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||||
|  |             ref-definitions-bar ::= "{" space  (ref-definitions-bar-b-kv )? "}" space | ||||||
|  |             ref-definitions-bar-b-kv ::= "\"b\"" space ":" space number | ||||||
|  |             ref-definitions-foo ::= "{" space  (ref-definitions-foo-a-kv )? "}" space | ||||||
|  |             ref-definitions-foo-a-kv ::= "\"a\"" space ":" space number | ||||||
|             root ::= alternative-0 | alternative-1 |             root ::= alternative-0 | alternative-1 | ||||||
|             space ::= | " " | "\n"{1,2} [ \t]{0,20} |             space ::= | " " | "\n"{1,2} [ \t]{0,20} | ||||||
|         )""" |         )""" | ||||||
|     }); |     }); | ||||||
|  |  | ||||||
|  |     test({ | ||||||
|  |         SUCCESS, | ||||||
|  |         "anyOf $ref", | ||||||
|  |         R"""({ | ||||||
|  |             "properties": { | ||||||
|  |                 "a": { | ||||||
|  |                     "anyOf": [ | ||||||
|  |                         {"type": "string"}, | ||||||
|  |                         {"type": "number"} | ||||||
|  |                     ] | ||||||
|  |                 }, | ||||||
|  |                 "b": { | ||||||
|  |                     "anyOf": [ | ||||||
|  |                         {"$ref": "#/properties/a/anyOf/0"}, | ||||||
|  |                         {"type": "boolean"} | ||||||
|  |                     ] | ||||||
|  |                 } | ||||||
|  |             }, | ||||||
|  |             "type": "object" | ||||||
|  |         })""", | ||||||
|  |         R"""( | ||||||
|  |             a ::= string | number | ||||||
|  |             a-kv ::= "\"a\"" space ":" space a | ||||||
|  |             a-rest ::= ( "," space b-kv )? | ||||||
|  |             b ::= b-0 | boolean | ||||||
|  |             b-0 ::= string | ||||||
|  |             b-kv ::= "\"b\"" space ":" space b | ||||||
|  |             boolean ::= ("true" | "false") space | ||||||
|  |             char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) | ||||||
|  |             decimal-part ::= [0-9]{1,16} | ||||||
|  |             integral-part ::= [0] | [1-9] [0-9]{0,15} | ||||||
|  |             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||||
|  |             root ::= "{" space  (a-kv a-rest | b-kv )? "}" space | ||||||
|  |             space ::= | " " | "\n"{1,2} [ \t]{0,20} | ||||||
|  |             string ::= "\"" char* "\"" space | ||||||
|  |         )""" | ||||||
|  |     }); | ||||||
|  |  | ||||||
|     test({ |     test({ | ||||||
|         SUCCESS, |         SUCCESS, | ||||||
|         "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)", |         "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)", | ||||||
|   | |||||||
| @@ -345,10 +345,14 @@ export class SchemaConverter { | |||||||
|  |  | ||||||
|           const selectors = ref.split('#')[1].split('/').slice(1); |           const selectors = ref.split('#')[1].split('/').slice(1); | ||||||
|           for (const sel of selectors) { |           for (const sel of selectors) { | ||||||
|             if (!target || !(sel in target)) { |             const selIndex = parseInt(sel, 10); | ||||||
|  |             if (target && sel in target) { | ||||||
|  |               target = target[sel]; | ||||||
|  |             } else if (target && selIndex in target) { | ||||||
|  |               target = target[selIndex]; | ||||||
|  |             } else { | ||||||
|               throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`); |               throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`); | ||||||
|             } |             } | ||||||
|             target = target[sel]; |  | ||||||
|           } |           } | ||||||
|  |  | ||||||
|           this._refs[ref] = target; |           this._refs[ref] = target; | ||||||
| @@ -594,7 +598,8 @@ export class SchemaConverter { | |||||||
|   } |   } | ||||||
|  |  | ||||||
|   _resolveRef(ref) { |   _resolveRef(ref) { | ||||||
|     let refName = ref.split('/').pop(); |     let refFragment = ref.split('#').pop(); | ||||||
|  |     let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-'); | ||||||
|     if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) { |     if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) { | ||||||
|       this._refsBeingResolved.add(ref); |       this._refsBeingResolved.add(ref); | ||||||
|       const resolved = this._refs[ref]; |       const resolved = this._refs[ref]; | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Aldehir Rojas
					Aldehir Rojas