mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	grammar : support array references in json schema (#16792)
* grammar : support array references in json schema
* Update json-schema-to-grammar.cpp

  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* grammar : improve regex when naming ref derived rules
* grammar : replace non-conformant definitions array with anyOf test case

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
		| @@ -601,7 +601,10 @@ private: | ||||
|     } | ||||
|  | ||||
|     std::string _resolve_ref(const std::string & ref) { | ||||
|         std::string ref_name = ref.substr(ref.find_last_of('/') + 1); | ||||
|         auto it = ref.find('#'); | ||||
|         std::string ref_fragment = it != std::string::npos ? ref.substr(it + 1) : ref; | ||||
|         static const std::regex nonalphanumeric_regex(R"([^a-zA-Z0-9-]+)"); | ||||
|         std::string ref_name = "ref" + std::regex_replace(ref_fragment, nonalphanumeric_regex, "-"); | ||||
|         if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) { | ||||
|             _refs_being_resolved.insert(ref); | ||||
|             json resolved = _refs[ref]; | ||||
| @@ -774,11 +777,24 @@ public: | ||||
|                         std::vector<std::string> tokens = string_split(pointer, "/"); | ||||
|                         for (size_t i = 1; i < tokens.size(); ++i) { | ||||
|                             std::string sel = tokens[i]; | ||||
|                             if (target.is_null() || !target.contains(sel)) { | ||||
|                             if (target.is_object() && target.contains(sel)) { | ||||
|                                 target = target[sel]; | ||||
|                             } else if (target.is_array()) { | ||||
|                                 size_t sel_index; | ||||
|                                 try { | ||||
|                                     sel_index = std::stoul(sel); | ||||
|                                 } catch (const std::invalid_argument & e) { | ||||
|                                     sel_index = target.size(); | ||||
|                                 } | ||||
|                                 if (sel_index >= target.size()) { | ||||
|                                     _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); | ||||
|                                     return; | ||||
|                                 } | ||||
|                                 target = target[sel_index]; | ||||
|                             } else { | ||||
|                                 _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump()); | ||||
|                                 return; | ||||
|                             } | ||||
|                             target = target[sel]; | ||||
|                         } | ||||
|                         _refs[ref] = target; | ||||
|                     } | ||||
|   | ||||
| @@ -371,7 +371,16 @@ class SchemaConverter: | ||||
|                         raise ValueError(f'Unsupported ref {ref}') | ||||
|  | ||||
|                     for sel in ref.split('#')[-1].split('/')[1:]: | ||||
|                         assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}' | ||||
|                         assert target is not None, f'Error resolving ref {ref}: {sel} not in {target}' | ||||
|                         if isinstance(target, list): | ||||
|                             try: | ||||
|                                 sel_index = int(sel) | ||||
|                             except ValueError: | ||||
|                                 raise ValueError(f'Error resolving ref {ref}: {sel} not in {target}') | ||||
|                             assert 0 <= sel_index < len(target), f'Error resolving ref {ref}: {sel} not in {target}' | ||||
|                             target = target[sel_index] | ||||
|                         else: | ||||
|                             assert sel in target, f'Error resolving ref {ref}: {sel} not in {target}' | ||||
|                             target = target[sel] | ||||
|  | ||||
|                     self._refs[ref] = target | ||||
| @@ -547,7 +556,8 @@ class SchemaConverter: | ||||
|  | ||||
|  | ||||
|     def _resolve_ref(self, ref): | ||||
|         ref_name = ref.split('/')[-1] | ||||
|         ref_fragment = ref.split('#')[-1] | ||||
|         ref_name = 'ref' + re.sub(r'[^a-zA-Z0-9-]+', '-', ref_fragment) | ||||
|         if ref_name not in self._rules and ref not in self._refs_being_resolved: | ||||
|             self._refs_being_resolved.add(ref) | ||||
|             resolved = self._refs[ref] | ||||
|   | ||||
| @@ -1124,9 +1124,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | ||||
|         })""", | ||||
|         R"""( | ||||
|             char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) | ||||
|             foo ::= "{" space foo-a-kv "}" space | ||||
|             foo-a-kv ::= "\"a\"" space ":" space string | ||||
|             root ::= foo | ||||
|             ref-definitions-foo ::= "{" space ref-definitions-foo-a-kv "}" space | ||||
|             ref-definitions-foo-a-kv ::= "\"a\"" space ":" space string | ||||
|             root ::= ref-definitions-foo | ||||
|             space ::= | " " | "\n"{1,2} [ \t]{0,20} | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
| @@ -1151,20 +1151,58 @@ static void test_all(const std::string & lang, std::function<void(const TestCase | ||||
|             "type": "object" | ||||
|         })""", | ||||
|         R"""( | ||||
|             alternative-0 ::= foo | ||||
|             alternative-1 ::= bar | ||||
|             bar ::= "{" space  (bar-b-kv )? "}" space | ||||
|             bar-b-kv ::= "\"b\"" space ":" space number | ||||
|             alternative-0 ::= ref-definitions-foo | ||||
|             alternative-1 ::= ref-definitions-bar | ||||
|             decimal-part ::= [0-9]{1,16} | ||||
|             foo ::= "{" space  (foo-a-kv )? "}" space | ||||
|             foo-a-kv ::= "\"a\"" space ":" space number | ||||
|             integral-part ::= [0] | [1-9] [0-9]{0,15} | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             ref-definitions-bar ::= "{" space  (ref-definitions-bar-b-kv )? "}" space | ||||
|             ref-definitions-bar-b-kv ::= "\"b\"" space ":" space number | ||||
|             ref-definitions-foo ::= "{" space  (ref-definitions-foo-a-kv )? "}" space | ||||
|             ref-definitions-foo-a-kv ::= "\"a\"" space ":" space number | ||||
|             root ::= alternative-0 | alternative-1 | ||||
|             space ::= | " " | "\n"{1,2} [ \t]{0,20} | ||||
|         )""" | ||||
|     }); | ||||
|  | ||||
|     test({ | ||||
|         SUCCESS, | ||||
|         "anyOf $ref", | ||||
|         R"""({ | ||||
|             "properties": { | ||||
|                 "a": { | ||||
|                     "anyOf": [ | ||||
|                         {"type": "string"}, | ||||
|                         {"type": "number"} | ||||
|                     ] | ||||
|                 }, | ||||
|                 "b": { | ||||
|                     "anyOf": [ | ||||
|                         {"$ref": "#/properties/a/anyOf/0"}, | ||||
|                         {"type": "boolean"} | ||||
|                     ] | ||||
|                 } | ||||
|             }, | ||||
|             "type": "object" | ||||
|         })""", | ||||
|         R"""( | ||||
|             a ::= string | number | ||||
|             a-kv ::= "\"a\"" space ":" space a | ||||
|             a-rest ::= ( "," space b-kv )? | ||||
|             b ::= b-0 | boolean | ||||
|             b-0 ::= string | ||||
|             b-kv ::= "\"b\"" space ":" space b | ||||
|             boolean ::= ("true" | "false") space | ||||
|             char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4}) | ||||
|             decimal-part ::= [0-9]{1,16} | ||||
|             integral-part ::= [0] | [1-9] [0-9]{0,15} | ||||
|             number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space | ||||
|             root ::= "{" space  (a-kv a-rest | b-kv )? "}" space | ||||
|             space ::= | " " | "\n"{1,2} [ \t]{0,20} | ||||
|             string ::= "\"" char* "\"" space | ||||
|         )""" | ||||
|     }); | ||||
|  | ||||
|     test({ | ||||
|         SUCCESS, | ||||
|         "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)", | ||||
|   | ||||
| @@ -345,10 +345,14 @@ export class SchemaConverter { | ||||
|  | ||||
|           const selectors = ref.split('#')[1].split('/').slice(1); | ||||
|           for (const sel of selectors) { | ||||
|             if (!target || !(sel in target)) { | ||||
|             const selIndex = parseInt(sel, 10); | ||||
|             if (target && sel in target) { | ||||
|               target = target[sel]; | ||||
|             } else if (target && selIndex in target) { | ||||
|               target = target[selIndex]; | ||||
|             } else { | ||||
|               throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`); | ||||
|             } | ||||
|             target = target[sel]; | ||||
|           } | ||||
|  | ||||
|           this._refs[ref] = target; | ||||
| @@ -594,7 +598,8 @@ export class SchemaConverter { | ||||
|   } | ||||
|  | ||||
|   _resolveRef(ref) { | ||||
|     let refName = ref.split('/').pop(); | ||||
|     let refFragment = ref.split('#').pop(); | ||||
|     let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-'); | ||||
|     if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) { | ||||
|       this._refsBeingResolved.add(ref); | ||||
|       const resolved = this._refs[ref]; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Aldehir Rojas
					Aldehir Rojas