mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	grammar : support array references in json schema (#16792)
* grammar : support array references in json schema * Update json-schema-to-grammar.cpp Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com> * grammar : improve regex when naming ref derived rules * grammar : replace non-conformant definitions array with anyOf test case --------- Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
		@@ -601,7 +601,10 @@ private:
 | 
				
			|||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    std::string _resolve_ref(const std::string & ref) {
 | 
					    std::string _resolve_ref(const std::string & ref) {
 | 
				
			||||||
        std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
 | 
					        auto it = ref.find('#');
 | 
				
			||||||
 | 
					        std::string ref_fragment = it != std::string::npos ? ref.substr(it + 1) : ref;
 | 
				
			||||||
 | 
					        static const std::regex nonalphanumeric_regex(R"([^a-zA-Z0-9-]+)");
 | 
				
			||||||
 | 
					        std::string ref_name = "ref" + std::regex_replace(ref_fragment, nonalphanumeric_regex, "-");
 | 
				
			||||||
        if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
 | 
					        if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
 | 
				
			||||||
            _refs_being_resolved.insert(ref);
 | 
					            _refs_being_resolved.insert(ref);
 | 
				
			||||||
            json resolved = _refs[ref];
 | 
					            json resolved = _refs[ref];
 | 
				
			||||||
@@ -774,11 +777,24 @@ public:
 | 
				
			|||||||
                        std::vector<std::string> tokens = string_split(pointer, "/");
 | 
					                        std::vector<std::string> tokens = string_split(pointer, "/");
 | 
				
			||||||
                        for (size_t i = 1; i < tokens.size(); ++i) {
 | 
					                        for (size_t i = 1; i < tokens.size(); ++i) {
 | 
				
			||||||
                            std::string sel = tokens[i];
 | 
					                            std::string sel = tokens[i];
 | 
				
			||||||
                            if (target.is_null() || !target.contains(sel)) {
 | 
					                            if (target.is_object() && target.contains(sel)) {
 | 
				
			||||||
 | 
					                                target = target[sel];
 | 
				
			||||||
 | 
					                            } else if (target.is_array()) {
 | 
				
			||||||
 | 
					                                size_t sel_index;
 | 
				
			||||||
 | 
					                                try {
 | 
				
			||||||
 | 
					                                    sel_index = std::stoul(sel);
 | 
				
			||||||
 | 
					                                } catch (const std::invalid_argument & e) {
 | 
				
			||||||
 | 
					                                    sel_index = target.size();
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                                if (sel_index >= target.size()) {
 | 
				
			||||||
 | 
					                                    _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
 | 
				
			||||||
 | 
					                                    return;
 | 
				
			||||||
 | 
					                                }
 | 
				
			||||||
 | 
					                                target = target[sel_index];
 | 
				
			||||||
 | 
					                            } else {
 | 
				
			||||||
                                _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
 | 
					                                _errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
 | 
				
			||||||
                                return;
 | 
					                                return;
 | 
				
			||||||
                            }
 | 
					                            }
 | 
				
			||||||
                            target = target[sel];
 | 
					 | 
				
			||||||
                        }
 | 
					                        }
 | 
				
			||||||
                        _refs[ref] = target;
 | 
					                        _refs[ref] = target;
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -371,8 +371,17 @@ class SchemaConverter:
 | 
				
			|||||||
                        raise ValueError(f'Unsupported ref {ref}')
 | 
					                        raise ValueError(f'Unsupported ref {ref}')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    for sel in ref.split('#')[-1].split('/')[1:]:
 | 
					                    for sel in ref.split('#')[-1].split('/')[1:]:
 | 
				
			||||||
                        assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
 | 
					                        assert target is not None, f'Error resolving ref {ref}: {sel} not in {target}'
 | 
				
			||||||
                        target = target[sel]
 | 
					                        if isinstance(target, list):
 | 
				
			||||||
 | 
					                            try:
 | 
				
			||||||
 | 
					                                sel_index = int(sel)
 | 
				
			||||||
 | 
					                            except ValueError:
 | 
				
			||||||
 | 
					                                raise ValueError(f'Error resolving ref {ref}: {sel} not in {target}')
 | 
				
			||||||
 | 
					                            assert 0 <= sel_index < len(target), f'Error resolving ref {ref}: {sel} not in {target}'
 | 
				
			||||||
 | 
					                            target = target[sel_index]
 | 
				
			||||||
 | 
					                        else:
 | 
				
			||||||
 | 
					                            assert sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
 | 
				
			||||||
 | 
					                            target = target[sel]
 | 
				
			||||||
 | 
					
 | 
				
			||||||
                    self._refs[ref] = target
 | 
					                    self._refs[ref] = target
 | 
				
			||||||
                else:
 | 
					                else:
 | 
				
			||||||
@@ -547,7 +556,8 @@ class SchemaConverter:
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    def _resolve_ref(self, ref):
 | 
					    def _resolve_ref(self, ref):
 | 
				
			||||||
        ref_name = ref.split('/')[-1]
 | 
					        ref_fragment = ref.split('#')[-1]
 | 
				
			||||||
 | 
					        ref_name = 'ref' + re.sub(r'[^a-zA-Z0-9-]+', '-', ref_fragment)
 | 
				
			||||||
        if ref_name not in self._rules and ref not in self._refs_being_resolved:
 | 
					        if ref_name not in self._rules and ref not in self._refs_being_resolved:
 | 
				
			||||||
            self._refs_being_resolved.add(ref)
 | 
					            self._refs_being_resolved.add(ref)
 | 
				
			||||||
            resolved = self._refs[ref]
 | 
					            resolved = self._refs[ref]
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -1124,9 +1124,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
 | 
				
			|||||||
        })""",
 | 
					        })""",
 | 
				
			||||||
        R"""(
 | 
					        R"""(
 | 
				
			||||||
            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
 | 
					            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
 | 
				
			||||||
            foo ::= "{" space foo-a-kv "}" space
 | 
					            ref-definitions-foo ::= "{" space ref-definitions-foo-a-kv "}" space
 | 
				
			||||||
            foo-a-kv ::= "\"a\"" space ":" space string
 | 
					            ref-definitions-foo-a-kv ::= "\"a\"" space ":" space string
 | 
				
			||||||
            root ::= foo
 | 
					            root ::= ref-definitions-foo
 | 
				
			||||||
            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 | 
					            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 | 
				
			||||||
            string ::= "\"" char* "\"" space
 | 
					            string ::= "\"" char* "\"" space
 | 
				
			||||||
        )"""
 | 
					        )"""
 | 
				
			||||||
@@ -1151,20 +1151,58 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
 | 
				
			|||||||
            "type": "object"
 | 
					            "type": "object"
 | 
				
			||||||
        })""",
 | 
					        })""",
 | 
				
			||||||
        R"""(
 | 
					        R"""(
 | 
				
			||||||
            alternative-0 ::= foo
 | 
					            alternative-0 ::= ref-definitions-foo
 | 
				
			||||||
            alternative-1 ::= bar
 | 
					            alternative-1 ::= ref-definitions-bar
 | 
				
			||||||
            bar ::= "{" space  (bar-b-kv )? "}" space
 | 
					 | 
				
			||||||
            bar-b-kv ::= "\"b\"" space ":" space number
 | 
					 | 
				
			||||||
            decimal-part ::= [0-9]{1,16}
 | 
					            decimal-part ::= [0-9]{1,16}
 | 
				
			||||||
            foo ::= "{" space  (foo-a-kv )? "}" space
 | 
					 | 
				
			||||||
            foo-a-kv ::= "\"a\"" space ":" space number
 | 
					 | 
				
			||||||
            integral-part ::= [0] | [1-9] [0-9]{0,15}
 | 
					            integral-part ::= [0] | [1-9] [0-9]{0,15}
 | 
				
			||||||
            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
 | 
					            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
 | 
				
			||||||
 | 
					            ref-definitions-bar ::= "{" space  (ref-definitions-bar-b-kv )? "}" space
 | 
				
			||||||
 | 
					            ref-definitions-bar-b-kv ::= "\"b\"" space ":" space number
 | 
				
			||||||
 | 
					            ref-definitions-foo ::= "{" space  (ref-definitions-foo-a-kv )? "}" space
 | 
				
			||||||
 | 
					            ref-definitions-foo-a-kv ::= "\"a\"" space ":" space number
 | 
				
			||||||
            root ::= alternative-0 | alternative-1
 | 
					            root ::= alternative-0 | alternative-1
 | 
				
			||||||
            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 | 
					            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 | 
				
			||||||
        )"""
 | 
					        )"""
 | 
				
			||||||
    });
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    test({
 | 
				
			||||||
 | 
					        SUCCESS,
 | 
				
			||||||
 | 
					        "anyOf $ref",
 | 
				
			||||||
 | 
					        R"""({
 | 
				
			||||||
 | 
					            "properties": {
 | 
				
			||||||
 | 
					                "a": {
 | 
				
			||||||
 | 
					                    "anyOf": [
 | 
				
			||||||
 | 
					                        {"type": "string"},
 | 
				
			||||||
 | 
					                        {"type": "number"}
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					                },
 | 
				
			||||||
 | 
					                "b": {
 | 
				
			||||||
 | 
					                    "anyOf": [
 | 
				
			||||||
 | 
					                        {"$ref": "#/properties/a/anyOf/0"},
 | 
				
			||||||
 | 
					                        {"type": "boolean"}
 | 
				
			||||||
 | 
					                    ]
 | 
				
			||||||
 | 
					                }
 | 
				
			||||||
 | 
					            },
 | 
				
			||||||
 | 
					            "type": "object"
 | 
				
			||||||
 | 
					        })""",
 | 
				
			||||||
 | 
					        R"""(
 | 
				
			||||||
 | 
					            a ::= string | number
 | 
				
			||||||
 | 
					            a-kv ::= "\"a\"" space ":" space a
 | 
				
			||||||
 | 
					            a-rest ::= ( "," space b-kv )?
 | 
				
			||||||
 | 
					            b ::= b-0 | boolean
 | 
				
			||||||
 | 
					            b-0 ::= string
 | 
				
			||||||
 | 
					            b-kv ::= "\"b\"" space ":" space b
 | 
				
			||||||
 | 
					            boolean ::= ("true" | "false") space
 | 
				
			||||||
 | 
					            char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
 | 
				
			||||||
 | 
					            decimal-part ::= [0-9]{1,16}
 | 
				
			||||||
 | 
					            integral-part ::= [0] | [1-9] [0-9]{0,15}
 | 
				
			||||||
 | 
					            number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
 | 
				
			||||||
 | 
					            root ::= "{" space  (a-kv a-rest | b-kv )? "}" space
 | 
				
			||||||
 | 
					            space ::= | " " | "\n"{1,2} [ \t]{0,20}
 | 
				
			||||||
 | 
					            string ::= "\"" char* "\"" space
 | 
				
			||||||
 | 
					        )"""
 | 
				
			||||||
 | 
					    });
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    test({
 | 
					    test({
 | 
				
			||||||
        SUCCESS,
 | 
					        SUCCESS,
 | 
				
			||||||
        "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
 | 
					        "mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
 | 
				
			||||||
 
 | 
				
			|||||||
@@ -345,10 +345,14 @@ export class SchemaConverter {
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
          const selectors = ref.split('#')[1].split('/').slice(1);
 | 
					          const selectors = ref.split('#')[1].split('/').slice(1);
 | 
				
			||||||
          for (const sel of selectors) {
 | 
					          for (const sel of selectors) {
 | 
				
			||||||
            if (!target || !(sel in target)) {
 | 
					            const selIndex = parseInt(sel, 10);
 | 
				
			||||||
 | 
					            if (target && sel in target) {
 | 
				
			||||||
 | 
					              target = target[sel];
 | 
				
			||||||
 | 
					            } else if (target && selIndex in target) {
 | 
				
			||||||
 | 
					              target = target[selIndex];
 | 
				
			||||||
 | 
					            } else {
 | 
				
			||||||
              throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
 | 
					              throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            target = target[sel];
 | 
					 | 
				
			||||||
          }
 | 
					          }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
          this._refs[ref] = target;
 | 
					          this._refs[ref] = target;
 | 
				
			||||||
@@ -594,7 +598,8 @@ export class SchemaConverter {
 | 
				
			|||||||
  }
 | 
					  }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  _resolveRef(ref) {
 | 
					  _resolveRef(ref) {
 | 
				
			||||||
    let refName = ref.split('/').pop();
 | 
					    let refFragment = ref.split('#').pop();
 | 
				
			||||||
 | 
					    let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-');
 | 
				
			||||||
    if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
 | 
					    if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
 | 
				
			||||||
      this._refsBeingResolved.add(ref);
 | 
					      this._refsBeingResolved.add(ref);
 | 
				
			||||||
      const resolved = this._refs[ref];
 | 
					      const resolved = this._refs[ref];
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user