mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-01 09:01:57 +00:00
grammar : support array references in json schema (#16792)
* grammar : support array references in json schema
* Update json-schema-to-grammar.cpp

  Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
* grammar : improve regex when naming ref derived rules
* grammar : replace non-conformant definitions array with anyOf test case

---------

Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
@@ -601,7 +601,10 @@ private:
|
|||||||
}
|
}
|
||||||
|
|
||||||
std::string _resolve_ref(const std::string & ref) {
|
std::string _resolve_ref(const std::string & ref) {
|
||||||
std::string ref_name = ref.substr(ref.find_last_of('/') + 1);
|
auto it = ref.find('#');
|
||||||
|
std::string ref_fragment = it != std::string::npos ? ref.substr(it + 1) : ref;
|
||||||
|
static const std::regex nonalphanumeric_regex(R"([^a-zA-Z0-9-]+)");
|
||||||
|
std::string ref_name = "ref" + std::regex_replace(ref_fragment, nonalphanumeric_regex, "-");
|
||||||
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
if (_rules.find(ref_name) == _rules.end() && _refs_being_resolved.find(ref) == _refs_being_resolved.end()) {
|
||||||
_refs_being_resolved.insert(ref);
|
_refs_being_resolved.insert(ref);
|
||||||
json resolved = _refs[ref];
|
json resolved = _refs[ref];
|
||||||
@@ -774,11 +777,24 @@ public:
|
|||||||
std::vector<std::string> tokens = string_split(pointer, "/");
|
std::vector<std::string> tokens = string_split(pointer, "/");
|
||||||
for (size_t i = 1; i < tokens.size(); ++i) {
|
for (size_t i = 1; i < tokens.size(); ++i) {
|
||||||
std::string sel = tokens[i];
|
std::string sel = tokens[i];
|
||||||
if (target.is_null() || !target.contains(sel)) {
|
if (target.is_object() && target.contains(sel)) {
|
||||||
|
target = target[sel];
|
||||||
|
} else if (target.is_array()) {
|
||||||
|
size_t sel_index;
|
||||||
|
try {
|
||||||
|
sel_index = std::stoul(sel);
|
||||||
|
} catch (const std::invalid_argument & e) {
|
||||||
|
sel_index = target.size();
|
||||||
|
}
|
||||||
|
if (sel_index >= target.size()) {
|
||||||
|
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
target = target[sel_index];
|
||||||
|
} else {
|
||||||
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
_errors.push_back("Error resolving ref " + ref + ": " + sel + " not in " + target.dump());
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
target = target[sel];
|
|
||||||
}
|
}
|
||||||
_refs[ref] = target;
|
_refs[ref] = target;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -371,7 +371,16 @@ class SchemaConverter:
|
|||||||
raise ValueError(f'Unsupported ref {ref}')
|
raise ValueError(f'Unsupported ref {ref}')
|
||||||
|
|
||||||
for sel in ref.split('#')[-1].split('/')[1:]:
|
for sel in ref.split('#')[-1].split('/')[1:]:
|
||||||
assert target is not None and sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
|
assert target is not None, f'Error resolving ref {ref}: {sel} not in {target}'
|
||||||
|
if isinstance(target, list):
|
||||||
|
try:
|
||||||
|
sel_index = int(sel)
|
||||||
|
except ValueError:
|
||||||
|
raise ValueError(f'Error resolving ref {ref}: {sel} not in {target}')
|
||||||
|
assert 0 <= sel_index < len(target), f'Error resolving ref {ref}: {sel} not in {target}'
|
||||||
|
target = target[sel_index]
|
||||||
|
else:
|
||||||
|
assert sel in target, f'Error resolving ref {ref}: {sel} not in {target}'
|
||||||
target = target[sel]
|
target = target[sel]
|
||||||
|
|
||||||
self._refs[ref] = target
|
self._refs[ref] = target
|
||||||
@@ -547,7 +556,8 @@ class SchemaConverter:
|
|||||||
|
|
||||||
|
|
||||||
def _resolve_ref(self, ref):
|
def _resolve_ref(self, ref):
|
||||||
ref_name = ref.split('/')[-1]
|
ref_fragment = ref.split('#')[-1]
|
||||||
|
ref_name = 'ref' + re.sub(r'[^a-zA-Z0-9-]+', '-', ref_fragment)
|
||||||
if ref_name not in self._rules and ref not in self._refs_being_resolved:
|
if ref_name not in self._rules and ref not in self._refs_being_resolved:
|
||||||
self._refs_being_resolved.add(ref)
|
self._refs_being_resolved.add(ref)
|
||||||
resolved = self._refs[ref]
|
resolved = self._refs[ref]
|
||||||
|
|||||||
@@ -1124,9 +1124,9 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||||
foo ::= "{" space foo-a-kv "}" space
|
ref-definitions-foo ::= "{" space ref-definitions-foo-a-kv "}" space
|
||||||
foo-a-kv ::= "\"a\"" space ":" space string
|
ref-definitions-foo-a-kv ::= "\"a\"" space ":" space string
|
||||||
root ::= foo
|
root ::= ref-definitions-foo
|
||||||
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||||
string ::= "\"" char* "\"" space
|
string ::= "\"" char* "\"" space
|
||||||
)"""
|
)"""
|
||||||
@@ -1151,20 +1151,58 @@ static void test_all(const std::string & lang, std::function<void(const TestCase
|
|||||||
"type": "object"
|
"type": "object"
|
||||||
})""",
|
})""",
|
||||||
R"""(
|
R"""(
|
||||||
alternative-0 ::= foo
|
alternative-0 ::= ref-definitions-foo
|
||||||
alternative-1 ::= bar
|
alternative-1 ::= ref-definitions-bar
|
||||||
bar ::= "{" space (bar-b-kv )? "}" space
|
|
||||||
bar-b-kv ::= "\"b\"" space ":" space number
|
|
||||||
decimal-part ::= [0-9]{1,16}
|
decimal-part ::= [0-9]{1,16}
|
||||||
foo ::= "{" space (foo-a-kv )? "}" space
|
|
||||||
foo-a-kv ::= "\"a\"" space ":" space number
|
|
||||||
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||||
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
|
ref-definitions-bar ::= "{" space (ref-definitions-bar-b-kv )? "}" space
|
||||||
|
ref-definitions-bar-b-kv ::= "\"b\"" space ":" space number
|
||||||
|
ref-definitions-foo ::= "{" space (ref-definitions-foo-a-kv )? "}" space
|
||||||
|
ref-definitions-foo-a-kv ::= "\"a\"" space ":" space number
|
||||||
root ::= alternative-0 | alternative-1
|
root ::= alternative-0 | alternative-1
|
||||||
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||||
)"""
|
)"""
|
||||||
});
|
});
|
||||||
|
|
||||||
|
test({
|
||||||
|
SUCCESS,
|
||||||
|
"anyOf $ref",
|
||||||
|
R"""({
|
||||||
|
"properties": {
|
||||||
|
"a": {
|
||||||
|
"anyOf": [
|
||||||
|
{"type": "string"},
|
||||||
|
{"type": "number"}
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"b": {
|
||||||
|
"anyOf": [
|
||||||
|
{"$ref": "#/properties/a/anyOf/0"},
|
||||||
|
{"type": "boolean"}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"type": "object"
|
||||||
|
})""",
|
||||||
|
R"""(
|
||||||
|
a ::= string | number
|
||||||
|
a-kv ::= "\"a\"" space ":" space a
|
||||||
|
a-rest ::= ( "," space b-kv )?
|
||||||
|
b ::= b-0 | boolean
|
||||||
|
b-0 ::= string
|
||||||
|
b-kv ::= "\"b\"" space ":" space b
|
||||||
|
boolean ::= ("true" | "false") space
|
||||||
|
char ::= [^"\\\x7F\x00-\x1F] | [\\] (["\\bfnrt] | "u" [0-9a-fA-F]{4})
|
||||||
|
decimal-part ::= [0-9]{1,16}
|
||||||
|
integral-part ::= [0] | [1-9] [0-9]{0,15}
|
||||||
|
number ::= ("-"? integral-part) ("." decimal-part)? ([eE] [-+]? integral-part)? space
|
||||||
|
root ::= "{" space (a-kv a-rest | b-kv )? "}" space
|
||||||
|
space ::= | " " | "\n"{1,2} [ \t]{0,20}
|
||||||
|
string ::= "\"" char* "\"" space
|
||||||
|
)"""
|
||||||
|
});
|
||||||
|
|
||||||
test({
|
test({
|
||||||
SUCCESS,
|
SUCCESS,
|
||||||
"mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
|
"mix of allOf, anyOf and $ref (similar to https://json.schemastore.org/tsconfig.json)",
|
||||||
|
|||||||
@@ -345,10 +345,14 @@ export class SchemaConverter {
|
|||||||
|
|
||||||
const selectors = ref.split('#')[1].split('/').slice(1);
|
const selectors = ref.split('#')[1].split('/').slice(1);
|
||||||
for (const sel of selectors) {
|
for (const sel of selectors) {
|
||||||
if (!target || !(sel in target)) {
|
const selIndex = parseInt(sel, 10);
|
||||||
|
if (target && sel in target) {
|
||||||
|
target = target[sel];
|
||||||
|
} else if (target && selIndex in target) {
|
||||||
|
target = target[selIndex];
|
||||||
|
} else {
|
||||||
throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
|
throw new Error(`Error resolving ref ${ref}: ${sel} not in ${JSON.stringify(target)}`);
|
||||||
}
|
}
|
||||||
target = target[sel];
|
|
||||||
}
|
}
|
||||||
|
|
||||||
this._refs[ref] = target;
|
this._refs[ref] = target;
|
||||||
@@ -594,7 +598,8 @@ export class SchemaConverter {
|
|||||||
}
|
}
|
||||||
|
|
||||||
_resolveRef(ref) {
|
_resolveRef(ref) {
|
||||||
let refName = ref.split('/').pop();
|
let refFragment = ref.split('#').pop();
|
||||||
|
let refName = 'ref' + refFragment.replace(/[^a-zA-Z0-9-]+/g, '-');
|
||||||
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
if (!(refName in this._rules) && !this._refsBeingResolved.has(ref)) {
|
||||||
this._refsBeingResolved.add(ref);
|
this._refsBeingResolved.add(ref);
|
||||||
const resolved = this._refs[ref];
|
const resolved = this._refs[ref];
|
||||||
|
|||||||
Reference in New Issue
Block a user