mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
grammar : use int64_t to avoid int overflows in the integer-schema-to-grammar conversion logic (#16626)
This commit is contained in:
@@ -41,9 +41,9 @@ static std::string build_repetition(const std::string & item_rule, int min_items
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
|
|
||||||
static void _build_min_max_int(int min_value, int max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
|
static void _build_min_max_int(int64_t min_value, int64_t max_value, std::stringstream & out, int decimals_left = 16, bool top_level = true) {
|
||||||
auto has_min = min_value != std::numeric_limits<int>::min();
|
auto has_min = min_value != std::numeric_limits<int64_t>::min();
|
||||||
auto has_max = max_value != std::numeric_limits<int>::max();
|
auto has_max = max_value != std::numeric_limits<int64_t>::max();
|
||||||
|
|
||||||
auto digit_range = [&](char from, char to) {
|
auto digit_range = [&](char from, char to) {
|
||||||
out << "[";
|
out << "[";
|
||||||
@@ -159,7 +159,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
|
|||||||
if (has_min) {
|
if (has_min) {
|
||||||
if (min_value < 0) {
|
if (min_value < 0) {
|
||||||
out << "\"-\" (";
|
out << "\"-\" (";
|
||||||
_build_min_max_int(std::numeric_limits<int>::min(), -min_value, out, decimals_left, /* top_level= */ false);
|
_build_min_max_int(std::numeric_limits<int64_t>::min(), -min_value, out, decimals_left, /* top_level= */ false);
|
||||||
out << ") | [0] | [1-9] ";
|
out << ") | [0] | [1-9] ";
|
||||||
more_digits(0, decimals_left - 1);
|
more_digits(0, decimals_left - 1);
|
||||||
} else if (min_value == 0) {
|
} else if (min_value == 0) {
|
||||||
@@ -194,7 +194,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
|
|||||||
}
|
}
|
||||||
digit_range(c, c);
|
digit_range(c, c);
|
||||||
out << " (";
|
out << " (";
|
||||||
_build_min_max_int(std::stoi(min_s.substr(1)), std::numeric_limits<int>::max(), out, less_decimals, /* top_level= */ false);
|
_build_min_max_int(std::stoll(min_s.substr(1)), std::numeric_limits<int64_t>::max(), out, less_decimals, /* top_level= */ false);
|
||||||
out << ")";
|
out << ")";
|
||||||
if (c < '9') {
|
if (c < '9') {
|
||||||
out << " | ";
|
out << " | ";
|
||||||
@@ -216,7 +216,7 @@ static void _build_min_max_int(int min_value, int max_value, std::stringstream &
|
|||||||
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
|
_build_min_max_int(0, max_value, out, decimals_left, /* top_level= */ true);
|
||||||
} else {
|
} else {
|
||||||
out << "\"-\" (";
|
out << "\"-\" (";
|
||||||
_build_min_max_int(-max_value, std::numeric_limits<int>::max(), out, decimals_left, /* top_level= */ false);
|
_build_min_max_int(-max_value, std::numeric_limits<int64_t>::max(), out, decimals_left, /* top_level= */ false);
|
||||||
out << ")";
|
out << ")";
|
||||||
}
|
}
|
||||||
return;
|
return;
|
||||||
@@ -925,17 +925,17 @@ public:
|
|||||||
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
int max_len = schema.contains("maxLength") ? schema["maxLength"].get<int>() : std::numeric_limits<int>::max();
|
||||||
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
return _add_rule(rule_name, "\"\\\"\" " + build_repetition(char_rule, min_len, max_len) + " \"\\\"\" space");
|
||||||
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
|
} else if (schema_type == "integer" && (schema.contains("minimum") || schema.contains("exclusiveMinimum") || schema.contains("maximum") || schema.contains("exclusiveMaximum"))) {
|
||||||
int min_value = std::numeric_limits<int>::min();
|
int64_t min_value = std::numeric_limits<int64_t>::min();
|
||||||
int max_value = std::numeric_limits<int>::max();
|
int64_t max_value = std::numeric_limits<int64_t>::max();
|
||||||
if (schema.contains("minimum")) {
|
if (schema.contains("minimum")) {
|
||||||
min_value = schema["minimum"].get<int>();
|
min_value = schema["minimum"].get<int64_t>();
|
||||||
} else if (schema.contains("exclusiveMinimum")) {
|
} else if (schema.contains("exclusiveMinimum")) {
|
||||||
min_value = schema["exclusiveMinimum"].get<int>() + 1;
|
min_value = schema["exclusiveMinimum"].get<int64_t>() + 1;
|
||||||
}
|
}
|
||||||
if (schema.contains("maximum")) {
|
if (schema.contains("maximum")) {
|
||||||
max_value = schema["maximum"].get<int>();
|
max_value = schema["maximum"].get<int64_t>();
|
||||||
} else if (schema.contains("exclusiveMaximum")) {
|
} else if (schema.contains("exclusiveMaximum")) {
|
||||||
max_value = schema["exclusiveMaximum"].get<int>() - 1;
|
max_value = schema["exclusiveMaximum"].get<int64_t>() - 1;
|
||||||
}
|
}
|
||||||
std::stringstream out;
|
std::stringstream out;
|
||||||
out << "(";
|
out << "(";
|
||||||
|
|||||||
@@ -301,6 +301,30 @@ static void test_simple_grammar() {
|
|||||||
"0123",
|
"0123",
|
||||||
}
|
}
|
||||||
);
|
);
|
||||||
|
test_schema(
|
||||||
|
"min 1 max 900719925474091",
|
||||||
|
// Schema
|
||||||
|
R"""({
|
||||||
|
"type": "integer",
|
||||||
|
"exclusiveMinimum": 0,
|
||||||
|
"maximum": 900719925474091
|
||||||
|
})""",
|
||||||
|
// Passing strings
|
||||||
|
{
|
||||||
|
"1",
|
||||||
|
"2",
|
||||||
|
"10",
|
||||||
|
"900719925474090",
|
||||||
|
"900719925474091",
|
||||||
|
},
|
||||||
|
// Failing strings
|
||||||
|
{
|
||||||
|
"0",
|
||||||
|
"01",
|
||||||
|
"900719925474092",
|
||||||
|
"9007199254740910",
|
||||||
|
}
|
||||||
|
);
|
||||||
test_schema(
|
test_schema(
|
||||||
"min -1 max 1",
|
"min -1 max 1",
|
||||||
R"""({
|
R"""({
|
||||||
|
|||||||
Reference in New Issue
Block a user