ggml : implement set_rows with i32 index (#16159)

* implement set_rows with i32 index

* template fix

* test quantized path

warnings--

* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* forgotten name change

* deduplicate cuda/sycl and test-fix

* indent++

* vulkan: support set_rows with i32 index type (#16162)

* disable i32 index for webgpu for now

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
This commit is contained in:
Sigbjørn Skjæret
2025-09-22 19:13:00 +02:00
committed by GitHub
parent 432cf4304c
commit 3ecb2f671a
17 changed files with 299 additions and 133 deletions

View File

@@ -2064,20 +2064,22 @@ struct test_get_rows_back : public test_case {
// GGML_OP_SET_ROWS
struct test_set_rows : public test_case {
const ggml_type type;
const ggml_type type_idx;
const std::array<int64_t, 4> ne;
const std::array<int, 2> nr23; // broadcast only dims 2 and 3
const int r; // rows to set
const bool v; // view (non-contiguous src1)
std::string vars() override {
return VARS_TO_STR5(type, ne, nr23, r, v);
return VARS_TO_STR6(type, type_idx, ne, nr23, r, v);
}
test_set_rows(ggml_type type,
ggml_type type_idx,
std::array<int64_t, 4> ne,
std::array<int, 2> nr23,
int r, bool v = false)
: type(type), ne(ne), nr23(nr23), r(r), v(v) {}
: type(type), type_idx(type_idx), ne(ne), nr23(nr23), r(r), v(v) {}
ggml_tensor * build_graph(ggml_context * ctx) override {
ggml_tensor * dst = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2]*nr23[0], ne[3]*nr23[1]);
@@ -2086,7 +2088,7 @@ struct test_set_rows : public test_case {
ggml_tensor * src = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, ne[0], r, ne[2]*nr23[0], ne[3]*nr23[1]);
ggml_set_name(src, "src");
ggml_tensor * row_idxs = ggml_new_tensor_3d(ctx, GGML_TYPE_I64, r, ne[2], ne[3]);
ggml_tensor * row_idxs = ggml_new_tensor_3d(ctx, type_idx, r, ne[2], ne[3]);
ggml_set_name(row_idxs, "row_idxs");
if (v) {
@@ -2105,7 +2107,7 @@ struct test_set_rows : public test_case {
std::random_device rd;
std::default_random_engine rng(rd());
for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
if (t->type == GGML_TYPE_I64) {
if (t->type == GGML_TYPE_I64 || t->type == GGML_TYPE_I32) {
if (ggml_is_view_op(t->op)) {
continue;
}
@@ -2121,7 +2123,16 @@ struct test_set_rows : public test_case {
data.resize(t->ne[0]);
const size_t offs = i1*t->nb[1] + i2*t->nb[2];
ggml_backend_tensor_set(t, data.data(), offs, t->ne[0]*sizeof(int64_t));
if (t->type == GGML_TYPE_I32) {
// TODO: Make a template or something
std::vector<int32_t> data_i32(t->ne[0]);
for (int i = 0; i < t->ne[0]; i++) {
data_i32[i] = static_cast<int32_t>(data[i]);
}
ggml_backend_tensor_set(t, data_i32.data(), offs, t->ne[0]*sizeof(int32_t));
} else {
ggml_backend_tensor_set(t, data.data(), offs, t->ne[0]*sizeof(int64_t));
}
}
}
} else {
@@ -5662,18 +5673,20 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
test_cases.emplace_back(new test_get_rows_back(GGML_TYPE_I32, 256, 5, 4, 1, v));
}
test_cases.emplace_back(new test_set_rows(GGML_TYPE_F32, { 1, 8, 1, 3 }, { 1, 1 }, 2, false));
test_cases.emplace_back(new test_set_rows(GGML_TYPE_F32, GGML_TYPE_I64, { 1, 8, 1, 3 }, { 1, 1 }, 2, false));
test_cases.emplace_back(new test_set_rows(GGML_TYPE_F32, GGML_TYPE_I32, { 1, 8, 1, 3 }, { 1, 1 }, 2, false));
test_cases.emplace_back(new test_set_rows(GGML_TYPE_Q8_0, GGML_TYPE_I32, { 256, 5, 1, 3 }, { 1, 1, }, 1, false));
for (ggml_type type : all_types) {
for (int b : {1, 7}) {
for (bool v : {false, true}) {
test_cases.emplace_back(new test_set_rows(type, { 256, 5, b, 3 }, { 1, 1, }, 1, v));
test_cases.emplace_back(new test_set_rows(type, { 256, 11, 1, b }, { 2, 3, }, 7, v));
test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 256, 5, b, 3 }, { 1, 1, }, 1, v));
test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 256, 11, 1, b }, { 2, 3, }, 7, v));
test_cases.emplace_back(new test_set_rows(type, { 3*ggml_blck_size(type), 3, b, 1 }, { 2, 3, }, 2, v));
test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 3*ggml_blck_size(type), 3, b, 1 }, { 2, 3, }, 2, v));
if (ggml_blck_size(type) == 1) {
test_cases.emplace_back(new test_set_rows(type, { 31, 3, b, 1 }, { 2, 3, }, 2, v));
test_cases.emplace_back(new test_set_rows(type, { 33, 5, 1, b }, { 2, 3, }, 1, v));
test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 31, 3, b, 1 }, { 2, 3, }, 2, v));
test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 33, 5, 1, b }, { 2, 3, }, 1, v));
}
}
}