Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-28 08:31:25 +00:00)
ggml : implement set_rows with i32 index (#16159)
* implement set_rows with i32 index
* template fix
* test quantized path warnings--
* Apply suggestions from code review

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>

* forgotten name change
* deduplicate cuda/sycl and test-fix
* indent++
* vulkan: support set_rows with i32 index type (#16162)
* disable i32 index for webgpu for now

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
Co-authored-by: Jeff Bolz <jbolz@nvidia.com>
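For context before the diff to the test suite (the test_set_rows case in test-backend-ops.cpp), here is a minimal standalone sketch of the row-scatter semantics that GGML_OP_SET_ROWS exercises: each source row is copied into the destination row named by the corresponding entry of the index tensor, and with this change the indices may be either I64 or I32. This is not ggml code; the function name set_rows_ref and the flat-vector layout are illustrative only.

// Standalone sketch (not ggml code) of the set_rows semantics tested below,
// with the index type as a template parameter so both int64_t (I64) and
// int32_t (I32) indices are covered.
#include <cstdint>
#include <cstdio>
#include <vector>

template <typename IdxT>
void set_rows_ref(std::vector<float> & dst, int64_t n_cols,
                  const std::vector<float> & src,
                  const std::vector<IdxT> & row_idxs) {
    // src holds row_idxs.size() rows of n_cols elements each;
    // row i of src is copied into row row_idxs[i] of dst
    for (size_t i = 0; i < row_idxs.size(); ++i) {
        const int64_t r = (int64_t) row_idxs[i];
        for (int64_t c = 0; c < n_cols; ++c) {
            dst[r*n_cols + c] = src[i*n_cols + c];
        }
    }
}

int main() {
    const int64_t n_cols = 4;
    std::vector<float> dst(8*n_cols, 0.0f);        // 8 destination rows
    std::vector<float> src = {1,1,1,1, 2,2,2,2};   // 2 source rows

    std::vector<int32_t> idx_i32 = {5, 2};         // i32 index path
    set_rows_ref<int32_t>(dst, n_cols, src, idx_i32);

    std::vector<int64_t> idx_i64 = {7, 0};         // i64 index path
    set_rows_ref<int64_t>(dst, n_cols, src, idx_i64);

    printf("dst[5*4] = %.0f, dst[2*4] = %.0f, dst[7*4] = %.0f\n",
           dst[5*n_cols], dst[2*n_cols], dst[7*n_cols]);   // 1, 2, 1
    return 0;
}

In the real op the destination may also be a quantized type, which is why the diff adds a GGML_TYPE_Q8_0 / GGML_TYPE_I32 case; the sketch keeps everything in float for brevity.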
@@ -2064,20 +2064,22 @@ struct test_get_rows_back : public test_case {
 // GGML_OP_SET_ROWS
 struct test_set_rows : public test_case {
     const ggml_type type;
+    const ggml_type type_idx;
     const std::array<int64_t, 4> ne;
     const std::array<int, 2> nr23; // broadcast only dims 2 and 3
     const int r; // rows to set
     const bool v; // view (non-contiguous src1)

     std::string vars() override {
-        return VARS_TO_STR5(type, ne, nr23, r, v);
+        return VARS_TO_STR6(type, type_idx, ne, nr23, r, v);
     }

     test_set_rows(ggml_type type,
+            ggml_type type_idx,
             std::array<int64_t, 4> ne,
             std::array<int, 2> nr23,
             int r, bool v = false)
-        : type(type), ne(ne), nr23(nr23), r(r), v(v) {}
+        : type(type), type_idx(type_idx), ne(ne), nr23(nr23), r(r), v(v) {}

     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * dst = ggml_new_tensor_4d(ctx, type, ne[0], ne[1], ne[2]*nr23[0], ne[3]*nr23[1]);
@@ -2086,7 +2088,7 @@ struct test_set_rows : public test_case {
         ggml_tensor * src = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, ne[0], r, ne[2]*nr23[0], ne[3]*nr23[1]);
         ggml_set_name(src, "src");

-        ggml_tensor * row_idxs = ggml_new_tensor_3d(ctx, GGML_TYPE_I64, r, ne[2], ne[3]);
+        ggml_tensor * row_idxs = ggml_new_tensor_3d(ctx, type_idx, r, ne[2], ne[3]);
         ggml_set_name(row_idxs, "row_idxs");

         if (v) {
@@ -2105,7 +2107,7 @@ struct test_set_rows : public test_case {
         std::random_device rd;
         std::default_random_engine rng(rd());
         for (ggml_tensor * t = ggml_get_first_tensor(ctx); t != NULL; t = ggml_get_next_tensor(ctx, t)) {
-            if (t->type == GGML_TYPE_I64) {
+            if (t->type == GGML_TYPE_I64 || t->type == GGML_TYPE_I32) {
                 if (ggml_is_view_op(t->op)) {
                     continue;
                 }
@@ -2121,7 +2123,16 @@ struct test_set_rows : public test_case {
                         data.resize(t->ne[0]);

                         const size_t offs = i1*t->nb[1] + i2*t->nb[2];
-                        ggml_backend_tensor_set(t, data.data(), offs, t->ne[0]*sizeof(int64_t));
+                        if (t->type == GGML_TYPE_I32) {
+                            // TODO: Make a template or something
+                            std::vector<int32_t> data_i32(t->ne[0]);
+                            for (int i = 0; i < t->ne[0]; i++) {
+                                data_i32[i] = static_cast<int32_t>(data[i]);
+                            }
+                            ggml_backend_tensor_set(t, data_i32.data(), offs, t->ne[0]*sizeof(int32_t));
+                        } else {
+                            ggml_backend_tensor_set(t, data.data(), offs, t->ne[0]*sizeof(int64_t));
+                        }
                     }
                 }
             } else {
@@ -5662,18 +5673,20 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         test_cases.emplace_back(new test_get_rows_back(GGML_TYPE_I32, 256, 5, 4, 1, v));
     }

-    test_cases.emplace_back(new test_set_rows(GGML_TYPE_F32, { 1, 8, 1, 3 }, { 1, 1 }, 2, false));
+    test_cases.emplace_back(new test_set_rows(GGML_TYPE_F32, GGML_TYPE_I64, { 1, 8, 1, 3 }, { 1, 1 }, 2, false));
+    test_cases.emplace_back(new test_set_rows(GGML_TYPE_F32, GGML_TYPE_I32, { 1, 8, 1, 3 }, { 1, 1 }, 2, false));
+    test_cases.emplace_back(new test_set_rows(GGML_TYPE_Q8_0, GGML_TYPE_I32, { 256, 5, 1, 3 }, { 1, 1, }, 1, false));
     for (ggml_type type : all_types) {
         for (int b : {1, 7}) {
             for (bool v : {false, true}) {
-                test_cases.emplace_back(new test_set_rows(type, { 256, 5, b, 3 }, { 1, 1, }, 1, v));
-                test_cases.emplace_back(new test_set_rows(type, { 256, 11, 1, b }, { 2, 3, }, 7, v));
+                test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 256, 5, b, 3 }, { 1, 1, }, 1, v));
+                test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 256, 11, 1, b }, { 2, 3, }, 7, v));

-                test_cases.emplace_back(new test_set_rows(type, { 3*ggml_blck_size(type), 3, b, 1 }, { 2, 3, }, 2, v));
+                test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 3*ggml_blck_size(type), 3, b, 1 }, { 2, 3, }, 2, v));

                 if (ggml_blck_size(type) == 1) {
-                    test_cases.emplace_back(new test_set_rows(type, { 31, 3, b, 1 }, { 2, 3, }, 2, v));
-                    test_cases.emplace_back(new test_set_rows(type, { 33, 5, 1, b }, { 2, 3, }, 1, v));
+                    test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 31, 3, b, 1 }, { 2, 3, }, 2, v));
+                    test_cases.emplace_back(new test_set_rows(type, GGML_TYPE_I64, { 33, 5, 1, b }, { 2, 3, }, 1, v));
                 }
             }
         }
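The index-fill branch added in the @@ -2121,7 +2123,16 @@ hunk leaves a "TODO: Make a template or something" for the duplicated ggml_backend_tensor_set calls. One possible follow-up is sketched here; the helper name upload_row_idxs is hypothetical and not part of this commit, and the only ggml calls used are the ones already present in the patch.

// Hypothetical helper addressing the TODO above. It assumes the headers
// already included by the test file, which provide ggml_tensor and
// ggml_backend_tensor_set(tensor, data, offset, size).
#include <cstdint>
#include <vector>

template <typename IdxT>
static void upload_row_idxs(ggml_tensor * t, const std::vector<int64_t> & data, size_t offs) {
    // convert the shuffled int64_t row indices to the tensor's element type
    std::vector<IdxT> buf(data.size());
    for (size_t i = 0; i < data.size(); i++) {
        buf[i] = static_cast<IdxT>(data[i]);
    }
    // upload the converted indices in a single call
    ggml_backend_tensor_set(t, buf.data(), offs, buf.size()*sizeof(IdxT));
}

// possible call site, replacing the if/else added by the patch:
//     if (t->type == GGML_TYPE_I32) {
//         upload_row_idxs<int32_t>(t, data, offs);
//     } else {
//         upload_row_idxs<int64_t>(t, data, offs);
//     }

Note that for the I64 path this makes one extra copy that the current code avoids by passing data.data() directly; a specialization or an if constexpr branch could restore that.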