Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-21 12:16:57 +00:00)
Fix too relaxed check on CUDA "fast copy" (can_be_transposed) condition (#17332)
* Fix too relaxed check on CUDA "fast copy" (can_be_transposed) condition
* Argh.
* Making CISC happy ;)
* Integrate CONT tests
* Use loopy loop
* Skip new tests for (B)F16 for now.
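The CUDA-side change itself is not part of the hunks shown below, which only touch the test harness. As a rough, hypothetical sketch of what a sufficiently strict `can_be_transposed`-style predicate has to verify (helper name and exact conditions are illustrative, not the actual ggml-cuda code): a source only qualifies for the transposed fast copy when its strides are *exactly* those of a 2D transpose of a contiguous tensor; merely being non-contiguous (the too-relaxed version) also matches views like the row slice the new test builds.

```cpp
#include "ggml.h"

// Illustrative only -- NOT the actual ggml-cuda predicate. For a transposed
// view T of a contiguous (non-quantized) tensor A, the strides must satisfy
// exactly these relations; the test's view slice fails the first one.
static bool looks_like_2d_transpose(const struct ggml_tensor * t) {
    const size_t ts = ggml_type_size(t->type);
    return t->nb[1] == ts                    // dim 1 is packed: these were the parent's rows
        && t->nb[0] == ts * t->ne[1]         // dim-0 stride spans one full parent row
        && t->nb[2] == t->nb[0] * t->ne[0]   // higher dims remain densely stacked
        && t->nb[3] == t->nb[2] * t->ne[2];
}
```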
Parent: 980b7cd17e
Commit: 6fd4f95367 (committed via GitHub)
tests/test-backend-ops.cpp

```diff
@@ -2776,24 +2776,34 @@ struct test_cpy : public test_case {
 struct test_cont : public test_case {
     const ggml_type type;
     const std::array<int64_t, 4> ne;
+    bool use_view_slice;
 
     std::string vars() override {
-        return VARS_TO_STR2(type, ne);
+        return VARS_TO_STR3(type, ne, use_view_slice);
     }
 
     test_cont(ggml_type type = GGML_TYPE_F32,
-            std::array<int64_t, 4> ne = {10, 10, 10, 1})
-        : type(type), ne(ne) {}
+            std::array<int64_t, 4> ne = {10, 10, 10, 1},
+            bool use_view_slice = false)
+        : type(type), ne(ne), use_view_slice(use_view_slice) {}
 
     ggml_tensor * build_graph(ggml_context * ctx) override {
         ggml_tensor * src = ggml_new_tensor(ctx, type, 4, ne.data());
         ggml_set_param(src);
         ggml_set_name(src, "src");
 
-        src = ggml_transpose(ctx, src);
-        ggml_set_name(src, "src_transposed");
-
-        ggml_tensor * out = ggml_cont(ctx, src);
+        ggml_tensor * dst;
+        if (use_view_slice) {
+            dst = ggml_view_4d(ctx, src, src->ne[0], 1, src->ne[2], src->ne[3],
+                               src->nb[1], src->nb[2], src->nb[3], src->nb[0] * (src->ne[1] - 1));
+            ggml_set_name(dst, "src_view_slice");
+        } else {
+            dst = ggml_transpose(ctx, src);
+            ggml_set_name(dst, "src_transposed");
+        }
+
+        ggml_tensor * out = ggml_cont(ctx, dst);
         ggml_set_name(out, "out");
 
         return out;
```
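For reference, a minimal standalone sketch (not part of the commit) that builds the same two views `test_cont` now exercises and prints their strides, assuming only the public ggml.h API:

```cpp
#include "ggml.h"
#include <stdio.h>

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ true,   // we only inspect metadata, no tensor data needed
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * src = ggml_new_tensor_4d(ctx, GGML_TYPE_F32, 10, 10, 10, 1);

    // use_view_slice == false: strides of dims 0 and 1 swapped, a genuine transpose.
    struct ggml_tensor * t = ggml_transpose(ctx, src);

    // use_view_slice == true: the same one-row slice as in the test above --
    // non-contiguous, but not a transpose, so the fast path must reject it.
    struct ggml_tensor * v = ggml_view_4d(ctx, src, src->ne[0], 1, src->ne[2], src->ne[3],
                                          src->nb[1], src->nb[2], src->nb[3],
                                          src->nb[0] * (src->ne[1] - 1));

    printf("transpose nb = {%zu, %zu, %zu, %zu}, contiguous = %d\n",
           t->nb[0], t->nb[1], t->nb[2], t->nb[3], (int) ggml_is_contiguous(t));
    printf("slice     nb = {%zu, %zu, %zu, %zu}, contiguous = %d\n",
           v->nb[0], v->nb[1], v->nb[2], v->nb[3], (int) ggml_is_contiguous(v));

    ggml_free(ctx);
    return 0;
}
```

For the F32 10x10x10x1 case, the transpose ends up with nb = {40, 4, 400, 4000}, while the slice keeps the parent's strides, nb = {4, 40, 400, 4000} over ne = {10, 1, 10, 1}: both are non-contiguous, but only the former is a transpose.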
```diff
@@ -6945,16 +6955,17 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
     test_cases.emplace_back(new test_cpy(GGML_TYPE_BF16, GGML_TYPE_BF16, {256, 4, 1, 1}, {0, 0, 0, 0}, {0, 0, 0, 0}, true));
     test_cases.emplace_back(new test_cpy(GGML_TYPE_F32, GGML_TYPE_F32, {256, 1, 4, 1}, {1, 2, 0, 3}, {0, 0, 0, 0}));
 
-    test_cases.emplace_back(new test_cont());
-    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 1 ,1}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 1, 3 ,5}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_F32, {2, 3, 5 ,7}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 1 ,1}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 1, 3 ,5}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_F16, {2, 3, 5 ,7}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 1 ,1}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 1, 3 ,5}));
-    test_cases.emplace_back(new test_cont(GGML_TYPE_BF16, {2, 3, 5 ,7}));
+    for (ggml_type type_dst : { GGML_TYPE_F32, GGML_TYPE_F16, GGML_TYPE_BF16 }) {
+        for (bool use_view_slice : { true, false }) {
+            for (std::array<int64_t, 4> ne : std::initializer_list<std::array<int64_t, 4>>{ {2, 1, 1, 1}, {2, 1, 3, 5},
+                                                 {2, 3, 5, 7}, {1, 4, 4, 1}, {1, 8, 17, 1}, {10, 10, 10, 1} }) {
+                if (use_view_slice && (type_dst == GGML_TYPE_F16 || type_dst == GGML_TYPE_BF16)) {
+                    continue; // TODO: add after WebGPU is fixed
+                }
+                test_cases.emplace_back(new test_cont(type_dst, ne, use_view_slice));
+            }
+        }
+    }
 
     auto add_test_bin_bcast = [&](ggml_type type, std::array<int64_t, 4> ne, std::array<int, 4> nr) {
         for (auto op : {ggml_add, ggml_sub, ggml_mul, ggml_div}) {
```
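Review note (not from the diff): the generated cases can be run in isolation through the harness's op filter, e.g. something like `test-backend-ops test -o CONT`; the flag spelling is from memory of the tool's usage text, so check `test-backend-ops --help` if it differs.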