mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
OpenCL: add fused group_norm/norm, mul, add (#15314)
* add fused group_norm/norm, mul, add * fix spacing * revert rms_norm logic * fix trailing whitespace
This commit is contained in:
@@ -2789,6 +2789,49 @@ struct test_norm : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_NORM + GGML_OP_MUL + GGML_OP_ADD
|
||||
struct test_norm_mul_add : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
float eps;
|
||||
const bool broadcast;
|
||||
|
||||
std::string op_desc(ggml_tensor * t) override {
|
||||
GGML_UNUSED(t);
|
||||
return "NORM_MUL_ADD";
|
||||
}
|
||||
|
||||
bool run_whole_graph() override { return true; }
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, ne, eps, broadcast);
|
||||
}
|
||||
|
||||
test_norm_mul_add(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {128, 2, 1, 1},
|
||||
float eps = 1e-5f,
|
||||
bool broadcast = false)
|
||||
: type(type), ne(ne), eps(eps), broadcast(broadcast) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
std::array<int64_t, 4> broadcast_dims = {ne[0], ne[1] * 2, ne[2] * 2, ne[3] * 2};
|
||||
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, broadcast ? broadcast_dims.data() : ne.data());
|
||||
ggml_tensor * w = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_param(a); ggml_set_param(w); ggml_set_param(b);
|
||||
ggml_set_name(a, "a"); ggml_set_name(w, "w"); ggml_set_name(b, "b");
|
||||
|
||||
// Use a, w and b early to avoid OP_NONE in graph
|
||||
a = ggml_add(ctx, ggml_add(ctx, a, w), b);
|
||||
|
||||
ggml_tensor * n = ggml_norm(ctx, a, eps);
|
||||
ggml_tensor * m = ggml_mul(ctx, n, w);
|
||||
ggml_tensor * out = ggml_add(ctx, m, b);
|
||||
ggml_set_name(out, "out");
|
||||
return out;
|
||||
}
|
||||
};
|
||||
// GGML_OP_RMS_NORM
|
||||
struct test_rms_norm : public test_case {
|
||||
const ggml_type type;
|
||||
@@ -4475,6 +4518,44 @@ struct test_group_norm : public test_case {
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_GROUP_NORM + GGML_OP_MUL + GGML_OP_ADD
|
||||
struct test_group_norm_mul_add : public test_case {
|
||||
const ggml_type type;
|
||||
const std::array<int64_t, 4> ne;
|
||||
int num_groups;
|
||||
float eps;
|
||||
|
||||
std::string op_desc(ggml_tensor * t) override {
|
||||
GGML_UNUSED(t);
|
||||
return "GROUP_NORM_MUL_ADD";
|
||||
}
|
||||
|
||||
bool run_whole_graph() override { return true; }
|
||||
|
||||
std::string vars() override {
|
||||
return VARS_TO_STR4(type, ne, num_groups, eps);
|
||||
}
|
||||
|
||||
test_group_norm_mul_add(ggml_type type = GGML_TYPE_F32,
|
||||
std::array<int64_t, 4> ne = {128, 1, 1, 1},
|
||||
int num_groups = 4,
|
||||
float eps = 1e-5f)
|
||||
: type(type), ne(ne), num_groups(num_groups), eps(eps) {}
|
||||
|
||||
ggml_tensor * build_graph(ggml_context * ctx) override {
|
||||
ggml_tensor * a = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_tensor * w = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_tensor * b = ggml_new_tensor(ctx, type, 4, ne.data());
|
||||
ggml_set_param(a); ggml_set_param(w); ggml_set_param(b);
|
||||
ggml_set_name(a, "a"); ggml_set_name(w, "w"); ggml_set_name(b, "b");
|
||||
ggml_tensor * n = ggml_group_norm(ctx, a, num_groups, eps);
|
||||
ggml_tensor * m = ggml_mul(ctx, n, w);
|
||||
ggml_tensor * out = ggml_add(ctx, m, b);
|
||||
ggml_set_name(out, "out");
|
||||
return out;
|
||||
}
|
||||
};
|
||||
|
||||
// GGML_OP_L2_NORM
|
||||
struct test_l2_norm : public test_case {
|
||||
const ggml_type type;
|
||||
@@ -5865,6 +5946,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
for (float eps : {0.0f, 1e-6f, 1e-4f, 1e-1f, 1.0f}) {
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps));
|
||||
test_cases.emplace_back(new test_rms_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, true));
|
||||
test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, false));
|
||||
test_cases.emplace_back(new test_norm_mul_add(GGML_TYPE_F32, {64, 5, 4, 3}, eps, true));
|
||||
}
|
||||
for (uint32_t n : {1, 511, 1025, 8192, 33*512}) {
|
||||
for (bool multi_add : {false, true}) {
|
||||
@@ -6253,6 +6336,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_mean(GGML_TYPE_F32, { 32769, 1, 1, 1 }));
|
||||
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {64, 64, 320, 1}));
|
||||
test_cases.emplace_back(new test_group_norm(GGML_TYPE_F32, {9, 9, 1280, 1}));
|
||||
test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {64, 64, 320, 1}));
|
||||
test_cases.emplace_back(new test_group_norm_mul_add(GGML_TYPE_F32, {9, 9, 1280, 1}));
|
||||
test_cases.emplace_back(new test_acc());
|
||||
test_cases.emplace_back(new test_pad());
|
||||
test_cases.emplace_back(new test_pad_reflect_1d());
|
||||
|
||||
Reference in New Issue
Block a user