mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-29 08:41:22 +00:00
vulkan: add exp operation (#15456)
Co-authored-by: aeseulgi <kim2h7903@gmail.com>
This commit is contained in:
@@ -490,6 +490,7 @@ struct vk_device_struct {
|
|||||||
vk_pipeline pipeline_l2_norm_f32;
|
vk_pipeline pipeline_l2_norm_f32;
|
||||||
|
|
||||||
// [src/dst 0=fp32,1=fp16]
|
// [src/dst 0=fp32,1=fp16]
|
||||||
|
vk_pipeline pipeline_exp[2];
|
||||||
vk_pipeline pipeline_gelu[2];
|
vk_pipeline pipeline_gelu[2];
|
||||||
vk_pipeline pipeline_gelu_erf[2];
|
vk_pipeline pipeline_gelu_erf[2];
|
||||||
vk_pipeline pipeline_gelu_quick[2];
|
vk_pipeline pipeline_gelu_quick[2];
|
||||||
@@ -3066,6 +3067,7 @@ static void ggml_vk_load_shaders(vk_device& device) {
|
|||||||
ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32", name ## _f32_len, name ## _f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); \
|
ggml_vk_create_pipeline(device, device->pipeline_ ## name [0], #name "_f32", name ## _f32_len, name ## _f32_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1); \
|
||||||
ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
|
ggml_vk_create_pipeline(device, device->pipeline_ ## name [1], #name "_f16", name ## _f16_len, name ## _f16_data, "main", 2, sizeof(vk_op_push_constants), {512, 1, 1}, {}, 1);
|
||||||
|
|
||||||
|
CREATE_UNARY(exp)
|
||||||
CREATE_UNARY(gelu)
|
CREATE_UNARY(gelu)
|
||||||
CREATE_UNARY(gelu_erf)
|
CREATE_UNARY(gelu_erf)
|
||||||
CREATE_UNARY(gelu_quick)
|
CREATE_UNARY(gelu_quick)
|
||||||
@@ -7133,6 +7135,8 @@ static vk_pipeline ggml_vk_op_get_pipeline(ggml_backend_vk_context * ctx, const
|
|||||||
}
|
}
|
||||||
|
|
||||||
switch (ggml_get_unary_op(dst)) {
|
switch (ggml_get_unary_op(dst)) {
|
||||||
|
case GGML_UNARY_OP_EXP:
|
||||||
|
return ctx->device->pipeline_exp[dst->type == GGML_TYPE_F16];
|
||||||
case GGML_UNARY_OP_SILU:
|
case GGML_UNARY_OP_SILU:
|
||||||
return ctx->device->pipeline_silu[dst->type == GGML_TYPE_F16];
|
return ctx->device->pipeline_silu[dst->type == GGML_TYPE_F16];
|
||||||
case GGML_UNARY_OP_GELU:
|
case GGML_UNARY_OP_GELU:
|
||||||
@@ -9738,6 +9742,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
|
|||||||
return false;
|
return false;
|
||||||
case GGML_OP_UNARY:
|
case GGML_OP_UNARY:
|
||||||
switch (ggml_get_unary_op(node)) {
|
switch (ggml_get_unary_op(node)) {
|
||||||
|
case GGML_UNARY_OP_EXP:
|
||||||
case GGML_UNARY_OP_SILU:
|
case GGML_UNARY_OP_SILU:
|
||||||
case GGML_UNARY_OP_GELU:
|
case GGML_UNARY_OP_GELU:
|
||||||
case GGML_UNARY_OP_GELU_ERF:
|
case GGML_UNARY_OP_GELU_ERF:
|
||||||
@@ -10015,6 +10020,7 @@ static bool ggml_vk_build_graph(ggml_backend_vk_context * ctx, ggml_cgraph * cgr
|
|||||||
break;
|
break;
|
||||||
case GGML_OP_UNARY:
|
case GGML_OP_UNARY:
|
||||||
switch (ggml_get_unary_op(node)) {
|
switch (ggml_get_unary_op(node)) {
|
||||||
|
case GGML_UNARY_OP_EXP:
|
||||||
case GGML_UNARY_OP_SILU:
|
case GGML_UNARY_OP_SILU:
|
||||||
case GGML_UNARY_OP_GELU:
|
case GGML_UNARY_OP_GELU:
|
||||||
case GGML_UNARY_OP_GELU_ERF:
|
case GGML_UNARY_OP_GELU_ERF:
|
||||||
@@ -10251,6 +10257,7 @@ static bool ggml_vk_compute_forward(ggml_backend_vk_context * ctx, ggml_cgraph *
|
|||||||
break;
|
break;
|
||||||
case GGML_OP_UNARY:
|
case GGML_OP_UNARY:
|
||||||
switch (ggml_get_unary_op(tensor)) {
|
switch (ggml_get_unary_op(tensor)) {
|
||||||
|
case GGML_UNARY_OP_EXP:
|
||||||
case GGML_UNARY_OP_SILU:
|
case GGML_UNARY_OP_SILU:
|
||||||
case GGML_UNARY_OP_GELU:
|
case GGML_UNARY_OP_GELU:
|
||||||
case GGML_UNARY_OP_GELU_ERF:
|
case GGML_UNARY_OP_GELU_ERF:
|
||||||
@@ -11166,6 +11173,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm
|
|||||||
switch (op->op) {
|
switch (op->op) {
|
||||||
case GGML_OP_UNARY:
|
case GGML_OP_UNARY:
|
||||||
switch (ggml_get_unary_op(op)) {
|
switch (ggml_get_unary_op(op)) {
|
||||||
|
case GGML_UNARY_OP_EXP:
|
||||||
case GGML_UNARY_OP_GELU:
|
case GGML_UNARY_OP_GELU:
|
||||||
case GGML_UNARY_OP_GELU_ERF:
|
case GGML_UNARY_OP_GELU_ERF:
|
||||||
case GGML_UNARY_OP_GELU_QUICK:
|
case GGML_UNARY_OP_GELU_QUICK:
|
||||||
@@ -11965,6 +11973,9 @@ static void ggml_vk_check_results_0(ggml_backend_vk_context * ctx, ggml_cgraph *
|
|||||||
}
|
}
|
||||||
} else if (tensor->op == GGML_OP_UNARY) {
|
} else if (tensor->op == GGML_OP_UNARY) {
|
||||||
switch (ggml_get_unary_op(tensor)) {
|
switch (ggml_get_unary_op(tensor)) {
|
||||||
|
case GGML_UNARY_OP_EXP:
|
||||||
|
tensor_clone = ggml_exp(ggml_ctx, src_clone[0]);
|
||||||
|
break;
|
||||||
case GGML_UNARY_OP_SILU:
|
case GGML_UNARY_OP_SILU:
|
||||||
tensor_clone = ggml_silu(ggml_ctx, src_clone[0]);
|
tensor_clone = ggml_silu(ggml_ctx, src_clone[0]);
|
||||||
break;
|
break;
|
||||||
|
|||||||
20
ggml/src/ggml-vulkan/vulkan-shaders/exp.comp
Normal file
20
ggml/src/ggml-vulkan/vulkan-shaders/exp.comp
Normal file
@@ -0,0 +1,20 @@
|
|||||||
|
#version 450
// Element-wise exponential compute shader: data_d[i] = exp(data_a[i]).
// A_TYPE and D_TYPE are not defined in this file; they are supplied when the
// shader is built (the generator emits a float and a float16_t variant).
|
||||||
|
|
||||||
|
#include "generic_head.comp"
|
||||||
|
#include "types.comp"
|
||||||
|
|
||||||
|
// NOTE(review): no control-flow attribute is used in this file as shown, so
// this extension may be unnecessary here - confirm before removing.
#extension GL_EXT_control_flow_attributes : enable
|
||||||
|
|
||||||
|
// Workgroup size: 512 invocations along x, 1 along y and z.
layout(local_size_x = 512, local_size_y = 1, local_size_z = 1) in;
|
||||||
|
|
||||||
|
// Binding 0: read-only input buffer of A_TYPE elements.
layout (binding = 0) readonly buffer X {A_TYPE data_a[];};
|
||||||
|
// Binding 1: write-only output buffer of D_TYPE elements.
layout (binding = 1) writeonly buffer D {D_TYPE data_d[];};
|
||||||
|
|
||||||
|
// Each invocation handles at most one element: it computes a flat index,
// returns early if that index is out of range, otherwise writes one result.
void main() {
|
||||||
|
// Flatten the 3-D global invocation ID into one linear element index:
// x + 512 * y + 262144 * z, where 262144 == 512 * 512.
const uint i = gl_GlobalInvocationID.z * 262144 + gl_GlobalInvocationID.y * 512 + gl_GlobalInvocationID.x;
|
||||||
|
|
||||||
|
// Skip invocations whose index is at or beyond p.KX.
// `p` is not declared in this file; presumably it is the push-constant block
// from generic_head.comp and KX is the element count - TODO confirm.
if (i >= p.KX) {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
// Convert the input to float, apply exp, then convert the result to D_TYPE.
data_d[i] = D_TYPE(exp(float(data_a[i])));
|
||||||
|
}
|
||||||
@@ -586,6 +586,8 @@ void process_shaders() {
|
|||||||
|
|
||||||
string_to_spv("upscale_f32", "upscale.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
|
string_to_spv("upscale_f32", "upscale.comp", {{"A_TYPE", "float"}, {"B_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||||
|
|
||||||
|
string_to_spv("exp_f16", "exp.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
|
||||||
|
string_to_spv("exp_f32", "exp.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||||
string_to_spv("gelu_f16", "gelu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
|
string_to_spv("gelu_f16", "gelu.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
|
||||||
string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
|
string_to_spv("gelu_f32", "gelu.comp", {{"A_TYPE", "float"}, {"D_TYPE", "float"}});
|
||||||
string_to_spv("gelu_erf_f16", "gelu_erf.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
|
string_to_spv("gelu_erf_f16", "gelu_erf.comp", {{"A_TYPE", "float16_t"}, {"D_TYPE", "float16_t"}});
|
||||||
|
|||||||
Reference in New Issue
Block a user