From 092fa3a32816d51c0707d4db9bef5391e28ef298 Mon Sep 17 00:00:00 2001 From: Aaron Teo Date: Mon, 28 Jul 2025 16:27:35 +0800 Subject: [PATCH] ggml-zdnn: activate bias transform in matmul Signed-off-by: Aaron Teo --- ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp | 23 +++-------------------- 1 file changed, 3 insertions(+), 20 deletions(-) diff --git a/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp b/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp index 236d958ded..390bfe7984 100644 --- a/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp +++ b/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp @@ -35,9 +35,7 @@ static bool ggml_zdnn_op_mul_mat(struct ggml_backend_zdnn_context * ctx, const g const ggml_backend_zdnn_buffer * weights_extra = (const ggml_backend_zdnn_buffer *)weights->extra; const ggml_backend_zdnn_buffer * inputs_extra = (const ggml_backend_zdnn_buffer *)inputs->extra; ggml_backend_zdnn_buffer * output_extra = ( ggml_backend_zdnn_buffer *)output->extra; - - zdnn_tensor_desc pre_tfm_desc_bias, tfm_desc_bias; - zdnn_ztensor ztensor_bias; + ggml_backend_zdnn_buffer * bias_extra = ( ggml_backend_zdnn_buffer *)output_extra->extra; const int64_t weights_rows = ne01; const int64_t weights_cols = ne00; @@ -49,27 +47,12 @@ static bool ggml_zdnn_op_mul_mat(struct ggml_backend_zdnn_context * ctx, const g const int64_t output_rows = ne1; const int64_t output_cols = ne0; - const int64_t blas_dim[GGML_MAX_DIMS] = { 1, 1, 1, output_cols }; - - zdnn_init_pre_transformed_desc( - ZDNN_1D, - FP32, - &pre_tfm_desc_bias, - blas_dim[3], blas_dim[2], blas_dim[1], blas_dim[0] - ); - ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc_bias, &tfm_desc_bias)); - ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc_bias, &tfm_desc_bias, &ztensor_bias)); - - void * bias_data = (void *)calloc(ne0, ggml_element_size(output)); - ZDNN_CHECK(zdnn_transform_ztensor(&ztensor_bias, bias_data)); + ZDNN_CHECK(zdnn_transform_ztensor(&bias_extra->ztensor, bias_extra->data)); std::raise(SIGINT); - ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &ztensor_bias, + ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &bias_extra->ztensor, false, true, MATMUL_OP_ADDITION, &output_extra->ztensor)); ZDNN_CHECK(zdnn_transform_ztensor(&output_extra->ztensor, output->data)); - - ZDNN_CHECK(zdnn_free_ztensor_buffer(&ztensor_bias)); - free(bias_data); } static bool ggml_backend_zdnn_compute_forward(struct ggml_backend_zdnn_context * ctx, struct ggml_tensor * dst) {