ggml-zdnn: activate bias transform in matmul

Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
Aaron Teo
2025-07-28 16:27:35 +08:00
parent f7e8d6f2b2
commit 092fa3a328

View File

@@ -35,9 +35,7 @@ static bool ggml_zdnn_op_mul_mat(struct ggml_backend_zdnn_context * ctx, const g
const ggml_backend_zdnn_buffer * weights_extra = (const ggml_backend_zdnn_buffer *)weights->extra; const ggml_backend_zdnn_buffer * weights_extra = (const ggml_backend_zdnn_buffer *)weights->extra;
const ggml_backend_zdnn_buffer * inputs_extra = (const ggml_backend_zdnn_buffer *)inputs->extra; const ggml_backend_zdnn_buffer * inputs_extra = (const ggml_backend_zdnn_buffer *)inputs->extra;
ggml_backend_zdnn_buffer * output_extra = ( ggml_backend_zdnn_buffer *)output->extra; ggml_backend_zdnn_buffer * output_extra = ( ggml_backend_zdnn_buffer *)output->extra;
ggml_backend_zdnn_buffer * bias_extra = ( ggml_backend_zdnn_buffer *)output_extra->extra;
zdnn_tensor_desc pre_tfm_desc_bias, tfm_desc_bias;
zdnn_ztensor ztensor_bias;
const int64_t weights_rows = ne01; const int64_t weights_rows = ne01;
const int64_t weights_cols = ne00; const int64_t weights_cols = ne00;
@@ -49,27 +47,12 @@ static bool ggml_zdnn_op_mul_mat(struct ggml_backend_zdnn_context * ctx, const g
const int64_t output_rows = ne1; const int64_t output_rows = ne1;
const int64_t output_cols = ne0; const int64_t output_cols = ne0;
const int64_t blas_dim[GGML_MAX_DIMS] = { 1, 1, 1, output_cols }; ZDNN_CHECK(zdnn_transform_ztensor(&bias_extra->ztensor, bias_extra->data));
zdnn_init_pre_transformed_desc(
ZDNN_1D,
FP32,
&pre_tfm_desc_bias,
blas_dim[3], blas_dim[2], blas_dim[1], blas_dim[0]
);
ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc_bias, &tfm_desc_bias));
ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc_bias, &tfm_desc_bias, &ztensor_bias));
void * bias_data = (void *)calloc(ne0, ggml_element_size(output));
ZDNN_CHECK(zdnn_transform_ztensor(&ztensor_bias, bias_data));
std::raise(SIGINT); std::raise(SIGINT);
ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &ztensor_bias, ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &bias_extra->ztensor,
false, true, MATMUL_OP_ADDITION, &output_extra->ztensor)); false, true, MATMUL_OP_ADDITION, &output_extra->ztensor));
ZDNN_CHECK(zdnn_transform_ztensor(&output_extra->ztensor, output->data)); ZDNN_CHECK(zdnn_transform_ztensor(&output_extra->ztensor, output->data));
ZDNN_CHECK(zdnn_free_ztensor_buffer(&ztensor_bias));
free(bias_data);
} }
static bool ggml_backend_zdnn_compute_forward(struct ggml_backend_zdnn_context * ctx, struct ggml_tensor * dst) { static bool ggml_backend_zdnn_compute_forward(struct ggml_backend_zdnn_context * ctx, struct ggml_tensor * dst) {