From 03ec5d3ed35dcd2f02aae77f5ace1c8fc20876cd Mon Sep 17 00:00:00 2001
From: Aaron Teo
Date: Mon, 28 Jul 2025 18:14:44 +0800
Subject: [PATCH] ggml-zdnn: bring back working matmul

Signed-off-by: Aaron Teo
---
 ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp | 120 ++++++++++++-----------
 1 file changed, 62 insertions(+), 58 deletions(-)

diff --git a/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp b/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp
index 51d1833a98..8779535ec7 100644
--- a/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp
+++ b/ggml/src/ggml-zdnn/ggml-zdnn-rewrite.cpp
@@ -8,7 +8,46 @@
 #include
 #include
 
-static bool ggml_zdnn_op_mul_mat(struct ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
+inline zdnn_data_types ggml_zdnn_type_mapping(ggml_type type) {
+    switch (type) {
+        case GGML_TYPE_F32:
+            return FP32;
+        case GGML_TYPE_F16:
+            return FP16;
+        case GGML_TYPE_BF16:
+            return BFLOAT;
+        case GGML_TYPE_I8:
+            return INT8;
+        case GGML_TYPE_I32:
+            return INT32;
+        case GGML_TYPE_Q8_0:
+            return INT8;
+        default:
+            GGML_ABORT("%s: fatal: unable to determine zTensor data type",
+                       __func__);
+            break;
+    }
+}
+
+inline void ggml_zdnn_create_tensor(zdnn_tensor_desc & pre_tfm_desc,
+                                    zdnn_tensor_desc & tfm_desc,
+                                    zdnn_ztensor & ztensor,
+                                    const ggml_tensor * src,
+                                    const int64_t * ne,
+                                    const zdnn_data_layouts layout) {
+    zdnn_init_pre_transformed_desc(
+        layout,
+        ggml_zdnn_type_mapping(src->type),
+        &pre_tfm_desc,
+        ne[3], ne[2], ne[1], ne[0]
+    );
+
+    ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc, &tfm_desc));
+    ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc, &tfm_desc, &ztensor));
+}
+
+
+static void ggml_backend_zdnn_mul_mat(ggml_backend_zdnn_context * ctx, const ggml_tensor * src0, const ggml_tensor * src1, ggml_tensor * dst) {
     GGML_TENSOR_BINARY_OP_LOCALS
 
     const enum ggml_type type = src0->type;
@@ -32,10 +71,15 @@ static bool ggml_zdnn_op_mul_mat(struct ggml_backend_zdnn_context * ctx, const g
     const ggml_tensor * inputs  = src1;
           ggml_tensor * output  = dst;
 
-    ggml_backend_zdnn_buffer * weights_extra = (ggml_backend_zdnn_buffer *)weights->extra;
-    ggml_backend_zdnn_buffer * inputs_extra  = (ggml_backend_zdnn_buffer *)inputs->extra;
-    ggml_backend_zdnn_buffer * output_extra  = (ggml_backend_zdnn_buffer *)output->extra;
-    ggml_backend_zdnn_buffer * bias_extra    = (ggml_backend_zdnn_buffer *)output_extra->extra;
+    const zdnn_extra * inputs_extra      = (const zdnn_extra *)inputs->extra;
+    const zdnn_extra * weights_extra     = (const zdnn_extra *)weights->extra;
+          zdnn_extra * output_extra      = (      zdnn_extra *)output->extra;
+          zdnn_extra * output_bias_extra = (      zdnn_extra *)output_extra->extra;
+
+    zdnn_tensor_desc pre_tfm_desc_weights, tfm_desc_weights;
+    zdnn_tensor_desc pre_tfm_desc_bias,    tfm_desc_bias;
+
+    zdnn_ztensor ztensor_weights, ztensor_bias;
 
     const int64_t weights_rows = ne01;
     const int64_t weights_cols = ne00;
@@ -47,66 +91,26 @@ static bool ggml_zdnn_op_mul_mat(struct ggml_backend_zdnn_context * ctx, const g
     const int64_t output_rows = ne1;
     const int64_t output_cols = ne0;
 
-    zdnn_tensor_desc pre_tfm_desc_inputs,  tfm_desc_inputs;
-    zdnn_tensor_desc pre_tfm_desc_weights, tfm_desc_weights;
-    zdnn_tensor_desc pre_tfm_desc_bias,    tfm_desc_bias;
-    zdnn_tensor_desc pre_tfm_desc_output,  tfm_desc_output;
-    zdnn_ztensor ztensor_inputs, ztensor_weights, ztensor_bias, ztensor_output;
-
-    const int64_t inputs_dim [GGML_MAX_DIMS] = { 1, 1, inputs_cols,  inputs_rows  };
     const int64_t weights_dim[GGML_MAX_DIMS] = { 1, 1, weights_cols, weights_rows };
     const int64_t bias_dim   [GGML_MAX_DIMS] = { 1, 1, 1,            output_cols  };
-    const int64_t output_dim [GGML_MAX_DIMS] = { 1, 1, output_cols,  output_rows  };
 
-    // have to do this because weights apparently do not go through set_tensor
-    zdnn_init_pre_transformed_desc(
-        ZDNN_2D,
-        FP32,
-        &pre_tfm_desc_weights,
-        weights_dim[3], weights_dim[2], weights_dim[1], weights_dim[0]
-    );
-    ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc_weights, &tfm_desc_weights));
-    ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc_weights, &tfm_desc_weights, &ztensor_weights));
-    ZDNN_CHECK(zdnn_transform_ztensor(&ztensor_weights, weights->data));
-
-    // have to do this here because although it was transformed, the shape is wrong
-    zdnn_init_pre_transformed_desc(
-        ZDNN_2D,
-        FP32,
-        &pre_tfm_desc_inputs,
-        inputs_dim[3], inputs_dim[2], inputs_dim[1], inputs_dim[0]
-    );
-    ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc_inputs, &tfm_desc_inputs));
-    ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc_inputs, &tfm_desc_inputs, &ztensor_inputs));
-    ZDNN_CHECK(zdnn_transform_ztensor(&ztensor_inputs, inputs->data));
-
-    // have to transform the bias ztensor here because only GGML_OP_NONE goes through set_tensor
-    zdnn_init_pre_transformed_desc(
-        ZDNN_1D,
-        FP32,
-        &pre_tfm_desc_bias,
-        bias_dim[3], bias_dim[2], bias_dim[1], bias_dim[0]
-    );
-    ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc_bias, &tfm_desc_bias));
-    ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc_bias, &tfm_desc_bias, &ztensor_bias));
+    //! Something to do with these 2 lines that we can't remove
+    //! If we remove it, the entire computation will throw an error
+    //! Even though we don't use these tensors lol
+    ggml_zdnn_create_tensor(pre_tfm_desc_weights, tfm_desc_weights, ztensor_weights, src0, weights_dim, ZDNN_2D);
+    ggml_zdnn_create_tensor(pre_tfm_desc_bias,    tfm_desc_bias,    ztensor_bias,    dst,  bias_dim,    ZDNN_1D);
 
     void * bias_data = (void *)calloc(ne0, sizeof(ggml_element_size(output)));
-    ZDNN_CHECK(zdnn_transform_ztensor(&ztensor_bias, bias_data));
+    ZDNN_CHECK(zdnn_transform_ztensor(&output_bias_extra->ztensor, bias_data));
 
-    zdnn_init_pre_transformed_desc(
-        ZDNN_2D,
-        FP32,
-        &pre_tfm_desc_output,
-        output_dim[3], output_dim[2], output_dim[1], output_dim[0]
-    );
-    ZDNN_CHECK(zdnn_generate_transformed_desc(&pre_tfm_desc_output, &tfm_desc_output));
-    ZDNN_CHECK(zdnn_init_ztensor_with_malloc(&pre_tfm_desc_output, &tfm_desc_output, &ztensor_output));
-    ZDNN_CHECK(zdnn_transform_ztensor(&ztensor_output, output->data));
+    ZDNN_CHECK(zdnn_matmul_transpose_op(&inputs_extra->ztensor, &weights_extra->ztensor, &ztensor_bias,
+                                        false, true, MATMUL_OP_ADDITION, &output_extra->ztensor));
+    ZDNN_CHECK(zdnn_transform_origtensor(&output_extra->ztensor, output->data));
 
-    std::raise(SIGINT);
-    ZDNN_CHECK(zdnn_matmul_transpose_op(&ztensor_inputs, &ztensor_weights, &ztensor_bias,
-                                        false, true, MATMUL_OP_ADDITION, &ztensor_output));
-    ZDNN_CHECK(zdnn_transform_origtensor(&ztensor_output, output->data));
+    ZDNN_CHECK(zdnn_free_ztensor_buffer(&ztensor_weights));
+    ZDNN_CHECK(zdnn_free_ztensor_buffer(&ztensor_bias));
+
+    free(bias_data);
 }
 
 static bool ggml_backend_zdnn_compute_forward(struct ggml_backend_zdnn_context * ctx, struct ggml_tensor * dst) {
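
Note for reviewers: stripped of the ggml plumbing, the flow this patch relies on is describe -> stickify -> matmul (transpose_b) -> un-stickify. The sketch below is only illustrative, not backend code: it assumes zdnn.h, small row-major FP32 2D operands, a hypothetical local CHECK_ZDNN macro in place of the backend's ZDNN_CHECK, and hypothetical helper names; only zDNN calls that already appear in the diff are used.

// Sketch: C = A * B^T + bias on the zAIU via zDNN.
#include "zdnn.h"
#include <cstdint>
#include <cstdio>
#include <cstdlib>

#define CHECK_ZDNN(expr)                                                  \
    do {                                                                  \
        zdnn_status status_ = (expr);                                     \
        if (status_ != ZDNN_OK) {                                         \
            fprintf(stderr, "zdnn call failed: 0x%x\n", (unsigned)status_); \
            abort();                                                      \
        }                                                                 \
    } while (0)

// mirrors the patch's ggml_zdnn_create_tensor helper, minus the ggml_tensor:
// stage-1 descriptor, stage-2 (stickified) descriptor, malloc'd ztensor
static void make_ztensor(zdnn_tensor_desc & pre, zdnn_tensor_desc & tfm,
                         zdnn_ztensor & zt, const int64_t * ne,
                         zdnn_data_layouts layout) {
    // dims passed outermost-first, ne[3]..ne[0], exactly as in the patch
    zdnn_init_pre_transformed_desc(layout, FP32, &pre, ne[3], ne[2], ne[1], ne[0]);
    CHECK_ZDNN(zdnn_generate_transformed_desc(&pre, &tfm));
    CHECK_ZDNN(zdnn_init_ztensor_with_malloc(&pre, &tfm, &zt));
}

// a: m x k, b: n x k (used transposed), bias: n, c: m x n, all FP32
static void matmul_sketch(const float * a, const float * b, const float * bias,
                          float * c, int64_t m, int64_t n, int64_t k) {
    const int64_t a_dim[4]    = { 1, 1, k, m };  // { 1, 1, cols, rows }
    const int64_t b_dim[4]    = { 1, 1, k, n };
    const int64_t bias_dim[4] = { 1, 1, 1, n };
    const int64_t c_dim[4]    = { 1, 1, n, m };

    zdnn_tensor_desc pre_a, tfm_a, pre_b, tfm_b, pre_bias, tfm_bias, pre_c, tfm_c;
    zdnn_ztensor za, zb, zbias, zc;

    make_ztensor(pre_a,    tfm_a,    za,    a_dim,    ZDNN_2D);
    make_ztensor(pre_b,    tfm_b,    zb,    b_dim,    ZDNN_2D);
    make_ztensor(pre_bias, tfm_bias, zbias, bias_dim, ZDNN_1D);
    make_ztensor(pre_c,    tfm_c,    zc,    c_dim,    ZDNN_2D);

    // stickify the host buffers into the transformed (zAIU) layout
    CHECK_ZDNN(zdnn_transform_ztensor(&za,    (void *)a));
    CHECK_ZDNN(zdnn_transform_ztensor(&zb,    (void *)b));
    CHECK_ZDNN(zdnn_transform_ztensor(&zbias, (void *)bias));

    // c = a * b^T + bias: transpose_a = false, transpose_b = true,
    // the same call the patch issues on the tensor extras
    CHECK_ZDNN(zdnn_matmul_transpose_op(&za, &zb, &zbias,
                                        false, true, MATMUL_OP_ADDITION, &zc));

    // un-stickify the result back into the caller's plain FP32 buffer
    CHECK_ZDNN(zdnn_transform_origtensor(&zc, c));

    CHECK_ZDNN(zdnn_free_ztensor_buffer(&za));
    CHECK_ZDNN(zdnn_free_ztensor_buffer(&zb));
    CHECK_ZDNN(zdnn_free_ztensor_buffer(&zbias));
    CHECK_ZDNN(zdnn_free_ztensor_buffer(&zc));
}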