mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	CANN: format code using .clang-format (#15863)
This commit applies .clang-format rules to all source files under the ggml-cann directory to ensure consistent coding style and readability. The .clang-format option `SortIncludes: false` has been set to disable automatic reordering of include directives. No functional changes are introduced. Co-authored-by: hipudding <huafengchun@gmail.com>
This commit is contained in:
		
							
								
								
									
										89
									
								
								ggml/src/ggml-cann/acl_tensor.cpp
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										89
									
								
								ggml/src/ggml-cann/acl_tensor.cpp
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							| @@ -51,28 +51,31 @@ aclDataType ggml_cann_type_mapping(ggml_type type) { | ||||
|     return ACL_DT_UNDEFINED; | ||||
| } | ||||
|  | ||||
| aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne, | ||||
|                                    size_t* nb, int64_t dims, aclFormat format, | ||||
|                                    size_t offset) { | ||||
| aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor, | ||||
|                                     int64_t *           ne, | ||||
|                                     size_t *            nb, | ||||
|                                     int64_t             dims, | ||||
|                                     aclFormat           format, | ||||
|                                     size_t              offset) { | ||||
|     // If the tensor is broadcast, up to GGML_MAX_DIMS additional dimensions will be | ||||
|     // added. | ||||
|     int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2]; | ||||
|  | ||||
|     if (ne == nullptr) { | ||||
|         for (int i = 0; i < GGML_MAX_DIMS; i++) { | ||||
|             acl_ne[i] = tensor->ne[i]; | ||||
|             acl_ne[i]     = tensor->ne[i]; | ||||
|             // The step size of acl is in elements. | ||||
|             acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor); | ||||
|         } | ||||
|     } else { | ||||
|         // With bcast | ||||
|         for (int i = 0; i < dims; i++) { | ||||
|             acl_ne[i] = ne[i]; | ||||
|             acl_ne[i]     = ne[i]; | ||||
|             acl_stride[i] = nb[i] / ggml_element_size(tensor); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims); | ||||
|     int64_t final_dims      = (dims == 0 ? GGML_MAX_DIMS : dims); | ||||
|     int64_t acl_storage_len = 1; | ||||
|     for (int i = 0; i < final_dims; i++) { | ||||
|         acl_storage_len += (acl_ne[i] - 1) * acl_stride[i]; | ||||
| @@ -84,15 +87,13 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne, | ||||
|     std::reverse(acl_ne, acl_ne + final_dims); | ||||
|     std::reverse(acl_stride, acl_stride + final_dims); | ||||
|  | ||||
|     aclTensor* acl_tensor = aclCreateTensor( | ||||
|         acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride, | ||||
|         elem_offset, format, &acl_storage_len, 1, | ||||
|         tensor->data); | ||||
|     aclTensor * acl_tensor = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride, | ||||
|                                              elem_offset, format, &acl_storage_len, 1, tensor->data); | ||||
|  | ||||
|     return acl_tensor; | ||||
| } | ||||
|  | ||||
| bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) { | ||||
| bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1) { | ||||
|     for (int i = 0; i < GGML_MAX_DIMS; i++) { | ||||
|         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) { | ||||
|             return true; | ||||
| @@ -101,15 +102,16 @@ bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) { | ||||
|     return false; | ||||
| } | ||||
|  | ||||
| int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, | ||||
|                                   const ggml_tensor* src1, | ||||
|                                   int64_t* bcast_src0_ne, | ||||
|                                   int64_t* bcast_src1_ne, size_t* bcast_src0_nb, | ||||
|                                   size_t* bcast_src1_nb) { | ||||
| int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0, | ||||
|                                   const ggml_tensor * src1, | ||||
|                                   int64_t *           bcast_src0_ne, | ||||
|                                   int64_t *           bcast_src1_ne, | ||||
|                                   size_t *            bcast_src0_nb, | ||||
|                                   size_t *            bcast_src1_nb) { | ||||
|     GGML_ASSERT(ggml_can_repeat(src1, src0)); | ||||
|     int bcast_dim_cnt = 0; | ||||
|     for (int i = 0; i < GGML_MAX_DIMS; i++) { | ||||
|         int64_t nr = src0->ne[i] / src1->ne[i]; | ||||
|         int64_t nr                   = src0->ne[i] / src1->ne[i]; | ||||
|         bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr; | ||||
|         bcast_src1_ne[bcast_dim_cnt] = src1->ne[i]; | ||||
|         bcast_src0_nb[bcast_dim_cnt] = src0->nb[i]; | ||||
| @@ -119,21 +121,26 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, | ||||
|             // Need to add an extra dim. | ||||
|             bcast_src0_ne[bcast_dim_cnt] = nr; | ||||
|             bcast_src1_ne[bcast_dim_cnt] = 1; | ||||
|             bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] * | ||||
|                                            bcast_src0_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] * | ||||
|                                            bcast_src1_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] * bcast_src0_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] * bcast_src1_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_dim_cnt++; | ||||
|         } | ||||
|     } | ||||
|     return bcast_dim_cnt; | ||||
| } | ||||
|  | ||||
| int64_t ggml_cann_get_mulmat_bcast_shape( | ||||
|     const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne, | ||||
|     const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb, | ||||
|     int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne, | ||||
|     size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) { | ||||
| int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne, | ||||
|                                          const int64_t * weight_ne, | ||||
|                                          const int64_t * dst_ne, | ||||
|                                          const size_t *  input_nb, | ||||
|                                          const size_t *  weight_nb, | ||||
|                                          const size_t *  dst_nb, | ||||
|                                          int64_t *       bcast_input_ne, | ||||
|                                          int64_t *       bcast_weight_ne, | ||||
|                                          int64_t *       bcast_dst_ne, | ||||
|                                          size_t *        bcast_input_nb, | ||||
|                                          size_t *        bcast_weight_nb, | ||||
|                                          size_t *        bcast_dst_nb) { | ||||
|     // input and dst should have the same shape, except for the first two dims. | ||||
|     GGML_ASSERT(input_ne[2] == dst_ne[2]); | ||||
|     GGML_ASSERT(input_ne[3] == dst_ne[3]); | ||||
| @@ -148,34 +155,30 @@ int64_t ggml_cann_get_mulmat_bcast_shape( | ||||
|         // Do not use bcast in the first two dimensions because we only support | ||||
|         // the bcast batch dimension. Just copy them. | ||||
|         if (i < 2 || nr == 1) { | ||||
|             bcast_input_ne[bcast_dim_cnt] = input_ne[i]; | ||||
|             bcast_input_ne[bcast_dim_cnt]  = input_ne[i]; | ||||
|             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i]; | ||||
|             bcast_dst_ne[bcast_dim_cnt] = dst_ne[i]; | ||||
|             bcast_dst_ne[bcast_dim_cnt]    = dst_ne[i]; | ||||
|  | ||||
|             bcast_input_nb[bcast_dim_cnt] = input_nb[i]; | ||||
|             bcast_input_nb[bcast_dim_cnt]  = input_nb[i]; | ||||
|             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i]; | ||||
|             bcast_dst_nb[bcast_dim_cnt] = dst_nb[i]; | ||||
|             bcast_dst_nb[bcast_dim_cnt]    = dst_nb[i]; | ||||
|             bcast_dim_cnt++; | ||||
|         } else { | ||||
|             // Need to add an extra dim. | ||||
|             bcast_input_ne[bcast_dim_cnt] = nr; | ||||
|             bcast_dst_ne[bcast_dim_cnt] = nr; | ||||
|             bcast_input_ne[bcast_dim_cnt]  = nr; | ||||
|             bcast_dst_ne[bcast_dim_cnt]    = nr; | ||||
|             bcast_weight_ne[bcast_dim_cnt] = 1; | ||||
|             bcast_input_nb[bcast_dim_cnt] = input_nb[i]; | ||||
|             bcast_dst_nb[bcast_dim_cnt] = dst_nb[i]; | ||||
|             bcast_input_nb[bcast_dim_cnt]  = input_nb[i]; | ||||
|             bcast_dst_nb[bcast_dim_cnt]    = dst_nb[i]; | ||||
|             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i]; | ||||
|             bcast_dim_cnt++; | ||||
|  | ||||
|             bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr; | ||||
|             bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr; | ||||
|             bcast_input_ne[bcast_dim_cnt]  = input_ne[i] / nr; | ||||
|             bcast_dst_ne[bcast_dim_cnt]    = dst_ne[i] / nr; | ||||
|             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i]; | ||||
|             bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] * | ||||
|                                             bcast_input_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] * | ||||
|                                           bcast_dst_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_weight_nb[bcast_dim_cnt] = | ||||
|                 bcast_weight_nb[bcast_dim_cnt - 1] * | ||||
|                 bcast_weight_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_input_nb[bcast_dim_cnt]  = bcast_input_nb[bcast_dim_cnt - 1] * bcast_input_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_dst_nb[bcast_dim_cnt]    = bcast_dst_nb[bcast_dim_cnt - 1] * bcast_dst_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_weight_nb[bcast_dim_cnt] = bcast_weight_nb[bcast_dim_cnt - 1] * bcast_weight_ne[bcast_dim_cnt - 1]; | ||||
|             bcast_dim_cnt++; | ||||
|         } | ||||
|     } | ||||
|   | ||||
							
								
								
									
										97
									
								
								ggml/src/ggml-cann/acl_tensor.h
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										97
									
								
								ggml/src/ggml-cann/acl_tensor.h
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							| @@ -62,10 +62,12 @@ aclDataType ggml_cann_type_mapping(ggml_type type); | ||||
|  * @param   offset      Offset in bytes for the ACL tensor data. Defaults to 0. | ||||
|  * @return  Pointer to the created ACL tensor. | ||||
|  */ | ||||
| aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr, | ||||
|                              size_t* nb = nullptr, int64_t dims = 0, | ||||
|                              aclFormat format = ACL_FORMAT_ND, | ||||
|                              size_t offset = 0); | ||||
| aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor, | ||||
|                                     int64_t *           ne     = nullptr, | ||||
|                                     size_t *            nb     = nullptr, | ||||
|                                     int64_t             dims   = 0, | ||||
|                                     aclFormat           format = ACL_FORMAT_ND, | ||||
|                                     size_t              offset = 0); | ||||
|  | ||||
| /** | ||||
|  * @brief   Template for creating an ACL tensor from provided parameters. typename TYPE | ||||
| @@ -87,12 +89,15 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = null | ||||
|  * @param   offset      Offset in bytes for the ACL tensor data. Defaults to 0. | ||||
|  * @return  Pointer to the created ACL tensor. | ||||
|  */ | ||||
| template<typename TYPE> | ||||
| aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype, | ||||
|                                    TYPE type_size, int64_t* ne, TYPE* nb, | ||||
|                                    int64_t dims, | ||||
|                                    aclFormat format = ACL_FORMAT_ND, | ||||
|                                    size_t offset = 0) { | ||||
| template <typename TYPE> | ||||
| aclTensor * ggml_cann_create_tensor(void *      data_ptr, | ||||
|                                     aclDataType dtype, | ||||
|                                     TYPE        type_size, | ||||
|                                     int64_t *   ne, | ||||
|                                     TYPE *      nb, | ||||
|                                     int64_t     dims, | ||||
|                                     aclFormat   format = ACL_FORMAT_ND, | ||||
|                                     size_t      offset = 0) { | ||||
|     int64_t tmp_ne[GGML_MAX_DIMS * 2]; | ||||
|     int64_t tmp_stride[GGML_MAX_DIMS * 2]; | ||||
|  | ||||
| @@ -109,9 +114,8 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype, | ||||
|     std::reverse(tmp_ne, tmp_ne + dims); | ||||
|     std::reverse(tmp_stride, tmp_stride + dims); | ||||
|  | ||||
|     aclTensor* acl_tensor = | ||||
|         aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, | ||||
|                         format, &acl_storage_len, 1, data_ptr); | ||||
|     aclTensor * acl_tensor = | ||||
|         aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, format, &acl_storage_len, 1, data_ptr); | ||||
|  | ||||
|     return acl_tensor; | ||||
| } | ||||
| @@ -132,7 +136,7 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype, | ||||
|  *          to 1. If such a dimension is found, broadcasting is required to align t1 | ||||
|  *          with t0 for element-wise operations. | ||||
|  */ | ||||
| bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1); | ||||
| bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes broadcast shapes and strides for two ggml_tensors. | ||||
| @@ -187,19 +191,21 @@ bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1); | ||||
|  *  dim1 is an inserted dim, so an nb should be added for dim1, | ||||
|  *  and all other nb values move to the next position in order. | ||||
|  */ | ||||
| int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1, | ||||
|                         int64_t* bcast_ne_src0, int64_t* bcast_ne_src1, | ||||
|                         size_t* bcast_nb_src0, size_t* bcast_nb_src1); | ||||
| int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0, | ||||
|                                   const ggml_tensor * src1, | ||||
|                                   int64_t *           bcast_ne_src0, | ||||
|                                   int64_t *           bcast_ne_src1, | ||||
|                                   size_t *            bcast_nb_src0, | ||||
|                                   size_t *            bcast_nb_src1); | ||||
|  | ||||
| // Bcast macro to avoid duplicate code. | ||||
| #define BCAST_SHAPE(src0, src1)                                              \ | ||||
|     int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2];                            \ | ||||
|     int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2];                            \ | ||||
|     size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2];                             \ | ||||
|     size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2];                             \ | ||||
|     int64_t bcast_dims = ggml_cann_get_bcast_shape(                          \ | ||||
|         src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \ | ||||
|         bcast_##src1##_nb); | ||||
| #define BCAST_SHAPE(src0, src1)                                                                      \ | ||||
|     int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2];                                                    \ | ||||
|     int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2];                                                    \ | ||||
|     size_t  bcast_##src0##_nb[GGML_MAX_DIMS * 2];                                                    \ | ||||
|     size_t  bcast_##src1##_nb[GGML_MAX_DIMS * 2];                                                    \ | ||||
|     int64_t bcast_dims = ggml_cann_get_bcast_shape(src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, \ | ||||
|                                                    bcast_##src0##_nb, bcast_##src1##_nb); | ||||
|  | ||||
| #define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims | ||||
|  | ||||
| @@ -233,26 +239,31 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* sr | ||||
|  *       before cast dim. | ||||
|  * @sa ggml_cann_get_bcast_shape | ||||
|  */ | ||||
| int64_t ggml_cann_get_mulmat_bcast_shape( | ||||
|     const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne, | ||||
|     const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb, | ||||
|     int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne, | ||||
|     size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb); | ||||
| int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne, | ||||
|                                          const int64_t * weight_ne, | ||||
|                                          const int64_t * dst_ne, | ||||
|                                          const size_t *  input_nb, | ||||
|                                          const size_t *  weight_nb, | ||||
|                                          const size_t *  dst_nb, | ||||
|                                          int64_t *       bcast_input_ne, | ||||
|                                          int64_t *       bcast_weight_ne, | ||||
|                                          int64_t *       bcast_dst_ne, | ||||
|                                          size_t *        bcast_input_nb, | ||||
|                                          size_t *        bcast_weight_nb, | ||||
|                                          size_t *        bcast_dst_nb); | ||||
|  | ||||
| // Bcast macro to avoid duplicate code. | ||||
| #define BCAST_MUL_MAT_SHAPE(input, weight, dst)                         \ | ||||
|     int64_t bcast_##input##_ne[GGML_MAX_DIMS * 2];                      \ | ||||
|     int64_t bcast_##weight##_ne[GGML_MAX_DIMS * 2];                     \ | ||||
|     int64_t bcast_##dst##_ne[GGML_MAX_DIMS * 2];                        \ | ||||
|     size_t bcast_##input##_nb[GGML_MAX_DIMS * 2];                       \ | ||||
|     size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2];                      \ | ||||
|     size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2];                         \ | ||||
|     int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape(              \ | ||||
|         input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \ | ||||
|         bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne,      \ | ||||
|         bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb); | ||||
| #define BCAST_MUL_MAT_SHAPE(input, weight, dst)                                                                  \ | ||||
|     int64_t bcast_##input##_ne[GGML_MAX_DIMS * 2];                                                               \ | ||||
|     int64_t bcast_##weight##_ne[GGML_MAX_DIMS * 2];                                                              \ | ||||
|     int64_t bcast_##dst##_ne[GGML_MAX_DIMS * 2];                                                                 \ | ||||
|     size_t  bcast_##input##_nb[GGML_MAX_DIMS * 2];                                                               \ | ||||
|     size_t  bcast_##weight##_nb[GGML_MAX_DIMS * 2];                                                              \ | ||||
|     size_t  bcast_##dst##_nb[GGML_MAX_DIMS * 2];                                                                 \ | ||||
|     int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape(                                                       \ | ||||
|         input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, bcast_##input##_ne, bcast_##weight##_ne, \ | ||||
|         bcast_##dst##_ne, bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb); | ||||
|  | ||||
| #define BCAST_MUL_MAT_PARAM(tensor) \ | ||||
|     bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims | ||||
| #define BCAST_MUL_MAT_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims | ||||
|  | ||||
| #endif  // CANN_ACL_TENSOR_H | ||||
|   | ||||
							
								
								
									
										2508
									
								
								ggml/src/ggml-cann/aclnn_ops.cpp
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										2508
									
								
								ggml/src/ggml-cann/aclnn_ops.cpp
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
							
								
								
									
										401
									
								
								ggml/src/ggml-cann/aclnn_ops.h
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										401
									
								
								ggml/src/ggml-cann/aclnn_ops.h
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							| @@ -62,7 +62,7 @@ | ||||
|  * @param   dst The ggml tensor representing the destination, which op is | ||||
|  *              GGML_OP_REPEAT and specifies the desired dimensions. | ||||
|  */ | ||||
| void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies the Leaky ReLU activation function to a tensor using the CANN | ||||
| @@ -82,7 +82,7 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the result of the Leaky ReLU | ||||
|  *            activation is stored, which op is `GGML_OP_LEAKY_RELU` | ||||
|  */ | ||||
| void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief    Concatenates multiple tensors along a specified dimension using the | ||||
| @@ -97,7 +97,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @attention tensorList length should be 2, and the dimension used for concat | ||||
|  *            defaults to 1. | ||||
|  */ | ||||
| void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Generates a sequence of evenly spaced values within a specified | ||||
| @@ -113,7 +113,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  *            `start`, 'stop' and 'step' are in dst->op_params and dst->op is | ||||
|  *            `GGML_OP_ARANGE`. | ||||
|  */ | ||||
| void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies a clamp operation to the elements of a ggml tensor using the | ||||
| @@ -131,7 +131,7 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the clamped values will be stored. | ||||
|  *            dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params. | ||||
|  */ | ||||
| void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Scales the elements of a ggml tensor by a constant factor using the | ||||
| @@ -148,7 +148,7 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the scaled values will be stored. | ||||
|  *            dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params. | ||||
|  */ | ||||
| void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Sorts the elements of a ggml tensor and returns the indices that | ||||
| @@ -163,7 +163,7 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the sorted indices will be stored. | ||||
|  *            dst->op is `GGML_OP_ARGSORT`. | ||||
|  */ | ||||
| void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the Layer Normalization for a ggml tensor using the CANN | ||||
| @@ -185,7 +185,7 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  * @attention `Var` defaults to dst->ne[0]. | ||||
|  */ | ||||
| void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief  Computes the Group Normalization for a ggml tensor using the CANN | ||||
| @@ -209,7 +209,7 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * | ||||
|  * @attention eps defaults to 1e-6f. | ||||
|  */ | ||||
| void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the accumulation of tensors using the CANN backend. | ||||
| @@ -228,7 +228,7 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the accumulated values will be stored. | ||||
|  *            `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`. | ||||
|  */ | ||||
| void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the sum of elements along the last dimension of a ggml tensor | ||||
| @@ -244,7 +244,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * | ||||
|  * @attention `reduce_dims` defaults to 3, which means the last dimension. | ||||
|  */ | ||||
| void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the sum of elements in a ggml tensor. | ||||
| @@ -258,7 +258,7 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * | ||||
|  */ | ||||
|  | ||||
| void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Upsamples a ggml tensor using nearest neighbor interpolation using | ||||
| @@ -274,8 +274,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the upsampled values will be stored. | ||||
|  *            dst->op is `GGML_OP_UPSCALE`. | ||||
|  */ | ||||
| void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx, | ||||
|                                   ggml_tensor* dst); | ||||
| void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Pads a ggml tensor to match the dimensions of the destination tensor | ||||
| @@ -290,7 +289,7 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx, | ||||
|  * @param dst The destination tensor, which specifies the target dimensions for | ||||
|  *            padding. dst->op is `GGML_OP_PAD`. | ||||
|  */ | ||||
| void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Executes a 2D pooling operation on a ggml tensor using the CANN | ||||
| @@ -307,7 +306,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor on which the pooling operation is to be | ||||
|  *            performed. dst->op is `GGML_OP_POOL_2D`. | ||||
|  */ | ||||
| void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Duplicates a ggml tensor using the CANN backend. | ||||
| @@ -326,7 +325,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  *            different shape and dst is non-contiguous. | ||||
|  * @note:     This function needs to be simplified. | ||||
|  */ | ||||
| void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the Root Mean Square (RMS) normalization of a ggml tensor | ||||
| @@ -348,7 +347,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the normalized values will be stored. | ||||
|  *            dst->op is `GGML_OP_RMS_NORM`. | ||||
|  */ | ||||
| void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies a diagonal mask to the tensor with a specified value. | ||||
| @@ -363,7 +362,7 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  *            `GGML_OP_DIAG_MASK` | ||||
|  * @param value The value to use for masking. | ||||
|  */ | ||||
| void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value); | ||||
| void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value); | ||||
|  | ||||
| /** | ||||
|  * @brief   Performs an image-to-column transformation on the input tensor. | ||||
| @@ -378,7 +377,7 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float | ||||
|  * @param dst The destination tensor that stores the result of the operation. | ||||
|  *            dst->op is `GGML_OP_IM2COL`. | ||||
|  */ | ||||
| void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes time step embeddings using sine and cosine functions. | ||||
| @@ -392,10 +391,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the result of the embedding operation | ||||
|  *            will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`. | ||||
|  */ | ||||
| void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| // @see ggml_cann_dup. | ||||
| void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the softmax activation with optional masking. | ||||
| @@ -417,7 +416,7 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the result will be stored. dst->op is | ||||
|  *            `GGML_OP_SOFTMAX`. | ||||
|  */ | ||||
| void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Extracts specific rows from a tensor based on indices. | ||||
| @@ -429,7 +428,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the extracted rows will be stored. | ||||
|  */ | ||||
| void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Writes specific rows into a tensor at positions specified by indices. | ||||
| @@ -441,7 +440,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param ctx The backend CANN context for executing operations. | ||||
|  * @param dst The destination tensor where the specified rows will be updated. | ||||
|  */ | ||||
| void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Executes matrix multiplication for the given tensor. | ||||
| @@ -454,7 +453,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor for storing the result of the matrix | ||||
|  *            multiplication. dst->op is `GGML_OP_MUL_MAT`. | ||||
|  */ | ||||
| void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor. | ||||
| @@ -477,7 +476,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @note The function currently does not support cases where the freq_scale is | ||||
|  *       not equal to 1. | ||||
|  */ | ||||
| void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the index of the maximum value along the specified dimension | ||||
| @@ -492,7 +491,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the indices of the maximum values will | ||||
|  *            be stored. dst->op is `GGML_OP_ARGMAX`. | ||||
|  */ | ||||
| void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Adds two tensors element-wise and stores the result in a destination | ||||
| @@ -509,8 +508,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param acl_src1 The second source tensor. | ||||
|  * @param acl_dst The destination tensor where the result will be stored. | ||||
|  */ | ||||
| void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0, | ||||
|     aclTensor* acl_src1, aclTensor* acl_dst = nullptr); | ||||
| void aclnn_add(ggml_backend_cann_context & ctx, | ||||
|                aclTensor *                 acl_src0, | ||||
|                aclTensor *                 acl_src1, | ||||
|                aclTensor *                 acl_dst = nullptr); | ||||
|  | ||||
| /** | ||||
|  * @brief Subtracts two tensors element-wise and stores the result in a destination | ||||
| @@ -527,8 +528,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0, | ||||
|  * @param acl_src1 The second source tensor. | ||||
|  * @param acl_dst The destination tensor where the result will be stored. | ||||
|  */ | ||||
| void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0, | ||||
|     aclTensor* acl_src1, aclTensor* acl_dst = nullptr); | ||||
| void aclnn_sub(ggml_backend_cann_context & ctx, | ||||
|                aclTensor *                 acl_src0, | ||||
|                aclTensor *                 acl_src1, | ||||
|                aclTensor *                 acl_dst = nullptr); | ||||
|  | ||||
| /** | ||||
|  * @brief Performs element-wise multiplication of two tensors and stores the | ||||
| @@ -546,8 +549,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0, | ||||
|  * @param acl_other The second tensor for element-wise multiplication. | ||||
|  * @param acl_dst The destination tensor where the result will be stored. | ||||
|  */ | ||||
| void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|     aclTensor* acl_other, aclTensor* acl_dst = nullptr); | ||||
| void aclnn_mul(ggml_backend_cann_context & ctx, | ||||
|                aclTensor *                 acl_src, | ||||
|                aclTensor *                 acl_other, | ||||
|                aclTensor *                 acl_dst = nullptr); | ||||
|  | ||||
| /** | ||||
|  * @brief Matrix division, optionally in-place. | ||||
| @@ -567,8 +572,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|  * @param inplace Flag indicating whether to perform the operation in-place on | ||||
|  * `acl_src`. | ||||
|  */ | ||||
| void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|     aclTensor* acl_other, aclTensor* acl_dst = nullptr); | ||||
| void aclnn_div(ggml_backend_cann_context & ctx, | ||||
|                aclTensor *                 acl_src, | ||||
|                aclTensor *                 acl_other, | ||||
|                aclTensor *                 acl_dst = nullptr); | ||||
|  | ||||
| /** | ||||
|  * @brief Applies element-wise cosine function to the elements of a tensor. | ||||
| @@ -584,8 +591,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|  * @param acl_dst The destination tensor where the cosine results will be | ||||
|  * stored. | ||||
|  */ | ||||
| void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|     aclTensor* acl_dst); | ||||
| void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Applies element-wise sine function to the elements of a tensor. | ||||
| @@ -602,8 +608,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|  * @param acl_src The source tensor on which the sine function will be applied. | ||||
|  * @param acl_dst The destination tensor where the sine results will be stored. | ||||
|  */ | ||||
| void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|     aclTensor* acl_dst); | ||||
| void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one | ||||
| @@ -621,8 +626,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src, | ||||
|  * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1. | ||||
|  * @param acl_dst  Output pointer to the created ACL tensor corresponding to dst. | ||||
|  */ | ||||
| void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, | ||||
|     aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst); | ||||
| void bcast_shape(ggml_tensor * src0, | ||||
|                  ggml_tensor * src1, | ||||
|                  ggml_tensor * dst, | ||||
|                  aclTensor **  acl_src0, | ||||
|                  aclTensor **  acl_src1, | ||||
|                  aclTensor **  acl_dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the 1D transposed convolution (deconvolution) of a ggml | ||||
| @@ -637,7 +646,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, | ||||
|  * @param dst The destination tensor where the transposed convolution result | ||||
|  * will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`. | ||||
|  */ | ||||
| void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies the ELU (Exponential Linear Unit) activation to a ggml tensor | ||||
| @@ -662,7 +671,7 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds | ||||
|  * @param dst The destination tensor where the ELU-activated result will be stored. | ||||
|  *            dst->op is expected to be `GGML_OP_ELU`. | ||||
|  */ | ||||
| void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Computes the mean of a ggml tensor element-wise using the CANN backend. | ||||
| @@ -677,7 +686,7 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the mean result will be stored. | ||||
|  *            dst->op is expected to be `GGML_OP_MEAN`. | ||||
|  */ | ||||
| void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies 1D reflect padding to a ggml tensor using the CANN backend. | ||||
| @@ -692,7 +701,7 @@ void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the padded result will be stored. | ||||
|  *            dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`. | ||||
|  */ | ||||
| void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Counts the number of equal elements in two ggml tensors using the CANN backend. | ||||
| @@ -708,7 +717,7 @@ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the result will be stored. | ||||
|  *            dst->op is expected to be `GGML_OP_COUNT_EQUAL`. | ||||
|  */ | ||||
| void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Applies the Step activation function to a ggml tensor using the CANN backend. | ||||
| @@ -723,7 +732,7 @@ void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the result will be stored. | ||||
|  *            dst->op is expected to be `GGML_OP_STEP`. | ||||
|  */ | ||||
| void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Performs the Flash Attention extended operator using the CANN backend. | ||||
| @@ -738,59 +747,46 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * @param dst The destination tensor where the result will be stored. | ||||
|  *            dst->op is expected to be `GGML_OP_FLASH_ATTN_EXT`. | ||||
|  */ | ||||
| void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief A generic wrapper for ACL resources with custom deleter support. | ||||
|  */ | ||||
| using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>; | ||||
| using any_acl_resource = std::unique_ptr<void, std::function<void(void *)>>; | ||||
|  | ||||
| /** | ||||
|  * @brief Trait structure used to define how to destroy a given ACL resource type. | ||||
|  * | ||||
|  * @tparam T ACL resource type. | ||||
|  */ | ||||
| template<typename T> | ||||
| struct acl_resource_traits; | ||||
| template <typename T> struct acl_resource_traits; | ||||
|  | ||||
| /** | ||||
|  * @brief Specialization for aclTensor, defines how to destroy an aclTensor resource. | ||||
|  */ | ||||
| template<> | ||||
| struct acl_resource_traits<aclTensor> { | ||||
|     static void destroy(void* p) { | ||||
|         ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p))); | ||||
|     } | ||||
| template <> struct acl_resource_traits<aclTensor> { | ||||
|     static void destroy(void * p) { ACL_CHECK(aclDestroyTensor(static_cast<aclTensor *>(p))); } | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource. | ||||
|  */ | ||||
| template<> | ||||
| struct acl_resource_traits<aclIntArray> { | ||||
|     static void destroy(void* p) { | ||||
|         ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p))); | ||||
|     } | ||||
| template <> struct acl_resource_traits<aclIntArray> { | ||||
|     static void destroy(void * p) { ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray *>(p))); } | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Specialization for aclScalar, defines how to destroy an aclScalar resource. | ||||
|  */ | ||||
| template<> | ||||
| struct acl_resource_traits<aclScalar> { | ||||
|     static void destroy(void* p) { | ||||
|         ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p))); | ||||
|     } | ||||
| template <> struct acl_resource_traits<aclScalar> { | ||||
|     static void destroy(void * p) { ACL_CHECK(aclDestroyScalar(static_cast<aclScalar *>(p))); } | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource. | ||||
|  */ | ||||
| template<> | ||||
| struct acl_resource_traits<aclTensorList> { | ||||
|     static void destroy(void* p) { | ||||
|         ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p))); | ||||
|     } | ||||
| template <> struct acl_resource_traits<aclTensorList> { | ||||
|     static void destroy(void * p) { ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList *>(p))); } | ||||
| }; | ||||
|  | ||||
| /** | ||||
| @@ -800,14 +796,8 @@ struct acl_resource_traits<aclTensorList> { | ||||
|  * @param ptr Raw pointer to ACL resource. | ||||
|  * @return any_acl_resource Smart pointer that handles destruction. | ||||
|  */ | ||||
| template<typename T> | ||||
| any_acl_resource make_acl_resource(T* ptr) { | ||||
|     return any_acl_resource( | ||||
|         static_cast<void*>(ptr), | ||||
|         [](void* p) { | ||||
|             acl_resource_traits<T>::destroy(p); | ||||
|         } | ||||
|     ); | ||||
| template <typename T> any_acl_resource make_acl_resource(T * ptr) { | ||||
|     return any_acl_resource(static_cast<void *>(ptr), [](void * p) { acl_resource_traits<T>::destroy(p); }); | ||||
| } | ||||
|  | ||||
| /** | ||||
| @@ -817,8 +807,7 @@ any_acl_resource make_acl_resource(T* ptr) { | ||||
|  * @param vec Target vector to hold ACL resources. | ||||
|  * @param args Raw pointers to ACL resources. | ||||
|  */ | ||||
| template<typename... Args> | ||||
| void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) { | ||||
| template <typename... Args> void register_acl_resources(std::vector<any_acl_resource> & vec, Args *... args) { | ||||
|     (vec.emplace_back(make_acl_resource(args)), ...); | ||||
| } | ||||
|  | ||||
| @@ -826,39 +815,36 @@ void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) { | ||||
|  * @brief Task class that wraps the execution of an aclnn function call. | ||||
|  */ | ||||
| class aclnn_task : public cann_task { | ||||
|     public: | ||||
|         aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr, | ||||
|                    uint64_t workspace_size, aclOpExecutor * executor, | ||||
|                    aclrtStream stream) : | ||||
|             aclnn_func_(aclnn_func), | ||||
|             workspace_addr_(workspace_addr), | ||||
|             workspace_size_(workspace_size), | ||||
|             executor_(executor), | ||||
|             stream_(stream) {} | ||||
|         virtual void run_task() override { | ||||
|             ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_)); | ||||
|         } | ||||
|     private: | ||||
|         aclnn_func_t aclnn_func_; | ||||
|         void *          workspace_addr_; | ||||
|         uint64_t        workspace_size_; | ||||
|         aclOpExecutor * executor_; | ||||
|         aclrtStream     stream_; | ||||
|   public: | ||||
|     aclnn_task(aclnn_func_t    aclnn_func, | ||||
|                void *          workspace_addr, | ||||
|                uint64_t        workspace_size, | ||||
|                aclOpExecutor * executor, | ||||
|                aclrtStream     stream) : | ||||
|         aclnn_func_(aclnn_func), | ||||
|         workspace_addr_(workspace_addr), | ||||
|         workspace_size_(workspace_size), | ||||
|         executor_(executor), | ||||
|         stream_(stream) {} | ||||
|  | ||||
|     virtual void run_task() override { ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_)); } | ||||
|   private: | ||||
|     aclnn_func_t    aclnn_func_; | ||||
|     void *          workspace_addr_; | ||||
|     uint64_t        workspace_size_; | ||||
|     aclOpExecutor * executor_; | ||||
|     aclrtStream     stream_; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Task class that releases ACL resources after usage. | ||||
|  */ | ||||
| class release_resource_task : public cann_task { | ||||
| public: | ||||
|     release_resource_task(std::vector<any_acl_resource>&& resources){ | ||||
|         resource_ = std::move(resources); | ||||
|     } | ||||
|   public: | ||||
|     release_resource_task(std::vector<any_acl_resource> && resources) { resource_ = std::move(resources); } | ||||
|  | ||||
|     virtual void run_task() override { | ||||
|         resource_.clear(); | ||||
|     } | ||||
| private: | ||||
|     virtual void run_task() override { resource_.clear(); } | ||||
|   private: | ||||
|     std::vector<any_acl_resource> resource_; | ||||
| }; | ||||
|  | ||||
| @@ -866,38 +852,40 @@ private: | ||||
|  * @brief Task class for performing asynchronous memory copy operations. | ||||
|  */ | ||||
| class async_memcpy_task : public cann_task { | ||||
| public: | ||||
|     async_memcpy_task(void* dst, const void* src, size_t size, | ||||
|                       aclrtMemcpyKind kind, aclrtStream stream) | ||||
|         : dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {} | ||||
|   public: | ||||
|     async_memcpy_task(void * dst, const void * src, size_t size, aclrtMemcpyKind kind, aclrtStream stream) : | ||||
|         dst_(dst), | ||||
|         src_(src), | ||||
|         size_(size), | ||||
|         kind_(kind), | ||||
|         stream_(stream) {} | ||||
|  | ||||
|     virtual void run_task() override { | ||||
|         ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_)); | ||||
|     } | ||||
| private: | ||||
|     void* dst_; | ||||
|     const void* src_; | ||||
|     size_t size_; | ||||
|     virtual void run_task() override { ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_)); } | ||||
|   private: | ||||
|     void *          dst_; | ||||
|     const void *    src_; | ||||
|     size_t          size_; | ||||
|     aclrtMemcpyKind kind_; | ||||
|     aclrtStream stream_; | ||||
|     aclrtStream     stream_; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Task class for performing asynchronous memory set operations. | ||||
|  */ | ||||
| class async_memset_task : public cann_task { | ||||
|     public: | ||||
|     async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream) | ||||
|             : buffer_(buffer), size_(size), value_(value), stream_(stream) {} | ||||
|   public: | ||||
|     async_memset_task(void * buffer, size_t size, int32_t value, aclrtStream stream) : | ||||
|         buffer_(buffer), | ||||
|         size_(size), | ||||
|         value_(value), | ||||
|         stream_(stream) {} | ||||
|  | ||||
|         virtual void run_task() override { | ||||
|             ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_)); | ||||
|         } | ||||
|     private: | ||||
|         void* buffer_; | ||||
|         size_t size_; | ||||
|         int32_t value_; | ||||
|         aclrtStream stream_; | ||||
|     virtual void run_task() override { ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_)); } | ||||
|   private: | ||||
|     void *      buffer_; | ||||
|     size_t      size_; | ||||
|     int32_t     value_; | ||||
|     aclrtStream stream_; | ||||
| }; | ||||
|  | ||||
| /** | ||||
| @@ -918,25 +906,24 @@ class async_memset_task : public cann_task { | ||||
|  * same stream are executed in queue order. | ||||
|  */ | ||||
|  | ||||
| #define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)                                          \ | ||||
|     do {                                                                                    \ | ||||
|         uint64_t        workspaceSize = 0;                                                  \ | ||||
|         aclOpExecutor * executor;                                                           \ | ||||
|         void *          workspaceAddr = nullptr;                                            \ | ||||
|         ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor));\ | ||||
|         /* workspace should alloced in main thread to keep malloc order when using vmm. */  \ | ||||
|         if (workspaceSize > 0) {                                                            \ | ||||
|             ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);            \ | ||||
|             workspaceAddr = workspace_allocator.get();                                      \ | ||||
|         }                                                                                   \ | ||||
|         if (CTX.async_mode) {                                                               \ | ||||
|             auto task =                                                                     \ | ||||
|                 std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize,  \ | ||||
|                     executor, CTX.stream()); \ | ||||
|             CTX.task_queue.submit_task(std::move(task));                                    \ | ||||
|         } else {                                                                            \ | ||||
|             ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));\ | ||||
|         }                                                                                   \ | ||||
| #define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)                                                                  \ | ||||
|     do {                                                                                                            \ | ||||
|         uint64_t        workspaceSize = 0;                                                                          \ | ||||
|         aclOpExecutor * executor;                                                                                   \ | ||||
|         void *          workspaceAddr = nullptr;                                                                    \ | ||||
|         ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor));                        \ | ||||
|         /* workspace should alloced in main thread to keep malloc order when using vmm. */                          \ | ||||
|         if (workspaceSize > 0) {                                                                                    \ | ||||
|             ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);                                    \ | ||||
|             workspaceAddr = workspace_allocator.get();                                                              \ | ||||
|         }                                                                                                           \ | ||||
|         if (CTX.async_mode) {                                                                                       \ | ||||
|             auto task =                                                                                             \ | ||||
|                 std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, executor, CTX.stream()); \ | ||||
|             CTX.task_queue.submit_task(std::move(task));                                                            \ | ||||
|         } else {                                                                                                    \ | ||||
|             ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));                        \ | ||||
|         }                                                                                                           \ | ||||
|     } while (0) | ||||
|  | ||||
| /** | ||||
| @@ -947,11 +934,10 @@ class async_memset_task : public cann_task { | ||||
|  * @param ctx Backend context which manages task submission and async mode. | ||||
|  * @param args Pointers to ACL resources to be released. | ||||
|  */ | ||||
| template <typename... Args> | ||||
| void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) { | ||||
| template <typename... Args> void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) { | ||||
|     std::vector<any_acl_resource> resources; | ||||
|     register_acl_resources(resources, std::forward<Args>(args)...); | ||||
|     if(ctx.async_mode) { | ||||
|     if (ctx.async_mode) { | ||||
|         auto task = std::make_unique<release_resource_task>(std::move(resources)); | ||||
|         ctx.task_queue.submit_task(std::move(task)); | ||||
|     } | ||||
| @@ -966,8 +952,11 @@ void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... arg | ||||
|  * @param len Size of memory to copy (in bytes). | ||||
|  * @param kind Type of memory copy (host-to-device, device-to-host, etc). | ||||
|  */ | ||||
| inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst, | ||||
|                                    const void * src, size_t len, aclrtMemcpyKind kind) { | ||||
| inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, | ||||
|                                    void *                      dst, | ||||
|                                    const void *                src, | ||||
|                                    size_t                      len, | ||||
|                                    aclrtMemcpyKind             kind) { | ||||
|     if (ctx.async_mode) { | ||||
|         auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream()); | ||||
|         ctx.task_queue.submit_task(std::move(task)); | ||||
| @@ -976,8 +965,11 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst, | ||||
|     } | ||||
| } | ||||
|  | ||||
| inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst, | ||||
|                                    const void * src, size_t len, aclrtMemcpyKind kind) { | ||||
| inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, | ||||
|                                    void *                      dst, | ||||
|                                    const void *                src, | ||||
|                                    size_t                      len, | ||||
|                                    aclrtMemcpyKind             kind) { | ||||
|     if (ctx->async_mode) { | ||||
|         auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream()); | ||||
|         ctx->task_queue.submit_task(std::move(task)); | ||||
| @@ -994,8 +986,7 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst, | ||||
|  * @param size Size of the memory buffer (in bytes). | ||||
|  * @param value Value to set in the buffer. | ||||
|  */ | ||||
| inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer, | ||||
|                                    size_t size, int value) { | ||||
| inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer, size_t size, int value) { | ||||
|     if (ctx.async_mode) { | ||||
|         auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream()); | ||||
|         ctx.task_queue.submit_task(std::move(task)); | ||||
| @@ -1029,7 +1020,7 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe | ||||
|  * @param dst The destination tensor where the expert-weighted token outputs are stored. | ||||
|  *            Expected to be of shape [M, K, N, 1]. | ||||
|  */ | ||||
| void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_mul_mat_id(ggml_backend_cann_context & ctx, ggml_tensor * dst); | ||||
|  | ||||
| /** | ||||
|  * @brief   Check whether a tensor is a weight tensor for matrix multiplication. | ||||
| @@ -1041,20 +1032,14 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
|  * | ||||
|  * @param tensor Pointer to the target ggml_tensor object (const-qualified). | ||||
|  */ | ||||
| static bool is_matmul_weight(const ggml_tensor* tensor) { | ||||
|     std::string name = ggml_get_name(tensor); | ||||
|     static const std::unordered_set<std::string> weight_suffixes{ | ||||
|         "output.weight", | ||||
|         "attn_q.weight", | ||||
|         "attn_k.weight", | ||||
|         "attn_v.weight", | ||||
|         "attn_output.weight", | ||||
|         "ffn_gate.weight", | ||||
|         "ffn_up.weight", | ||||
|         "ffn_down.weight" | ||||
|     }; | ||||
| static bool is_matmul_weight(const ggml_tensor * tensor) { | ||||
|     std::string                                  name = ggml_get_name(tensor); | ||||
|     static const std::unordered_set<std::string> weight_suffixes{ "output.weight",      "attn_q.weight", | ||||
|                                                                   "attn_k.weight",      "attn_v.weight", | ||||
|                                                                   "attn_output.weight", "ffn_gate.weight", | ||||
|                                                                   "ffn_up.weight",      "ffn_down.weight" }; | ||||
|  | ||||
|     for (const auto& suffix : weight_suffixes) { | ||||
|     for (const auto & suffix : weight_suffixes) { | ||||
|         if (name.find(suffix) != std::string::npos) { | ||||
|             return true; | ||||
|         } | ||||
| @@ -1078,14 +1063,13 @@ static bool is_matmul_weight(const ggml_tensor* tensor) { | ||||
|  * @param ctx The CANN backend context used to manage execution and resources. | ||||
|  * @param dst The destination tensor. | ||||
|  */ | ||||
| template <auto binary_op> | ||||
| void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|     ggml_tensor* src0 = dst->src[0]; | ||||
|     ggml_tensor* src1 = dst->src[1]; | ||||
| template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) { | ||||
|     ggml_tensor * src0 = dst->src[0]; | ||||
|     ggml_tensor * src1 = dst->src[1]; | ||||
|  | ||||
|     aclTensor* acl_src0; | ||||
|     aclTensor* acl_src1; | ||||
|     aclTensor* acl_dst; | ||||
|     aclTensor * acl_src0; | ||||
|     aclTensor * acl_src1; | ||||
|     aclTensor * acl_dst; | ||||
|  | ||||
|     // Need bcast | ||||
|     bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst); | ||||
| @@ -1094,7 +1078,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|     ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst); | ||||
| } | ||||
|  | ||||
|  | ||||
| /** | ||||
|  * @brief Applies a unary operation to an input tensor using the CANN backend. | ||||
|  * | ||||
| @@ -1107,12 +1090,12 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|  * @param ctx The CANN backend context for managing resources and execution. | ||||
|  * @param dst The destination tensor. Its src[0] is treated as the input tensor. | ||||
|  */ | ||||
| template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)> | ||||
|     void ggml_cann_op_unary(ggml_backend_cann_context& ctx, ggml_tensor* dst) { | ||||
|     ggml_tensor* src = dst->src[0]; | ||||
| template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)> | ||||
| void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) { | ||||
|     ggml_tensor * src = dst->src[0]; | ||||
|  | ||||
|     aclTensor* acl_src = ggml_cann_create_tensor(src); | ||||
|     aclTensor* acl_dst = ggml_cann_create_tensor(dst); | ||||
|     aclTensor * acl_src = ggml_cann_create_tensor(src); | ||||
|     aclTensor * acl_dst = ggml_cann_create_tensor(dst); | ||||
|  | ||||
|     unary_op(ctx, acl_src, acl_dst); | ||||
|     ggml_cann_release_resources(ctx, acl_src, acl_dst); | ||||
| @@ -1138,9 +1121,9 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)> | ||||
|  * | ||||
|  * @see GGML_CANN_CALL_OP_UNARY | ||||
|  */ | ||||
| void ggml_cann_op_unary( | ||||
|     std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op, | ||||
|     ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op, | ||||
|                         ggml_backend_cann_context &                                                ctx, | ||||
|                         ggml_tensor *                                                              dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Applies a gated (GLU-style) unary operation using the CANN backend. | ||||
| @@ -1172,9 +1155,9 @@ void ggml_cann_op_unary( | ||||
|  * | ||||
|  * @see GGML_CANN_CALL_OP_UNARY_GATED | ||||
|  */ | ||||
| void ggml_cann_op_unary_gated( | ||||
|     std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op, | ||||
|     ggml_backend_cann_context& ctx, ggml_tensor* dst); | ||||
| void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op, | ||||
|                               ggml_backend_cann_context &                                                ctx, | ||||
|                               ggml_tensor *                                                              dst); | ||||
|  | ||||
| /** | ||||
|  * @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary. | ||||
| @@ -1197,16 +1180,13 @@ void ggml_cann_op_unary_gated( | ||||
|  * @see ggml_cann_op_unary | ||||
|  * @see GGML_CANN_CALL_ACLNN_OP | ||||
|  */ | ||||
| #define GGML_CANN_CALL_OP_UNARY(OP_NAME)                              \ | ||||
|     do {                                                              \ | ||||
|         auto lambda = [](ggml_backend_cann_context& ctx,              \ | ||||
|             aclTensor* acl_src,                                       \ | ||||
|             aclTensor* acl_dst) {                                     \ | ||||
|             GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);  \ | ||||
|         };                                                            \ | ||||
|         ggml_cann_op_unary(lambda, ctx, dst);                         \ | ||||
|     }                                                                 \ | ||||
|     while (0) | ||||
| #define GGML_CANN_CALL_OP_UNARY(OP_NAME)                                                              \ | ||||
|     do {                                                                                              \ | ||||
|         auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \ | ||||
|             GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);                                  \ | ||||
|         };                                                                                            \ | ||||
|         ggml_cann_op_unary(lambda, ctx, dst);                                                         \ | ||||
|     } while (0) | ||||
|  | ||||
| /** | ||||
|  * @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated. | ||||
| @@ -1229,15 +1209,12 @@ void ggml_cann_op_unary_gated( | ||||
|  * @see ggml_cann_op_unary_gated | ||||
|  * @see GGML_CANN_CALL_ACLNN_OP | ||||
|  */ | ||||
| #define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)                        \ | ||||
|     do {                                                              \ | ||||
|         auto lambda = [](ggml_backend_cann_context& ctx,              \ | ||||
|             aclTensor* acl_src,                                       \ | ||||
|             aclTensor* acl_dst) {                                     \ | ||||
|             GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);  \ | ||||
|         };                                                            \ | ||||
|         ggml_cann_op_unary_gated(lambda, ctx, dst);                   \ | ||||
|     }                                                                 \ | ||||
|     while (0) | ||||
| #define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)                                                        \ | ||||
|     do {                                                                                              \ | ||||
|         auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \ | ||||
|             GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);                                  \ | ||||
|         };                                                                                            \ | ||||
|         ggml_cann_op_unary_gated(lambda, ctx, dst);                                                   \ | ||||
|     } while (0) | ||||
|  | ||||
| #endif  // CANN_ACLNN_OPS | ||||
|   | ||||
							
								
								
									
										191
									
								
								ggml/src/ggml-cann/common.h
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										191
									
								
								ggml/src/ggml-cann/common.h
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							| @@ -44,7 +44,7 @@ | ||||
| #include "../include/ggml.h" | ||||
| #include "../ggml-impl.h" | ||||
|  | ||||
| #define MATRIX_ROW_PADDING 512 | ||||
| #define MATRIX_ROW_PADDING    512 | ||||
| #define GGML_CANN_MAX_STREAMS 8 | ||||
|  | ||||
| /** | ||||
| @@ -56,8 +56,7 @@ | ||||
|  * @param line The line number at which the error occurred. | ||||
|  * @param msg The error message. | ||||
|  */ | ||||
| [[noreturn]] void ggml_cann_error(const char* stmt, const char* func, | ||||
|                                   const char* file, int line, const char* msg); | ||||
| [[noreturn]] void ggml_cann_error(const char * stmt, const char * func, const char * file, int line, const char * msg); | ||||
|  | ||||
| /** | ||||
|  * @brief Checks the result of a CANN function call and invokes the error | ||||
| @@ -89,25 +88,24 @@ struct ggml_cann_device_info { | ||||
|      * @brief Information about a single CANN device. | ||||
|      */ | ||||
|     struct cann_device_info { | ||||
|         int cc;                 /**< Compute capability.                   */ | ||||
|         int    cc;              /**< Compute capability.                   */ | ||||
|         size_t smpb;            /**< Maximum shared memory per block.      */ | ||||
|         bool vmm;               /**< Virtual memory support.               */ | ||||
|         bool   vmm;             /**< Virtual memory support.               */ | ||||
|         size_t vmm_granularity; /**< Granularity of virtual memory.        */ | ||||
|         size_t total_vram;      /**< Total video RAM available on the device. */ | ||||
|     }; | ||||
|  | ||||
|     cann_device_info devices[GGML_CANN_MAX_DEVICES] = | ||||
|         {}; /**< Array of CANN device information. */ | ||||
|     cann_device_info devices[GGML_CANN_MAX_DEVICES] = {}; /**< Array of CANN device information. */ | ||||
| }; | ||||
|  | ||||
| const ggml_cann_device_info& ggml_cann_info(); | ||||
| const ggml_cann_device_info & ggml_cann_info(); | ||||
|  | ||||
| void ggml_cann_set_device(int32_t device); | ||||
| void    ggml_cann_set_device(int32_t device); | ||||
| int32_t ggml_cann_get_device(); | ||||
|  | ||||
| std::optional<std::string> get_env(const std::string& name); | ||||
| bool parse_bool(const std::string& value); | ||||
| int parse_integer(const std::string& value); | ||||
| std::optional<std::string> get_env(const std::string & name); | ||||
| bool                       parse_bool(const std::string & value); | ||||
| int                        parse_integer(const std::string & value); | ||||
|  | ||||
| /** | ||||
|  * @brief Abstract base class for memory pools used by CANN. | ||||
| @@ -126,7 +124,7 @@ struct ggml_cann_pool { | ||||
|      *                     will be stored. | ||||
|      * @return             Pointer to the allocated memory block. | ||||
|      */ | ||||
|     virtual void* alloc(size_t size, size_t* actual_size) = 0; | ||||
|     virtual void * alloc(size_t size, size_t * actual_size) = 0; | ||||
|  | ||||
|     /** | ||||
|      * @brief Frees a previously allocated memory block. | ||||
| @@ -136,16 +134,16 @@ struct ggml_cann_pool { | ||||
|      * @note All CANN operators run asynchronously. Make sure the memory is | ||||
|      *       still available until the operator using it has finished. | ||||
|      */ | ||||
|     virtual void free(void* ptr, size_t size) = 0; | ||||
|     virtual void free(void * ptr, size_t size) = 0; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief RAII wrapper for managing memory allocations from a CANN memory pool. | ||||
|  */ | ||||
| struct ggml_cann_pool_alloc { | ||||
|     ggml_cann_pool* pool = nullptr; /**< Pointer to the memory pool. */ | ||||
|     void* ptr = nullptr;    /**< Pointer to the allocated memory block. */ | ||||
|     size_t actual_size = 0; /**< Actual size of the allocated memory block. */ | ||||
|     ggml_cann_pool * pool        = nullptr; /**< Pointer to the memory pool. */ | ||||
|     void *           ptr         = nullptr; /**< Pointer to the allocated memory block. */ | ||||
|     size_t           actual_size = 0;       /**< Actual size of the allocated memory block. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Default constructor. | ||||
| @@ -156,16 +154,14 @@ struct ggml_cann_pool_alloc { | ||||
|      * @brief Constructor that initializes the memory pool. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      */ | ||||
|     explicit ggml_cann_pool_alloc(ggml_cann_pool& pool) : pool(&pool) {} | ||||
|     explicit ggml_cann_pool_alloc(ggml_cann_pool & pool) : pool(&pool) {} | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor that initializes the memory pool and allocates memory. | ||||
|      * @param pool Reference to the memory pool. | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      */ | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool& pool, size_t size) : pool(&pool) { | ||||
|         alloc(size); | ||||
|     } | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool & pool, size_t size) : pool(&pool) { alloc(size); } | ||||
|  | ||||
|     /** | ||||
|      * @brief Destructor that frees the allocated memory block. | ||||
| @@ -181,7 +177,7 @@ struct ggml_cann_pool_alloc { | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* alloc(size_t size) { | ||||
|     void * alloc(size_t size) { | ||||
|         GGML_ASSERT(pool != nullptr); | ||||
|         GGML_ASSERT(ptr == nullptr); | ||||
|         ptr = pool->alloc(size, &this->actual_size); | ||||
| @@ -194,7 +190,7 @@ struct ggml_cann_pool_alloc { | ||||
|      * @param size Size of the memory block to allocate. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* alloc(ggml_cann_pool& pool, size_t size) { | ||||
|     void * alloc(ggml_cann_pool & pool, size_t size) { | ||||
|         this->pool = &pool; | ||||
|         return alloc(size); | ||||
|     } | ||||
| @@ -203,25 +199,25 @@ struct ggml_cann_pool_alloc { | ||||
|      * @brief Gets the pointer to the allocated memory block. | ||||
|      * @return Pointer to the allocated memory block. | ||||
|      */ | ||||
|     void* get() { return ptr; } | ||||
|     void * get() { return ptr; } | ||||
|  | ||||
|     // Deleted copy constructor | ||||
|     ggml_cann_pool_alloc(const ggml_cann_pool_alloc&) = delete; | ||||
|     ggml_cann_pool_alloc(const ggml_cann_pool_alloc &) = delete; | ||||
|  | ||||
|     // Deleted move constructor | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool_alloc&&) = delete; | ||||
|     ggml_cann_pool_alloc(ggml_cann_pool_alloc &&) = delete; | ||||
|  | ||||
|     // Deleted copy assignment operator | ||||
|     ggml_cann_pool_alloc& operator=(const ggml_cann_pool_alloc&) = delete; | ||||
|     ggml_cann_pool_alloc & operator=(const ggml_cann_pool_alloc &) = delete; | ||||
|  | ||||
|     // Deleted move assignment operator | ||||
|     ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete; | ||||
|     ggml_cann_pool_alloc & operator=(ggml_cann_pool_alloc &&) = delete; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Function pointer type for ACLNN operator calls. | ||||
|  */ | ||||
| using aclnn_func_t = aclnnStatus (*)(void*, uint64_t, aclOpExecutor*, aclrtStream); | ||||
| using aclnn_func_t = aclnnStatus (*)(void *, uint64_t, aclOpExecutor *, aclrtStream); | ||||
|  | ||||
| /** | ||||
|  * @brief Base class for all CANN tasks to be submitted to the task queue. | ||||
| @@ -229,7 +225,7 @@ using aclnn_func_t = aclnnStatus (*)(void*, uint64_t, aclOpExecutor*, aclrtStrea | ||||
|  * Users should override the run_task() method with actual task logic. | ||||
|  */ | ||||
| class cann_task { | ||||
| public: | ||||
|   public: | ||||
|     virtual void run_task() {} | ||||
| }; | ||||
|  | ||||
| @@ -237,16 +233,20 @@ public: | ||||
|  * @brief A lock-free ring-buffer based task queue for asynchronously executing cann_task instances. | ||||
|  */ | ||||
| class cann_task_queue { | ||||
| public: | ||||
|   public: | ||||
|     /** | ||||
|      * @brief Constructs a task queue with a fixed power-of-two capacity for a specific device. | ||||
|      * | ||||
|      * @param capacity Queue capacity. Must be a power of 2. | ||||
|      * @param device Target device ID (used for context setting). | ||||
|      */ | ||||
|     explicit cann_task_queue(size_t capacity, int32_t device) | ||||
|         : buffer_(capacity), capacity_(capacity), head_(0), tail_(0), | ||||
|           running_(false), device_(device) { | ||||
|     explicit cann_task_queue(size_t capacity, int32_t device) : | ||||
|         buffer_(capacity), | ||||
|         capacity_(capacity), | ||||
|         head_(0), | ||||
|         tail_(0), | ||||
|         running_(false), | ||||
|         device_(device) { | ||||
|         GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2"); | ||||
|         mask_ = capacity_ - 1; | ||||
|     } | ||||
| @@ -257,7 +257,7 @@ public: | ||||
|      * @param item Unique pointer to the task. | ||||
|      * @return true if the task was successfully enqueued, false if the queue was full. | ||||
|      */ | ||||
|     bool enqueue(std::unique_ptr<cann_task>&& item) { | ||||
|     bool enqueue(std::unique_ptr<cann_task> && item) { | ||||
|         size_t next_tail = (tail_ + 1) & mask_; | ||||
|  | ||||
|         if (next_tail == head_) { | ||||
| @@ -276,17 +276,16 @@ public: | ||||
|      * | ||||
|      * @param task Task to be submitted. | ||||
|      */ | ||||
|     void submit_task(std::unique_ptr<cann_task>&& task) { | ||||
|         while(!enqueue(std::move(task))) { | ||||
|     void submit_task(std::unique_ptr<cann_task> && task) { | ||||
|         while (!enqueue(std::move(task))) { | ||||
|             std::this_thread::yield(); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         if (!running_) { | ||||
|             running_ = true; | ||||
|             thread_ = std::thread(&cann_task_queue::execute, this); | ||||
|             thread_  = std::thread(&cann_task_queue::execute, this); | ||||
|         } | ||||
|  | ||||
|     } | ||||
|  | ||||
|     /** | ||||
| @@ -309,7 +308,7 @@ public: | ||||
|         } | ||||
|     } | ||||
|  | ||||
| private: | ||||
|   private: | ||||
|     /** | ||||
|      * @brief Worker thread function that continuously dequeues and executes tasks. | ||||
|      */ | ||||
| @@ -317,7 +316,7 @@ private: | ||||
|         ggml_cann_set_device(device_); | ||||
|  | ||||
|         while (running_) { | ||||
|             if(head_ == tail_) { | ||||
|             if (head_ == tail_) { | ||||
|                 std::this_thread::yield(); | ||||
|                 continue; | ||||
|             } | ||||
| @@ -330,24 +329,24 @@ private: | ||||
|     } | ||||
|  | ||||
|     std::vector<std::unique_ptr<cann_task>> buffer_; | ||||
|     const size_t capacity_; | ||||
|     size_t mask_; | ||||
|     size_t head_; | ||||
|     size_t tail_; | ||||
|     bool running_; | ||||
|     std::thread thread_; | ||||
|     int32_t device_; | ||||
|     const size_t                            capacity_; | ||||
|     size_t                                  mask_; | ||||
|     size_t                                  head_; | ||||
|     size_t                                  tail_; | ||||
|     bool                                    running_; | ||||
|     std::thread                             thread_; | ||||
|     int32_t                                 device_; | ||||
| }; | ||||
|  | ||||
| #ifdef USE_ACL_GRAPH | ||||
| struct ggml_graph_node_properties { | ||||
|     // dst tensor | ||||
|     void * node_address; | ||||
|     void *  node_address; | ||||
|     int64_t ne[GGML_MAX_DIMS]; | ||||
|     size_t nb[GGML_MAX_DIMS]; | ||||
|     size_t  nb[GGML_MAX_DIMS]; | ||||
|  | ||||
|     // src tensor | ||||
|     void * src_address[GGML_MAX_SRC]; | ||||
|     void *  src_address[GGML_MAX_SRC]; | ||||
|     int64_t src_ne[GGML_MAX_SRC][GGML_MAX_DIMS]; | ||||
|     size_t  src_nb[GGML_MAX_SRC][GGML_MAX_DIMS]; | ||||
|  | ||||
| @@ -376,13 +375,11 @@ struct ggml_cann_graph { | ||||
|  * move existing graphs to the front (most recently used), and clear the cache. | ||||
|  */ | ||||
| struct ggml_cann_graph_lru_cache { | ||||
|     size_t capacity;  /**< Maximum number of graphs in the cache. */ | ||||
|     size_t capacity;                         /**< Maximum number of graphs in the cache. */ | ||||
|  | ||||
|     std::list<ggml_cann_graph*> cache_list; /**< List storing cached graphs as raw pointers. */ | ||||
|     std::list<ggml_cann_graph *> cache_list; /**< List storing cached graphs as raw pointers. */ | ||||
|  | ||||
|     ggml_cann_graph_lru_cache() { | ||||
|         capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); | ||||
|     } | ||||
|     ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); } | ||||
|  | ||||
|     /** | ||||
|      * @brief Push a new graph to the front of the cache. | ||||
| @@ -390,11 +387,11 @@ struct ggml_cann_graph_lru_cache { | ||||
|      * @param new_node Pointer to the new ggml_cann_graph to cache. | ||||
|      *        Ownership is transferred to the cache (cache will delete it). | ||||
|      */ | ||||
|     void push(ggml_cann_graph* new_node) { | ||||
|     void push(ggml_cann_graph * new_node) { | ||||
|         if (cache_list.size() >= capacity) { | ||||
|             ggml_cann_graph* old = cache_list.back(); | ||||
|             ggml_cann_graph * old = cache_list.back(); | ||||
|             cache_list.pop_back(); | ||||
|             delete old; // free the old graph | ||||
|             delete old;  // free the old graph | ||||
|         } | ||||
|         cache_list.push_front(new_node); | ||||
|     } | ||||
| @@ -403,7 +400,7 @@ struct ggml_cann_graph_lru_cache { | ||||
|      * @brief Move an existing graph to the front of the cache. | ||||
|      * @param node Pointer to the ggml_cann_graph to move. | ||||
|      */ | ||||
|     void move_to_front(ggml_cann_graph* node) { | ||||
|     void move_to_front(ggml_cann_graph * node) { | ||||
|         cache_list.remove(node); | ||||
|         cache_list.push_front(node); | ||||
|     } | ||||
| @@ -421,92 +418,89 @@ struct ggml_cann_graph_lru_cache { | ||||
|     /** | ||||
|      * @brief Destructor that clears the cache and frees all cached graphs. | ||||
|      */ | ||||
|     ~ggml_cann_graph_lru_cache() { | ||||
|         clear(); | ||||
|     } | ||||
|     ~ggml_cann_graph_lru_cache() { clear(); } | ||||
| }; | ||||
| #endif  // USE_ACL_GRAPH | ||||
|  | ||||
| struct ggml_cann_rope_cache { | ||||
|     ~ggml_cann_rope_cache() { | ||||
|         if(theta_scale_cache != nullptr) { | ||||
|         if (theta_scale_cache != nullptr) { | ||||
|             ACL_CHECK(aclrtFree(theta_scale_cache)); | ||||
|         } | ||||
|         if(sin_cache != nullptr) { | ||||
|         if (sin_cache != nullptr) { | ||||
|             ACL_CHECK(aclrtFree(sin_cache)); | ||||
|         } | ||||
|         if(cos_cache != nullptr) { | ||||
|         if (cos_cache != nullptr) { | ||||
|             ACL_CHECK(aclrtFree(cos_cache)); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     void* theta_scale_cache = nullptr; | ||||
|     void *  theta_scale_cache  = nullptr; | ||||
|     int64_t theta_scale_length = 0; | ||||
|     // sin/cos cache, used only to accelerate first layer on each device | ||||
|     void* sin_cache = nullptr; | ||||
|     void* cos_cache = nullptr; | ||||
|     int64_t position_length = 0; | ||||
|     void *  sin_cache          = nullptr; | ||||
|     void *  cos_cache          = nullptr; | ||||
|     int64_t position_length    = 0; | ||||
|     // Properties to check before reusing the sincos cache | ||||
|     bool cached = false; | ||||
|     float ext_factor = 0.0f; | ||||
|     float theta_scale = 0.0f; | ||||
|     float freq_scale = 0.0f; | ||||
|     float attn_factor = 0.0f; | ||||
|     bool is_neox = false; | ||||
|     bool    cached             = false; | ||||
|     float   ext_factor         = 0.0f; | ||||
|     float   theta_scale        = 0.0f; | ||||
|     float   freq_scale         = 0.0f; | ||||
|     float   attn_factor        = 0.0f; | ||||
|     bool    is_neox            = false; | ||||
| }; | ||||
|  | ||||
| struct ggml_cann_tensor_cache { | ||||
|     ~ggml_cann_tensor_cache() { | ||||
|         if(cache != nullptr) { | ||||
|         if (cache != nullptr) { | ||||
|             ACL_CHECK(aclrtFree(cache)); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     void* cache = nullptr; | ||||
|     int64_t size = 0; | ||||
|     void *  cache = nullptr; | ||||
|     int64_t size  = 0; | ||||
| }; | ||||
|  | ||||
| /** | ||||
|  * @brief Context for managing CANN backend operations. | ||||
|  */ | ||||
| struct ggml_backend_cann_context { | ||||
|     int32_t device;                  /**< Device ID. */ | ||||
|     std::string name;                /**< Name of the device. */ | ||||
|     std::string description;         /**< Description of the device. */ | ||||
|     aclrtEvent copy_event = nullptr; /**< Event for managing copy operations. */ | ||||
|     int32_t     device;               /**< Device ID. */ | ||||
|     std::string name;                 /**< Name of the device. */ | ||||
|     std::string description;          /**< Description of the device. */ | ||||
|     aclrtEvent  copy_event = nullptr; /**< Event for managing copy operations. */ | ||||
| #ifdef USE_ACL_GRAPH | ||||
|     /// Cached CANN ACL graph used for executing the current ggml computation graph. | ||||
|     ggml_cann_graph_lru_cache graph_lru_cache; | ||||
|     bool acl_graph_mode = true; | ||||
|     bool                      acl_graph_mode = true; | ||||
| #endif | ||||
|     cann_task_queue task_queue; | ||||
|     bool async_mode; | ||||
|     cann_task_queue        task_queue; | ||||
|     bool                   async_mode; | ||||
|     // Rope Cache | ||||
|     ggml_cann_rope_cache rope_cache; | ||||
|     ggml_cann_rope_cache   rope_cache; | ||||
|     // Constant Pool | ||||
|     ggml_cann_tensor_cache rms_norm_one_tensor_cache; | ||||
|     ggml_cann_tensor_cache rms_norm_zero_tensor_cache; | ||||
|  | ||||
|     aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */ | ||||
|     aclrtStream streams[GGML_CANN_MAX_STREAMS] = { nullptr }; /**< Array of streams for the device. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Constructor for initializing the context with a given device. | ||||
|      * @param device Device ID. | ||||
|      */ | ||||
|     explicit ggml_backend_cann_context(int device) | ||||
|         : device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) { | ||||
|     explicit ggml_backend_cann_context(int device) : | ||||
|         device(device), | ||||
|         name("CANN" + std::to_string(device)), | ||||
|         task_queue(1024, device) { | ||||
|         ggml_cann_set_device(device); | ||||
|         description = aclrtGetSocName(); | ||||
|  | ||||
|         async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or("")); | ||||
|         GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, | ||||
|             device, async_mode ? "ON" : "OFF"); | ||||
|         GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, device, async_mode ? "ON" : "OFF"); | ||||
| #ifdef USE_ACL_GRAPH | ||||
|         acl_graph_mode = parse_bool(get_env("GGML_CANN_ACL_GRAPH").value_or("on")); | ||||
|         GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", | ||||
|               __func__, device, | ||||
|               acl_graph_mode ? "GRAPH" : "EAGER", | ||||
|               acl_graph_mode ? "acl graph enabled" : "acl graph disabled"); | ||||
|         GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", __func__, device, acl_graph_mode ? "GRAPH" : "EAGER", | ||||
|                       acl_graph_mode ? "acl graph enabled" : "acl graph disabled"); | ||||
| #endif | ||||
|     } | ||||
|  | ||||
| @@ -549,8 +543,7 @@ struct ggml_backend_cann_context { | ||||
|     aclrtStream stream() { return stream(0); } | ||||
|  | ||||
|     // TODO: each stream should have a memory pool. | ||||
|     std::unique_ptr<ggml_cann_pool> | ||||
|         mem_pool; /**< Memory pool for the device. */ | ||||
|     std::unique_ptr<ggml_cann_pool> mem_pool; /**< Memory pool for the device. */ | ||||
|  | ||||
|     /** | ||||
|      * @brief Create a new memory pool for a given device. | ||||
| @@ -563,7 +556,7 @@ struct ggml_backend_cann_context { | ||||
|      * @brief Get or create the memory pool for the context. | ||||
|      * @return Reference to the memory pool. | ||||
|      */ | ||||
|     ggml_cann_pool& pool() { | ||||
|     ggml_cann_pool & pool() { | ||||
|         if (mem_pool == nullptr) { | ||||
|             mem_pool = new_pool_for_device(device); | ||||
|         } | ||||
|   | ||||
							
								
								
									
										1109
									
								
								ggml/src/ggml-cann/ggml-cann.cpp
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
							
						
						
									
										1109
									
								
								ggml/src/ggml-cann/ggml-cann.cpp
									
									
									
									
									
										
										
										Executable file → Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							
		Reference in New Issue
	
	Block a user
	 Chenguang Li
					Chenguang Li