CANN: format code using .clang-format (#15863)

This commit applies .clang-format rules to all source files under the
ggml-cann directory to ensure consistent coding style and readability.
The .clang-format option `SortIncludes: false` has been set to disable
automatic reordering of include directives.
No functional changes are introduced.

Co-authored-by: hipudding <huafengchun@gmail.com>
Author:       Chenguang Li
Date:         2025-10-16 16:41:11 +08:00
Committed by: GitHub
Parent:       6f5d924637
Commit:       7a50cf388a

6 changed files with 2063 additions and 2332 deletions
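
The formatting applied here corresponds to a `.clang-format` along these lines. This is a minimal sketch, not the repository's actual file: only `SortIncludes: false` is confirmed by the commit message, while the remaining keys are assumptions inferred from the formatted output below (spaces on both sides of `*`, long parameter lists broken one per line, a wide column limit):

    # Hypothetical .clang-format excerpt; only SortIncludes is confirmed above.
    Language:          Cpp
    IndentWidth:       4
    ColumnLimit:       120
    PointerAlignment:  Middle   # produces "aclTensor * t", as seen in the diff
    BinPackParameters: false    # one parameter per line in long signatures
    SortIncludes:      false    # keep include directives in their written order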

ggml/src/ggml-cann/acl_tensor.cpp (89 changes, Executable file → Normal file)

@@ -51,28 +51,31 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
     return ACL_DT_UNDEFINED;
 }
 
-aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
-                                   size_t* nb, int64_t dims, aclFormat format,
-                                   size_t offset) {
+aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
+                                    int64_t * ne,
+                                    size_t * nb,
+                                    int64_t dims,
+                                    aclFormat format,
+                                    size_t offset) {
     // If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
     // added.
     int64_t acl_ne[GGML_MAX_DIMS * 2], acl_stride[GGML_MAX_DIMS * 2];
     if (ne == nullptr) {
         for (int i = 0; i < GGML_MAX_DIMS; i++) {
             acl_ne[i] = tensor->ne[i];
             // The step size of acl is in elements.
             acl_stride[i] = tensor->nb[i] / ggml_element_size(tensor);
         }
     } else {
         // With bcast
         for (int i = 0; i < dims; i++) {
             acl_ne[i] = ne[i];
             acl_stride[i] = nb[i] / ggml_element_size(tensor);
         }
     }
 
     int64_t final_dims = (dims == 0 ? GGML_MAX_DIMS : dims);
     int64_t acl_storage_len = 1;
     for (int i = 0; i < final_dims; i++) {
         acl_storage_len += (acl_ne[i] - 1) * acl_stride[i];
@@ -84,15 +87,13 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
     std::reverse(acl_ne, acl_ne + final_dims);
     std::reverse(acl_stride, acl_stride + final_dims);
 
-    aclTensor* acl_tensor = aclCreateTensor(
-        acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
-        elem_offset, format, &acl_storage_len, 1,
-        tensor->data);
+    aclTensor * acl_tensor = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
+                                             elem_offset, format, &acl_storage_len, 1, tensor->data);
     return acl_tensor;
 }
 
-bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
+bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1) {
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         if (t1->ne[i] != t0->ne[i] && t1->ne[i] != 1) {
             return true;
@@ -101,15 +102,16 @@ bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
     return false;
 }
 
-int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
-                                  const ggml_tensor* src1,
-                                  int64_t* bcast_src0_ne,
-                                  int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
-                                  size_t* bcast_src1_nb) {
+int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0,
+                                  const ggml_tensor * src1,
+                                  int64_t * bcast_src0_ne,
+                                  int64_t * bcast_src1_ne,
+                                  size_t * bcast_src0_nb,
+                                  size_t * bcast_src1_nb) {
     GGML_ASSERT(ggml_can_repeat(src1, src0));
     int bcast_dim_cnt = 0;
     for (int i = 0; i < GGML_MAX_DIMS; i++) {
         int64_t nr = src0->ne[i] / src1->ne[i];
         bcast_src0_ne[bcast_dim_cnt] = src0->ne[i] / nr;
         bcast_src1_ne[bcast_dim_cnt] = src1->ne[i];
         bcast_src0_nb[bcast_dim_cnt] = src0->nb[i];
@@ -119,21 +121,26 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
             // Need to add an extra dim.
             bcast_src0_ne[bcast_dim_cnt] = nr;
             bcast_src1_ne[bcast_dim_cnt] = 1;
-            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
-                                           bcast_src0_ne[bcast_dim_cnt - 1];
-            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
-                                           bcast_src1_ne[bcast_dim_cnt - 1];
+            bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] * bcast_src0_ne[bcast_dim_cnt - 1];
+            bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] * bcast_src1_ne[bcast_dim_cnt - 1];
             bcast_dim_cnt++;
         }
     }
     return bcast_dim_cnt;
 }
 
-int64_t ggml_cann_get_mulmat_bcast_shape(
-    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
-    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
-    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
-    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
+int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne,
+                                         const int64_t * weight_ne,
+                                         const int64_t * dst_ne,
+                                         const size_t * input_nb,
+                                         const size_t * weight_nb,
+                                         const size_t * dst_nb,
+                                         int64_t * bcast_input_ne,
+                                         int64_t * bcast_weight_ne,
+                                         int64_t * bcast_dst_ne,
+                                         size_t * bcast_input_nb,
+                                         size_t * bcast_weight_nb,
+                                         size_t * bcast_dst_nb) {
     // input and dst shoule in same shape, except first two dims.
    GGML_ASSERT(input_ne[2] == dst_ne[2]);
     GGML_ASSERT(input_ne[3] == dst_ne[3]);
@@ -148,34 +155,30 @@ int64_t ggml_cann_get_mulmat_bcast_shape(
         // Do not use bcast in the first two dimensions because we only support
         // the bcast batch dimension. Just copy them.
         if (i < 2 || nr == 1) {
             bcast_input_ne[bcast_dim_cnt] = input_ne[i];
             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
             bcast_dst_ne[bcast_dim_cnt] = dst_ne[i];
             bcast_input_nb[bcast_dim_cnt] = input_nb[i];
             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
             bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
             bcast_dim_cnt++;
         } else {
             // Need to add an extra dim.
             bcast_input_ne[bcast_dim_cnt] = nr;
             bcast_dst_ne[bcast_dim_cnt] = nr;
             bcast_weight_ne[bcast_dim_cnt] = 1;
             bcast_input_nb[bcast_dim_cnt] = input_nb[i];
             bcast_dst_nb[bcast_dim_cnt] = dst_nb[i];
             bcast_weight_nb[bcast_dim_cnt] = weight_nb[i];
             bcast_dim_cnt++;
             bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
             bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
             bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
-            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
-                                            bcast_input_ne[bcast_dim_cnt - 1];
-            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
-                                          bcast_dst_ne[bcast_dim_cnt - 1];
-            bcast_weight_nb[bcast_dim_cnt] =
-                bcast_weight_nb[bcast_dim_cnt - 1] *
-                bcast_weight_ne[bcast_dim_cnt - 1];
+            bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] * bcast_input_ne[bcast_dim_cnt - 1];
+            bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] * bcast_dst_ne[bcast_dim_cnt - 1];
+            bcast_weight_nb[bcast_dim_cnt] = bcast_weight_nb[bcast_dim_cnt - 1] * bcast_weight_ne[bcast_dim_cnt - 1];
             bcast_dim_cnt++;
         }
     }
     return bcast_dim_cnt;
 }
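
The storage length in ggml_cann_create_tensor above follows acl_storage_len = 1 + sum_i (acl_ne[i] - 1) * acl_stride[i], with strides counted in elements. A minimal standalone C++ sketch with made-up shape values (not taken from this commit) shows that, for a contiguous tensor, the formula recovers exactly the total element count:

    // Hypothetical example: a contiguous 4x3x2x1 tensor; strides are in
    // elements, mirroring acl_stride[i] = nb[i] / ggml_element_size(tensor).
    #include <cstdint>
    #include <cstdio>

    int main() {
        int64_t ne[4]     = { 4, 3, 2, 1 };
        int64_t stride[4] = { 1, 4, 12, 24 };
        int64_t acl_storage_len = 1;
        for (int i = 0; i < 4; i++) {
            acl_storage_len += (ne[i] - 1) * stride[i];
        }
        // 1 + 3*1 + 2*4 + 1*12 + 0*24 = 24, i.e. 4*3*2*1 elements
        std::printf("acl_storage_len = %lld\n", (long long) acl_storage_len);
        return 0;
    }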

ggml/src/ggml-cann/acl_tensor.h (97 changes, Executable file → Normal file)

@@ -62,10 +62,12 @@ aclDataType ggml_cann_type_mapping(ggml_type type);
  * @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
  * @return Pointer to the created ACL tensor.
  */
-aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
-                                   size_t* nb = nullptr, int64_t dims = 0,
-                                   aclFormat format = ACL_FORMAT_ND,
-                                   size_t offset = 0);
+aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
+                                    int64_t * ne = nullptr,
+                                    size_t * nb = nullptr,
+                                    int64_t dims = 0,
+                                    aclFormat format = ACL_FORMAT_ND,
+                                    size_t offset = 0);
 
 /**
  * @brief Template for creating an ACL tensor from provided parameters. typename TYPE
@@ -87,12 +89,15 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = null
  * @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
  * @return Pointer to the created ACL tensor.
  */
-template<typename TYPE>
-aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
-                                   TYPE type_size, int64_t* ne, TYPE* nb,
-                                   int64_t dims,
-                                   aclFormat format = ACL_FORMAT_ND,
-                                   size_t offset = 0) {
+template <typename TYPE>
+aclTensor * ggml_cann_create_tensor(void * data_ptr,
+                                    aclDataType dtype,
+                                    TYPE type_size,
+                                    int64_t * ne,
+                                    TYPE * nb,
+                                    int64_t dims,
+                                    aclFormat format = ACL_FORMAT_ND,
+                                    size_t offset = 0) {
     int64_t tmp_ne[GGML_MAX_DIMS * 2];
     int64_t tmp_stride[GGML_MAX_DIMS * 2];
@@ -109,9 +114,8 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
     std::reverse(tmp_ne, tmp_ne + dims);
     std::reverse(tmp_stride, tmp_stride + dims);
 
-    aclTensor* acl_tensor =
-        aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
-                        format, &acl_storage_len, 1, data_ptr);
+    aclTensor * acl_tensor =
+        aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, format, &acl_storage_len, 1, data_ptr);
     return acl_tensor;
 }
@@ -132,7 +136,7 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
  * to 1. If such a dimension is found, broadcasting is required to align t1
  * with t0 for element-wise operations.
  */
-bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
+bool ggml_cann_need_bcast(const ggml_tensor * t0, const ggml_tensor * t1);
 
 /**
  * @brief Computes broadcast shapes and strides for two ggml_tensors.
@@ -187,19 +191,21 @@ bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
  * dim1 in a inserted dim, should add nb for dim1,
  * and all other nb moves to next in order.
  */
-int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
-                                  int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
-                                  size_t* bcast_nb_src0, size_t* bcast_nb_src1);
+int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0,
+                                  const ggml_tensor * src1,
+                                  int64_t * bcast_ne_src0,
+                                  int64_t * bcast_ne_src1,
+                                  size_t * bcast_nb_src0,
+                                  size_t * bcast_nb_src1);
 
 // Bcast macro to avoid duplicate code.
 #define BCAST_SHAPE(src0, src1)                   \
     int64_t bcast_##src0##_ne[GGML_MAX_DIMS * 2]; \
     int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2]; \
     size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2];  \
     size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2];  \
-    int64_t bcast_dims = ggml_cann_get_bcast_shape(                          \
-        src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \
-        bcast_##src1##_nb);
+    int64_t bcast_dims = ggml_cann_get_bcast_shape(src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, \
+                                                   bcast_##src0##_nb, bcast_##src1##_nb);
 
 #define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
@@ -233,26 +239,31 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* sr
  * before cast dim.
  * @sa ggml_cann_get_bcast_shape
  */
-int64_t ggml_cann_get_mulmat_bcast_shape(
-    const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
-    const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
-    int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
-    size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb);
+int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne,
+                                         const int64_t * weight_ne,
+                                         const int64_t * dst_ne,
+                                         const size_t * input_nb,
+                                         const size_t * weight_nb,
+                                         const size_t * dst_nb,
+                                         int64_t * bcast_input_ne,
+                                         int64_t * bcast_weight_ne,
+                                         int64_t * bcast_dst_ne,
+                                         size_t * bcast_input_nb,
+                                         size_t * bcast_weight_nb,
+                                         size_t * bcast_dst_nb);
 
 // Bcast macro to avoid duplicate code.
 #define BCAST_MUL_MAT_SHAPE(input, weight, dst)     \
     int64_t bcast_##input##_ne[GGML_MAX_DIMS * 2];  \
     int64_t bcast_##weight##_ne[GGML_MAX_DIMS * 2]; \
     int64_t bcast_##dst##_ne[GGML_MAX_DIMS * 2];    \
     size_t bcast_##input##_nb[GGML_MAX_DIMS * 2];   \
     size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2];  \
     size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2];     \
-    int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape(              \
-        input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \
-        bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne,      \
-        bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
+    int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape(                                                       \
+        input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, bcast_##input##_ne, bcast_##weight##_ne, \
+        bcast_##dst##_ne, bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
 
-#define BCAST_MUL_MAT_PARAM(tensor) \
-    bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
+#define BCAST_MUL_MAT_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
 
 #endif // CANN_ACL_TENSOR_H
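
As a usage illustration of the macros above (a hedged sketch, not code from this commit): at a call site where two ggml_tensor pointers are literally named src0 and src1, BCAST_SHAPE declares the bcast_* arrays and bcast_dims via ggml_cann_get_bcast_shape, and BCAST_PARAM then supplies the ne/nb/dims arguments that ggml_cann_create_tensor expects:

    // Hypothetical call site; the names src0/src1 matter, because the macros
    // token-paste them into bcast_src0_ne, bcast_src1_nb, and so on.
    BCAST_SHAPE(src0, src1)
    aclTensor * acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
    aclTensor * acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));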

ggml/src/ggml-cann/aclnn_ops.cpp (2508 changes, Executable file → Normal file)

Diff suppressed because it is too large.

ggml/src/ggml-cann/aclnn_ops.h (401 changes, Executable file → Normal file)

@@ -62,7 +62,7 @@
  * @param dst The ggml tensor representing the destination, which op is
  * GGML_OP_REPEAT and specifies the desired dimensions.
  */
-void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_repeat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Applies the Leaky ReLU activation function to a tensor using the CANN
@@ -82,7 +82,7 @@ void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the result of the Leaky ReLU
  * activation is stored, which op is `GGML_OP_LEAKY_RELU`
  */
-void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_leaky_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Concatenates multiple tensors along a specified dimension using the
@@ -97,7 +97,7 @@ void ggml_cann_leaky_relu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @attention tensorList length should be 2 and the dimension using for concat
  * default to 1.
  */
-void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_concat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Generates a sequence of evenly spaced values within a specified
@@ -113,7 +113,7 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * `start`, 'stop' and 'step' are in dst->op_params and dst->op is
  * `GGML_OP_ARANGE`.
  */
-void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_arange(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Applies a clamp operation to the elements of a ggml tensor using the
@@ -131,7 +131,7 @@ void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the clamped values will be stored.
  * dst->op is `GGML_OP_CLAMP`, `min` and `max` value is in dst->params.
  */
-void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_clamp(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Scales the elements of a ggml tensor by a constant factor using the
@@ -148,7 +148,7 @@ void ggml_cann_clamp(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the scaled values will be stored.
  * dst->op is `GGML_OP_SCALE` and `scale` value is in dst->params.
  */
-void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_scale(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Sorts the elements of a ggml tensor and returns the indices that
@@ -163,7 +163,7 @@ void ggml_cann_scale(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the sorted indices will be stored.
  * dst->op is `GGML_OP_ARGSORT`.
  */
-void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_argsort(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the Layer Normalization for a ggml tensor using the CANN
@@ -185,7 +185,7 @@ void ggml_cann_argsort(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the normalized values will be stored.
  * @attention `Var` defaults to dst->ne[0].
  */
-void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the Group Normalization for a ggml tensor using the CANN
@@ -209,7 +209,7 @@ void ggml_cann_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  *
  * @attention eps defaults to 1e-6f.
  */
-void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_group_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the accumulation of tensors using the CANN backend.
@@ -228,7 +228,7 @@ void ggml_cann_group_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the accumulated values will be stored.
  * `inplace` is in dst->params, and dst->op is `GGML_OP_ACC`.
  */
-void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_acc(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the sum of elements along the last dimension of a ggml tensor
@@ -244,7 +244,7 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  *
  * @attention `reduce_dims` defaults to 3, which means the last dimension.
  */
-void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_sum_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the sum of elements in a ggml tensor.
@@ -258,7 +258,7 @@ void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  *
  */
-void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_sum(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Upsamples a ggml tensor using nearest neighbor interpolation using
@@ -274,8 +274,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the upsampled values will be stored.
  * dst->op is `GGML_OP_UPSCALE`.
  */
-void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
-                                  ggml_tensor* dst);
+void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Pads a ggml tensor to match the dimensions of the destination tensor
@@ -290,7 +289,7 @@ void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
  * @param dst The destination tensor, which specifies the target dimensions for
  * padding. dst->op is `GGML_OP_PAD`.
  */
-void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_pad(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Executes a 2D pooling operation on a ggml tensor using the CANN
@@ -307,7 +306,7 @@ void ggml_cann_pad(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor on which the pooling operation is to be
  * performed. dst->op is `GGML_OP_POOL_2D`.
  */
-void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_pool2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Duplicates a ggml tensor using the CANN backend.
@@ -326,7 +325,7 @@ void ggml_cann_pool2d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * different shape and dst is no-contiguous.
 * @note: This func need to simplify.
 */
-void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_dup(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the Root Mean Square (RMS) normalization of a ggml tensor
@@ -348,7 +347,7 @@ void ggml_cann_dup(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the normalized values will be stored.
  * dst->op is `GGML_OP_RMS_NORM`.
  */
-void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_rms_norm(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Applies a diagonal mask to the tensor with a specified value.
@@ -363,7 +362,7 @@ void ggml_cann_rms_norm(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * `GGML_OP_DIAG_MASK`
  * @param value The value to use for masking.
  */
-void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float value);
+void ggml_cann_diag_mask(ggml_backend_cann_context & ctx, ggml_tensor * dst, float value);
 
 /**
  * @brief Performs an image-to-column transformation on the input tensor.
@@ -378,7 +377,7 @@ void ggml_cann_diag_mask(ggml_backend_cann_context& ctx, ggml_tensor* dst, float
  * @param dst The destination tensor that stores the result of the operation.
  * dst->op is `GGML_OP_IM2COL`.
  */
-void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_im2col(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes time step embeddings using sine and cosine functions.
@@ -392,10 +391,10 @@ void ggml_cann_im2col(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the result of the embedding operation
  * will be stored. dst->op is `GGML_OP_TIMESTEP_EMBEDDING`.
  */
-void ggml_cann_timestep_embedding(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_timestep_embedding(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 // @see ggml_cann_dup.
-void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_cpy(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the softmax activation with optional masking.
@@ -417,7 +416,7 @@ void ggml_cann_cpy(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the result will be stored. dst->op is
  * `GGML_OP_SOFTMAX`.
  */
-void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_softmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Extracts specific rows from a tensor based on indices.
@@ -429,7 +428,7 @@ void ggml_cann_softmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param ctx The backend CANN context for executing operations.
  * @param dst The destination tensor where the extracted rows will be stored.
  */
-void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_get_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Writes specific rows into a tensor at positions specified by indices.
@@ -441,7 +440,7 @@ void ggml_cann_get_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param ctx The backend CANN context for executing operations.
  * @param dst The destination tensor where the specified rows will be updated.
  */
-void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_set_rows(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Executes matrix multiplication for the given tensor.
@@ -454,7 +453,7 @@ void ggml_cann_set_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor for storing the result of the matrix
  * multiplication. dst->op is `GGML_OP_MUL_MAT`.
  */
-void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_mul_mat(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Applies Rotary Positional Embedding (RoPE) to the input tensor.
@@ -477,7 +476,7 @@ void ggml_cann_mul_mat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @note The function currently does not support cases where the freq_scale is
  * not equal 1.
  */
-void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_rope(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the index of the maximum value along the specified dimension
@@ -492,7 +491,7 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the indices of the maximum values will
  * be stored. dst->op is `GGML_OP_ARGMAX`.
  */
-void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_argmax(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Adds two tensors element-wise and stores the result in a destination
@@ -509,8 +508,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param acl_src1 The second source tensor.
  * @param acl_dst The destination tensor where the result will be stored.
  */
-void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
-               aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
+void aclnn_add(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src0,
+               aclTensor * acl_src1,
+               aclTensor * acl_dst = nullptr);
 
 /**
  * @brief Sub two tensors element-wise and stores the result in a destination
@@ -527,8 +528,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
  * @param acl_src1 The second source tensor.
  * @param acl_dst The destination tensor where the result will be stored.
  */
-void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
-               aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
+void aclnn_sub(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src0,
+               aclTensor * acl_src1,
+               aclTensor * acl_dst = nullptr);
 
 /**
  * @brief Performs element-wise multiplication of two tensors and stores the
@@ -546,8 +549,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
  * @param acl_other The second tensor for element-wise multiplication.
  * @param acl_dst The destination tensor where the result will be stored.
  */
-void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+void aclnn_mul(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src,
+               aclTensor * acl_other,
+               aclTensor * acl_dst = nullptr);
 
 /**
  * @brief Matrix division, optionally in-place.
@@ -567,8 +572,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
  * @param inplace Flag indicating whether to perform the operation in-place on
  * `acl_src`.
  */
-void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+void aclnn_div(ggml_backend_cann_context & ctx,
+               aclTensor * acl_src,
+               aclTensor * acl_other,
+               aclTensor * acl_dst = nullptr);
 
 /**
  * @brief Applies element-wise cosine function to the elements of a tensor.
@@ -584,8 +591,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
  * @param acl_dst The destination tensor where the cosine results will be
  * stored.
  */
-void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_dst);
+void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
 
 /**
  * @brief Applies element-wise sine function to the elements of a tensor.
@@ -602,8 +608,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
  * @param acl_src The source tensor on which the sine function will be applied.
  * @param acl_dst The destination tensor where the sine results will be stored.
  */
-void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
-               aclTensor* acl_dst);
+void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
 
 /**
  * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
@@ -621,8 +626,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
  * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
  * @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
  */
-void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
-                 aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
+void bcast_shape(ggml_tensor * src0,
+                 ggml_tensor * src1,
+                 ggml_tensor * dst,
+                 aclTensor ** acl_src0,
+                 aclTensor ** acl_src1,
+                 aclTensor ** acl_dst);
 
 /**
  * @brief Computes the 1D transposed convolution (deconvolution) of a ggml
@@ -637,7 +646,7 @@ void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
  * @param dst The destination tensor where the transposed convolution result
  * will be stored. dst->op is `GGML_OP_CONV_TRANSPOSE_1D`.
  */
-void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_conv_transpose_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Applies the ELU (Exponential Linear Unit) activation to a ggml tensor
@@ -662,7 +671,7 @@ void ggml_cann_conv_transpose_1d(ggml_backend_cann_context& ctx, ggml_tensor* ds
  * @param dst The destination tensor where the ELU-activated result will be stored.
  * dst->op is expected to be `GGML_OP_ELU`.
  */
-void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_elu(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Computes the mean of a ggml tensor element-wise using the CANN backend.
@@ -677,7 +686,7 @@ void ggml_cann_elu(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the mean result will be stored.
  * dst->op is expected to be `GGML_OP_MEAN`.
  */
-void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_mean(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Applies 1D reflect padding to a ggml tensor using the CANN backend.
@@ -692,7 +701,7 @@ void ggml_cann_mean(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the padded result will be stored.
  * dst->op is expected to be `GGML_OP_PAD_REFLECT_1D`.
  */
-void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_pad_reflect_1d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Counts the number of equal elements in two ggml tensors using the CANN backend.
@@ -708,7 +717,7 @@ void ggml_cann_pad_reflect_1d(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the result will be stored.
  * dst->op is expected to be `GGML_OP_COUNT_EQUAL`.
  */
-void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_count_equal(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Applies the Step activation function to a ggml tensor using the CANN backend.
@@ -723,7 +732,7 @@ void ggml_cann_count_equal(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the result will be stored.
  * dst->op is expected to be `GGML_OP_STEP`.
  */
-void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_step(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /**
  * @brief Performs the Flash Attention extended operator using the CANN backend.
@@ -738,59 +747,46 @@ void ggml_cann_step(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  * @param dst The destination tensor where the result will be stored.
  * dst->op is expected to be `GGML_OP_FLASH_ATTN_EXT`.
  */
-void ggml_cann_flash_attn_ext(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_flash_attn_ext(ggml_backend_cann_context & ctx, ggml_tensor * dst);
 
 /*
  * @brief A generic wrapper for ACL resources with custom deleter support.
 */
-using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
+using any_acl_resource = std::unique_ptr<void, std::function<void(void *)>>;
 
 /**
  * @brief Trait structure used to define how to destroy a given ACL resource type.
  *
  * @tparam T ACL resource type.
 */
-template<typename T>
-struct acl_resource_traits;
+template <typename T> struct acl_resource_traits;
 
 /**
  * @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
 */
-template<>
-struct acl_resource_traits<aclTensor> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
-    }
+template <> struct acl_resource_traits<aclTensor> {
+    static void destroy(void * p) { ACL_CHECK(aclDestroyTensor(static_cast<aclTensor *>(p))); }
 };
 
 /**
  * @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
 */
-template<>
-struct acl_resource_traits<aclIntArray> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
-    }
+template <> struct acl_resource_traits<aclIntArray> {
+    static void destroy(void * p) { ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray *>(p))); }
 };
 
 /**
  * @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
 */
-template<>
-struct acl_resource_traits<aclScalar> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
-    }
+template <> struct acl_resource_traits<aclScalar> {
    static void destroy(void * p) { ACL_CHECK(aclDestroyScalar(static_cast<aclScalar *>(p))); }
 };
 
 /**
  * @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
 */
-template<>
-struct acl_resource_traits<aclTensorList> {
-    static void destroy(void* p) {
-        ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
-    }
+template <> struct acl_resource_traits<aclTensorList> {
+    static void destroy(void * p) { ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList *>(p))); }
 };
 
 /**
@@ -800,14 +796,8 @@ struct acl_resource_traits<aclTensorList> {
  * @param ptr Raw pointer to ACL resource.
  * @return any_acl_resource Smart pointer that handles destruction.
 */
-template<typename T>
-any_acl_resource make_acl_resource(T* ptr) {
-    return any_acl_resource(
-        static_cast<void*>(ptr),
-        [](void* p) {
-            acl_resource_traits<T>::destroy(p);
-        }
-    );
+template <typename T> any_acl_resource make_acl_resource(T * ptr) {
+    return any_acl_resource(static_cast<void *>(ptr), [](void * p) { acl_resource_traits<T>::destroy(p); });
 }
 
 /**
@@ -817,8 +807,7 @@ any_acl_resource make_acl_resource(T* ptr) {
  * @param vec Target vector to hold ACL resources.
  * @param args Raw pointers to ACL resources.
 */
-template<typename... Args>
-void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
+template <typename... Args> void register_acl_resources(std::vector<any_acl_resource> & vec, Args *... args) {
     (vec.emplace_back(make_acl_resource(args)), ...);
 }
@@ -826,39 +815,36 @@ void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
  * @brief Task class that wraps the execution of an aclnn function call.
 */
 class aclnn_task : public cann_task {
   public:
-    aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
-               uint64_t workspace_size, aclOpExecutor * executor,
-               aclrtStream stream) :
+    aclnn_task(aclnn_func_t aclnn_func,
+               void * workspace_addr,
+               uint64_t workspace_size,
+               aclOpExecutor * executor,
+               aclrtStream stream) :
         aclnn_func_(aclnn_func),
         workspace_addr_(workspace_addr),
         workspace_size_(workspace_size),
         executor_(executor),
         stream_(stream) {}
-    virtual void run_task() override {
-        ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_));
-    }
+
+    virtual void run_task() override { ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_)); }
   private:
     aclnn_func_t aclnn_func_;
     void * workspace_addr_;
     uint64_t workspace_size_;
     aclOpExecutor * executor_;
     aclrtStream stream_;
 };
 
 /**
  * @brief Task class that releases ACL resources after usage.
 */
 class release_resource_task : public cann_task {
   public:
-    release_resource_task(std::vector<any_acl_resource>&& resources){
-        resource_ = std::move(resources);
-    }
-    virtual void run_task() override {
-        resource_.clear();
-    }
+    release_resource_task(std::vector<any_acl_resource> && resources) { resource_ = std::move(resources); }
+
+    virtual void run_task() override { resource_.clear(); }
   private:
     std::vector<any_acl_resource> resource_;
 };
@@ -866,38 +852,40 @@ private:
  * @brief Task class for performing asynchronous memory copy operations.
 */
 class async_memcpy_task : public cann_task {
   public:
-    async_memcpy_task(void* dst, const void* src, size_t size,
-                      aclrtMemcpyKind kind, aclrtStream stream)
-        : dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
-    virtual void run_task() override {
-        ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
-    }
+    async_memcpy_task(void * dst, const void * src, size_t size, aclrtMemcpyKind kind, aclrtStream stream) :
+        dst_(dst),
+        src_(src),
+        size_(size),
+        kind_(kind),
+        stream_(stream) {}
+
+    virtual void run_task() override { ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_)); }
   private:
-    void* dst_;
-    const void* src_;
+    void * dst_;
+    const void * src_;
     size_t size_;
     aclrtMemcpyKind kind_;
     aclrtStream stream_;
 };
 
 /**
  * @brief Task class for performing asynchronous memory set operations.
 */
 class async_memset_task : public cann_task {
   public:
-    async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
-        : buffer_(buffer), size_(size), value_(value), stream_(stream) {}
-    virtual void run_task() override {
-        ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_));
-    }
+    async_memset_task(void * buffer, size_t size, int32_t value, aclrtStream stream) :
+        buffer_(buffer),
+        size_(size),
+        value_(value),
+        stream_(stream) {}
+
+    virtual void run_task() override { ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_)); }
   private:
-    void* buffer_;
+    void * buffer_;
     size_t size_;
     int32_t value_;
     aclrtStream stream_;
 };
@@ -918,25 +906,24 @@ class async_memset_task : public cann_task {
  * same stream are executed in queue order.
 */
-#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)                                           \
-    do {                                                                                     \
-        uint64_t workspaceSize = 0;                                                          \
-        aclOpExecutor * executor;                                                            \
-        void * workspaceAddr = nullptr;                                                      \
-        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
-        /* workspace should alloced in main thread to keep malloc order when using vmm. */   \
-        if (workspaceSize > 0) {                                                             \
-            ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);             \
-            workspaceAddr = workspace_allocator.get();                                       \
-        }                                                                                    \
-        if (CTX.async_mode) {                                                                \
-            auto task =                                                                      \
-                std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize,   \
-                                             executor, CTX.stream());                        \
-            CTX.task_queue.submit_task(std::move(task));                                     \
-        } else {                                                                             \
-            ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream())); \
-        }                                                                                    \
-    } while (0)
+#define GGML_CANN_CALL_ACLNN_OP(CTX, OP_NAME, ...)                                                                  \
+    do {                                                                                                            \
+        uint64_t workspaceSize = 0;                                                                                 \
+        aclOpExecutor * executor;                                                                                   \
+        void * workspaceAddr = nullptr;                                                                             \
+        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor));                        \
+        /* workspace should alloced in main thread to keep malloc order when using vmm. */                          \
+        if (workspaceSize > 0) {                                                                                    \
+            ggml_cann_pool_alloc workspace_allocator(CTX.pool(), workspaceSize);                                    \
+            workspaceAddr = workspace_allocator.get();                                                              \
+        }                                                                                                           \
+        if (CTX.async_mode) {                                                                                       \
+            auto task =                                                                                             \
+                std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, executor, CTX.stream()); \
+            CTX.task_queue.submit_task(std::move(task));                                                            \
+        } else {                                                                                                    \
+            ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream()));                        \
+        }                                                                                                           \
+    } while (0)
@@ -947,11 +934,10 @@ class async_memset_task : public cann_task {
  * @param ctx Backend context which manages task submission and async mode.
  * @param args Pointers to ACL resources to be released.
 */
-template <typename... Args>
-void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
+template <typename... Args> void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
     std::vector<any_acl_resource> resources;
     register_acl_resources(resources, std::forward<Args>(args)...);
-    if(ctx.async_mode) {
+    if (ctx.async_mode) {
         auto task = std::make_unique<release_resource_task>(std::move(resources));
         ctx.task_queue.submit_task(std::move(task));
     }
@@ -966,8 +952,11 @@ void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... arg
  * @param len Size of memory to copy (in bytes).
  * @param kind Type of memory copy (host-to-device, device-to-host, etc).
 */
-inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
-                                   const void * src, size_t len, aclrtMemcpyKind kind) {
+inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx,
+                                   void * dst,
+                                   const void * src,
+                                   size_t len,
+                                   aclrtMemcpyKind kind) {
     if (ctx.async_mode) {
         auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
         ctx.task_queue.submit_task(std::move(task));
@@ -976,8 +965,11 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
     }
 }
 
-inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
-                                   const void * src, size_t len, aclrtMemcpyKind kind) {
+inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx,
+                                   void * dst,
+                                   const void * src,
+                                   size_t len,
+                                   aclrtMemcpyKind kind) {
     if (ctx->async_mode) {
         auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
         ctx->task_queue.submit_task(std::move(task));
@@ -994,8 +986,7 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
  * @param size Size of the memory buffer (in bytes).
  * @param value Value to set in the buffer.
 */
-inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
-                                   size_t size, int value) {
+inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer, size_t size, int value) {
     if (ctx.async_mode) {
         auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
         ctx.task_queue.submit_task(std::move(task));
@@ -1029,7 +1020,7 @@ inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffe
* @param dst The destination tensor where the expert-weighted token outputs are stored. * @param dst The destination tensor where the expert-weighted token outputs are stored.
* Expected to be of shape [M, K, N, 1]. * Expected to be of shape [M, K, N, 1].
*/ */
void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst); void ggml_cann_mul_mat_id(ggml_backend_cann_context & ctx, ggml_tensor * dst);
/** /**
* @brief Check whether a tensor is a weight tensor for matrix multiplication. * @brief Check whether a tensor is a weight tensor for matrix multiplication.
@@ -1041,20 +1032,14 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
* *
* @param tensor Pointer to the target ggml_tensor object (const-qualified). * @param tensor Pointer to the target ggml_tensor object (const-qualified).
*/ */
static bool is_matmul_weight(const ggml_tensor* tensor) { static bool is_matmul_weight(const ggml_tensor * tensor) {
std::string name = ggml_get_name(tensor); std::string name = ggml_get_name(tensor);
static const std::unordered_set<std::string> weight_suffixes{ static const std::unordered_set<std::string> weight_suffixes{ "output.weight", "attn_q.weight",
"output.weight", "attn_k.weight", "attn_v.weight",
"attn_q.weight", "attn_output.weight", "ffn_gate.weight",
"attn_k.weight", "ffn_up.weight", "ffn_down.weight" };
"attn_v.weight",
"attn_output.weight",
"ffn_gate.weight",
"ffn_up.weight",
"ffn_down.weight"
};
for (const auto& suffix : weight_suffixes) { for (const auto & suffix : weight_suffixes) {
if (name.find(suffix) != std::string::npos) { if (name.find(suffix) != std::string::npos) {
return true; return true;
} }
@@ -1078,14 +1063,13 @@ static bool is_matmul_weight(const ggml_tensor* tensor) {
 * @param ctx The CANN backend context used to manage execution and resources.
 * @param dst The destination tensor.
 */
-template <auto binary_op>
-void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src0 = dst->src[0];
-    ggml_tensor* src1 = dst->src[1];
+template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src0 = dst->src[0];
+    ggml_tensor * src1 = dst->src[1];

-    aclTensor* acl_src0;
-    aclTensor* acl_src1;
-    aclTensor* acl_dst;
+    aclTensor * acl_src0;
+    aclTensor * acl_src1;
+    aclTensor * acl_dst;

    // Need bcast
    bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
@@ -1094,7 +1078,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
    ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
}
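A dispatch sketch for the template above; `aclnn_add` is a representative kernel name and is assumed to match the expected `void(ggml_backend_cann_context &, aclTensor *, aclTensor *, aclTensor *)` signature:

    static void example_add(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
        // src0 and src1 are read from dst->src[] inside the template.
        ggml_cann_binary_op<aclnn_add>(ctx, dst);
    }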
/**
 * @brief Applies a unary operation to an input tensor using the CANN backend.
 *
@@ -1107,12 +1090,12 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
 * @param ctx The CANN backend context for managing resources and execution.
 * @param dst The destination tensor. Its src[0] is treated as the input tensor.
 */
-template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
-void ggml_cann_op_unary(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src = dst->src[0];
-    aclTensor* acl_src = ggml_cann_create_tensor(src);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
+template <void unary_op(ggml_backend_cann_context &, aclTensor *, aclTensor *)>
+void ggml_cann_op_unary(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
+    ggml_tensor * src     = dst->src[0];
+    aclTensor *   acl_src = ggml_cann_create_tensor(src);
+    aclTensor *   acl_dst = ggml_cann_create_tensor(dst);

    unary_op(ctx, acl_src, acl_dst);
    ggml_cann_release_resources(ctx, acl_src, acl_dst);
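The same pattern for the unary template, with `aclnn_cos` as an assumed kernel matching the `unary_op` signature:

    static void example_cos(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
        ggml_cann_op_unary<aclnn_cos>(ctx, dst);  // the input is taken from dst->src[0]
    }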
@@ -1138,9 +1121,9 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
 *
 * @see GGML_CANN_CALL_OP_UNARY
 */
-void ggml_cann_op_unary(
-    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
-    ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
+                        ggml_backend_cann_context & ctx,
+                        ggml_tensor * dst);

/**
 * @brief Applies a gated (GLU-style) unary operation using the CANN backend.
@@ -1172,9 +1155,9 @@ void ggml_cann_op_unary(
 *
 * @see GGML_CANN_CALL_OP_UNARY_GATED
 */
-void ggml_cann_op_unary_gated(
-    std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
-    ggml_backend_cann_context& ctx, ggml_tensor* dst);
+void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
+                              ggml_backend_cann_context & ctx,
+                              ggml_tensor * dst);

/**
 * @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
@@ -1197,16 +1180,13 @@ void ggml_cann_op_unary_gated(
 * @see ggml_cann_op_unary
 * @see GGML_CANN_CALL_ACLNN_OP
 */
-#define GGML_CANN_CALL_OP_UNARY(OP_NAME)                             \
-    do {                                                             \
-        auto lambda = [](ggml_backend_cann_context& ctx,             \
-                         aclTensor* acl_src,                         \
-                         aclTensor* acl_dst) {                       \
-            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
-        };                                                           \
-        ggml_cann_op_unary(lambda, ctx, dst);                        \
-    }                                                                \
-    while (0)
+#define GGML_CANN_CALL_OP_UNARY(OP_NAME)                                                              \
+    do {                                                                                              \
+        auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
+            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);                                  \
+        };                                                                                            \
+        ggml_cann_op_unary(lambda, ctx, dst);                                                         \
+    } while (0)
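A sketch of the macro in use; `Relu` stands in for any ACLNN op name, and `ctx`/`dst` must already be in scope, as they are in the backend's op dispatcher:

    static void example_unary_relu(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
        GGML_CANN_CALL_OP_UNARY(Relu);
    }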
/**
 * @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated.
@@ -1229,15 +1209,12 @@ void ggml_cann_op_unary_gated(
 * @see ggml_cann_op_unary_gated
 * @see GGML_CANN_CALL_ACLNN_OP
 */
-#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)                       \
-    do {                                                             \
-        auto lambda = [](ggml_backend_cann_context& ctx,             \
-                         aclTensor* acl_src,                         \
-                         aclTensor* acl_dst) {                       \
-            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
-        };                                                           \
-        ggml_cann_op_unary_gated(lambda, ctx, dst);                  \
-    }                                                                \
-    while (0)
+#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME)                                                        \
+    do {                                                                                              \
+        auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
+            GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst);                                  \
+        };                                                                                            \
+        ggml_cann_op_unary_gated(lambda, ctx, dst);                                                   \
+    } while (0)

#endif  // CANN_ACLNN_OPS

191
ggml/src/ggml-cann/common.h Executable file → Normal file

@@ -44,7 +44,7 @@
#include "../include/ggml.h" #include "../include/ggml.h"
#include "../ggml-impl.h" #include "../ggml-impl.h"
#define MATRIX_ROW_PADDING 512 #define MATRIX_ROW_PADDING 512
#define GGML_CANN_MAX_STREAMS 8 #define GGML_CANN_MAX_STREAMS 8
/** /**
@@ -56,8 +56,7 @@
 * @param line The line number at which the error occurred.
 * @param msg The error message.
 */
-[[noreturn]] void ggml_cann_error(const char* stmt, const char* func,
-                                  const char* file, int line, const char* msg);
+[[noreturn]] void ggml_cann_error(const char * stmt, const char * func, const char * file, int line, const char * msg);

/**
 * @brief Checks the result of a CANN function call and invokes the error
@@ -89,25 +88,24 @@ struct ggml_cann_device_info {
     * @brief Information about a single CANN device.
     */
    struct cann_device_info {
        int    cc;              /**< Compute capability. */
        size_t smpb;            /**< Maximum shared memory per block. */
        bool   vmm;             /**< Virtual memory support. */
        size_t vmm_granularity; /**< Granularity of virtual memory. */
        size_t total_vram;      /**< Total video RAM available on the device. */
    };

-    cann_device_info devices[GGML_CANN_MAX_DEVICES] =
-        {}; /**< Array of CANN device information. */
+    cann_device_info devices[GGML_CANN_MAX_DEVICES] = {}; /**< Array of CANN device information. */
};
-const ggml_cann_device_info& ggml_cann_info();
+const ggml_cann_device_info & ggml_cann_info();

void    ggml_cann_set_device(int32_t device);
int32_t ggml_cann_get_device();

-std::optional<std::string> get_env(const std::string& name);
-bool parse_bool(const std::string& value);
-int parse_integer(const std::string& value);
+std::optional<std::string> get_env(const std::string & name);
+bool                       parse_bool(const std::string & value);
+int                        parse_integer(const std::string & value);

/**
 * @brief Abstract base class for memory pools used by CANN.
@@ -126,7 +124,7 @@ struct ggml_cann_pool {
     *             will be stored.
     * @return Pointer to the allocated memory block.
     */
-    virtual void* alloc(size_t size, size_t* actual_size) = 0;
+    virtual void * alloc(size_t size, size_t * actual_size) = 0;

    /**
     * @brief Frees a previously allocated memory block.
@@ -136,16 +134,16 @@ struct ggml_cann_pool {
     * @note Note that all CANN operators run asynchronously. Make sure the memory is
     *       still available before the operator has finished.
     */
-    virtual void free(void* ptr, size_t size) = 0;
+    virtual void free(void * ptr, size_t size) = 0;
};

/**
 * @brief RAII wrapper for managing memory allocations from a CANN memory pool.
 */
struct ggml_cann_pool_alloc {
-    ggml_cann_pool* pool = nullptr; /**< Pointer to the memory pool. */
-    void* ptr = nullptr;            /**< Pointer to the allocated memory block. */
-    size_t actual_size = 0;         /**< Actual size of the allocated memory block. */
+    ggml_cann_pool * pool        = nullptr; /**< Pointer to the memory pool. */
+    void *           ptr         = nullptr; /**< Pointer to the allocated memory block. */
+    size_t           actual_size = 0;       /**< Actual size of the allocated memory block. */

    /**
     * @brief Default constructor.
@@ -156,16 +154,14 @@ struct ggml_cann_pool_alloc {
     * @brief Constructor that initializes the memory pool.
     * @param pool Reference to the memory pool.
     */
-    explicit ggml_cann_pool_alloc(ggml_cann_pool& pool) : pool(&pool) {}
+    explicit ggml_cann_pool_alloc(ggml_cann_pool & pool) : pool(&pool) {}

    /**
     * @brief Constructor that initializes the memory pool and allocates memory.
     * @param pool Reference to the memory pool.
     * @param size Size of the memory block to allocate.
     */
-    ggml_cann_pool_alloc(ggml_cann_pool& pool, size_t size) : pool(&pool) {
-        alloc(size);
-    }
+    ggml_cann_pool_alloc(ggml_cann_pool & pool, size_t size) : pool(&pool) { alloc(size); }

    /**
     * @brief Destructor that frees the allocated memory block.
@@ -181,7 +177,7 @@ struct ggml_cann_pool_alloc {
     * @param size Size of the memory block to allocate.
     * @return Pointer to the allocated memory block.
     */
-    void* alloc(size_t size) {
+    void * alloc(size_t size) {
        GGML_ASSERT(pool != nullptr);
        GGML_ASSERT(ptr == nullptr);
        ptr = pool->alloc(size, &this->actual_size);
@@ -194,7 +190,7 @@ struct ggml_cann_pool_alloc {
     * @param size Size of the memory block to allocate.
     * @return Pointer to the allocated memory block.
     */
-    void* alloc(ggml_cann_pool& pool, size_t size) {
+    void * alloc(ggml_cann_pool & pool, size_t size) {
        this->pool = &pool;
        return alloc(size);
    }
@@ -203,25 +199,25 @@ struct ggml_cann_pool_alloc {
     * @brief Gets the pointer to the allocated memory block.
     * @return Pointer to the allocated memory block.
     */
-    void* get() { return ptr; }
+    void * get() { return ptr; }

    // Deleted copy constructor
-    ggml_cann_pool_alloc(const ggml_cann_pool_alloc&) = delete;
+    ggml_cann_pool_alloc(const ggml_cann_pool_alloc &) = delete;

    // Deleted move constructor
-    ggml_cann_pool_alloc(ggml_cann_pool_alloc&&) = delete;
+    ggml_cann_pool_alloc(ggml_cann_pool_alloc &&) = delete;

    // Deleted copy assignment operator
-    ggml_cann_pool_alloc& operator=(const ggml_cann_pool_alloc&) = delete;
+    ggml_cann_pool_alloc & operator=(const ggml_cann_pool_alloc &) = delete;

    // Deleted move assignment operator
-    ggml_cann_pool_alloc& operator=(ggml_cann_pool_alloc&&) = delete;
+    ggml_cann_pool_alloc & operator=(ggml_cann_pool_alloc &&) = delete;
};
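A RAII usage sketch (the workspace size is arbitrary; `ctx.pool()` is the accessor defined later in this header):

    static void example_scratch(ggml_backend_cann_context & ctx) {
        ggml_cann_pool_alloc scratch(ctx.pool(), 1 << 20);  // 1 MiB workspace
        void * buf = scratch.get();
        // ... hand buf to ACL operators running on ctx.stream() ...
    }  // the buffer is returned to the pool here; mind the async note on free()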
/**
 * @brief Function pointer type for ACLNN operator calls.
 */
-using aclnn_func_t = aclnnStatus (*)(void*, uint64_t, aclOpExecutor*, aclrtStream);
+using aclnn_func_t = aclnnStatus (*)(void *, uint64_t, aclOpExecutor *, aclrtStream);

/**
 * @brief Base class for all CANN tasks to be submitted to the task queue.
@@ -229,7 +225,7 @@ using aclnn_func_t = aclnnStatus (*)(void*, uint64_t, aclOpExecutor*, aclrtStrea
 * Users should override the run_task() method with actual task logic.
 */
class cann_task {
-public:
+  public:
    virtual void run_task() {}
};
@@ -237,16 +233,20 @@ public:
 * @brief A lock-free ring-buffer based task queue for asynchronously executing cann_task instances.
 */
class cann_task_queue {
-public:
+  public:
    /**
     * @brief Constructs a task queue with a fixed power-of-two capacity for a specific device.
     *
     * @param capacity Queue capacity. Must be a power of 2.
     * @param device Target device ID (used for context setting).
     */
-    explicit cann_task_queue(size_t capacity, int32_t device)
-        : buffer_(capacity), capacity_(capacity), head_(0), tail_(0),
-          running_(false), device_(device) {
+    explicit cann_task_queue(size_t capacity, int32_t device) :
+        buffer_(capacity),
+        capacity_(capacity),
+        head_(0),
+        tail_(0),
+        running_(false),
+        device_(device) {
        GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2");
        mask_ = capacity_ - 1;
    }
@@ -257,7 +257,7 @@ public:
     * @param item Unique pointer to the task.
     * @return true if the task was successfully enqueued, false if the queue was full.
     */
-    bool enqueue(std::unique_ptr<cann_task>&& item) {
+    bool enqueue(std::unique_ptr<cann_task> && item) {
        size_t next_tail = (tail_ + 1) & mask_;

        if (next_tail == head_) {
@@ -276,17 +276,16 @@ public:
     *
     * @param task Task to be submitted.
     */
-    void submit_task(std::unique_ptr<cann_task>&& task) {
-        while(!enqueue(std::move(task))) {
+    void submit_task(std::unique_ptr<cann_task> && task) {
+        while (!enqueue(std::move(task))) {
            std::this_thread::yield();
            continue;
        }

        if (!running_) {
            running_ = true;
            thread_  = std::thread(&cann_task_queue::execute, this);
        }
    }
    /**
@@ -309,7 +308,7 @@ public:
        }
    }

-private:
+  private:
    /**
     * @brief Worker thread function that continuously dequeues and executes tasks.
     */
@@ -317,7 +316,7 @@ private:
        ggml_cann_set_device(device_);
        while (running_) {
-            if(head_ == tail_) {
+            if (head_ == tail_) {
                std::this_thread::yield();
                continue;
            }
@@ -330,24 +329,24 @@ private:
    }

    std::vector<std::unique_ptr<cann_task>> buffer_;
    const size_t                            capacity_;
    size_t                                  mask_;
    size_t                                  head_;
    size_t                                  tail_;
    bool                                    running_;
    std::thread                             thread_;
    int32_t                                 device_;
};
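A submission sketch; `log_task` is a hypothetical subclass, while the queue and context come from this header:

    class log_task : public cann_task {
      public:
        void run_task() override { GGML_LOG_INFO("ran on the CANN worker thread\n"); }
    };

    static void example_submit(ggml_backend_cann_context & ctx) {
        // submit_task() spins until the ring buffer has room and lazily
        // starts the single worker thread on first use.
        ctx.task_queue.submit_task(std::make_unique<log_task>());
    }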
#ifdef USE_ACL_GRAPH
struct ggml_graph_node_properties {
    // dst tensor
    void *  node_address;
    int64_t ne[GGML_MAX_DIMS];
    size_t  nb[GGML_MAX_DIMS];

    // src tensor
    void *  src_address[GGML_MAX_SRC];
    int64_t src_ne[GGML_MAX_SRC][GGML_MAX_DIMS];
    size_t  src_nb[GGML_MAX_SRC][GGML_MAX_DIMS];
@@ -376,13 +375,11 @@ struct ggml_cann_graph {
 * move existing graphs to the front (most recently used), and clear the cache.
 */
struct ggml_cann_graph_lru_cache {
-    size_t capacity; /**< Maximum number of graphs in the cache. */
-    std::list<ggml_cann_graph*> cache_list; /**< List storing cached graphs as raw pointers. */
+    size_t                       capacity;   /**< Maximum number of graphs in the cache. */
+    std::list<ggml_cann_graph *> cache_list; /**< List storing cached graphs as raw pointers. */

-    ggml_cann_graph_lru_cache() {
-        capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"));
-    }
+    ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); }
    /**
     * @brief Push a new graph to the front of the cache.
@@ -390,11 +387,11 @@ struct ggml_cann_graph_lru_cache {
     * @param new_node Pointer to the new ggml_cann_graph to cache.
     *        Ownership is transferred to the cache (cache will delete it).
     */
-    void push(ggml_cann_graph* new_node) {
+    void push(ggml_cann_graph * new_node) {
        if (cache_list.size() >= capacity) {
-            ggml_cann_graph* old = cache_list.back();
+            ggml_cann_graph * old = cache_list.back();
            cache_list.pop_back();
            delete old;  // free the old graph
        }
        cache_list.push_front(new_node);
    }
@@ -403,7 +400,7 @@ struct ggml_cann_graph_lru_cache {
     * @brief Move an existing graph to the front of the cache.
     * @param node Pointer to the ggml_cann_graph to move.
     */
-    void move_to_front(ggml_cann_graph* node) {
+    void move_to_front(ggml_cann_graph * node) {
        cache_list.remove(node);
        cache_list.push_front(node);
    }
@@ -421,92 +418,89 @@ struct ggml_cann_graph_lru_cache {
    /**
     * @brief Destructor that clears the cache and frees all cached graphs.
     */
-    ~ggml_cann_graph_lru_cache() {
-        clear();
-    }
+    ~ggml_cann_graph_lru_cache() { clear(); }
};
#endif  // USE_ACL_GRAPH
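The intended access pattern, as a sketch (the lookup that decides between hit and miss is omitted; on a miss the cache takes ownership of `g`):

    #ifdef USE_ACL_GRAPH
    static void example_lru(ggml_cann_graph_lru_cache & cache, ggml_cann_graph * g, bool cache_hit) {
        if (cache_hit) {
            cache.move_to_front(g);  // reuse: mark as most recently used
        } else {
            cache.push(g);           // miss: may delete the least recently used graph
        }
    }
    #endif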
struct ggml_cann_rope_cache {
    ~ggml_cann_rope_cache() {
-        if(theta_scale_cache != nullptr) {
+        if (theta_scale_cache != nullptr) {
            ACL_CHECK(aclrtFree(theta_scale_cache));
        }
-        if(sin_cache != nullptr) {
+        if (sin_cache != nullptr) {
            ACL_CHECK(aclrtFree(sin_cache));
        }
-        if(cos_cache != nullptr) {
+        if (cos_cache != nullptr) {
            ACL_CHECK(aclrtFree(cos_cache));
        }
    }

-    void* theta_scale_cache = nullptr;
-    int64_t theta_scale_length = 0;
+    void *  theta_scale_cache  = nullptr;
+    int64_t theta_scale_length = 0;
    // sin/cos cache, used only to accelerate first layer on each device
-    void* sin_cache = nullptr;
-    void* cos_cache = nullptr;
-    int64_t position_length = 0;
+    void *  sin_cache       = nullptr;
+    void *  cos_cache       = nullptr;
+    int64_t position_length = 0;
    // Properties to check before reusing the sincos cache
    bool  cached      = false;
    float ext_factor  = 0.0f;
    float theta_scale = 0.0f;
    float freq_scale  = 0.0f;
    float attn_factor = 0.0f;
    bool  is_neox     = false;
};
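A sketch of the reuse check these fields imply (the calling convention is assumed; the comparisons mirror the struct above):

    static bool can_reuse_sincos(const ggml_cann_rope_cache & c, float ext_factor, float theta_scale,
                                 float freq_scale, float attn_factor, bool is_neox) {
        return c.cached && c.ext_factor == ext_factor && c.theta_scale == theta_scale &&
               c.freq_scale == freq_scale && c.attn_factor == attn_factor && c.is_neox == is_neox;
    }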
struct ggml_cann_tensor_cache {
    ~ggml_cann_tensor_cache() {
-        if(cache != nullptr) {
+        if (cache != nullptr) {
            ACL_CHECK(aclrtFree(cache));
        }
    }

-    void* cache = nullptr;
-    int64_t size = 0;
+    void *  cache = nullptr;
+    int64_t size  = 0;
};
/**
 * @brief Context for managing CANN backend operations.
 */
struct ggml_backend_cann_context {
    int32_t     device;                /**< Device ID. */
    std::string name;                  /**< Name of the device. */
    std::string description;           /**< Description of the device. */
    aclrtEvent  copy_event = nullptr;  /**< Event for managing copy operations. */
#ifdef USE_ACL_GRAPH
    /// Cached CANN ACL graph used for executing the current ggml computation graph.
    ggml_cann_graph_lru_cache graph_lru_cache;
    bool                      acl_graph_mode = true;
#endif
    cann_task_queue task_queue;
    bool            async_mode;
    // Rope Cache
    ggml_cann_rope_cache rope_cache;
    // Constant Pool
    ggml_cann_tensor_cache rms_norm_one_tensor_cache;
    ggml_cann_tensor_cache rms_norm_zero_tensor_cache;

-    aclrtStream streams[GGML_CANN_MAX_STREAMS] = {nullptr}; /**< Array of streams for the device. */
+    aclrtStream streams[GGML_CANN_MAX_STREAMS] = { nullptr }; /**< Array of streams for the device. */
    /**
     * @brief Constructor for initializing the context with a given device.
     * @param device Device ID.
     */
-    explicit ggml_backend_cann_context(int device)
-        : device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
+    explicit ggml_backend_cann_context(int device) :
+        device(device),
+        name("CANN" + std::to_string(device)),
+        task_queue(1024, device) {
        ggml_cann_set_device(device);
        description = aclrtGetSocName();
        async_mode  = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
-        GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
-                      device, async_mode ? "ON" : "OFF");
+        GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, device, async_mode ? "ON" : "OFF");
#ifdef USE_ACL_GRAPH
        acl_graph_mode = parse_bool(get_env("GGML_CANN_ACL_GRAPH").value_or("on"));
-        GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n",
-                      __func__, device,
-                      acl_graph_mode ? "GRAPH" : "EAGER",
-                      acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
+        GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", __func__, device, acl_graph_mode ? "GRAPH" : "EAGER",
+                      acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
#endif
    }
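For reference, a sketch of the environment knobs read above, with the defaults this header applies when they are unset:

    static void example_read_env_knobs() {
        bool async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));   // default OFF
        bool graph_mode = parse_bool(get_env("GGML_CANN_ACL_GRAPH").value_or("on"));  // default ON
        int  cache_cap  = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"));
        (void) async_mode; (void) graph_mode; (void) cache_cap;
    }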
@@ -549,8 +543,7 @@ struct ggml_backend_cann_context {
    aclrtStream stream() { return stream(0); }

    // TODO: each stream should have a memory pool.
-    std::unique_ptr<ggml_cann_pool>
-        mem_pool; /**< Memory pool for the device. */
+    std::unique_ptr<ggml_cann_pool> mem_pool; /**< Memory pool for the device. */
    /**
     * @brief Create a new memory pool for a given device.
@@ -563,7 +556,7 @@ struct ggml_backend_cann_context {
     * @brief Get or create the memory pool for the context.
     * @return Reference to the memory pool.
     */
-    ggml_cann_pool& pool() {
+    ggml_cann_pool & pool() {
        if (mem_pool == nullptr) {
            mem_pool = new_pool_for_device(device);
        }

1109
ggml/src/ggml-cann/ggml-cann.cpp Executable file → Normal file

File diff suppressed because it is too large