CANN: format code using .clang-format (#15863)

This commit applies .clang-format rules to all source files under the
ggml-cann directory to ensure consistent coding style and readability.
The .clang-format option `SortIncludes: false` has been set to disable
automatic reordering of include directives.
No functional changes are introduced.

Co-authored-by: hipudding <huafengchun@gmail.com>
This commit is contained in:
Chenguang Li
2025-10-16 16:41:11 +08:00
committed by GitHub
parent 6f5d924637
commit 7a50cf388a
6 changed files with 2063 additions and 2332 deletions

49
ggml/src/ggml-cann/acl_tensor.cpp Executable file → Normal file
View File

@@ -51,8 +51,11 @@ aclDataType ggml_cann_type_mapping(ggml_type type) {
return ACL_DT_UNDEFINED;
}
aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
size_t* nb, int64_t dims, aclFormat format,
aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
int64_t * ne,
size_t * nb,
int64_t dims,
aclFormat format,
size_t offset) {
// If tensor is bcasted, Up to GGML_MAX_DIMS additional dimensions will be
// added.
@@ -84,10 +87,8 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne,
std::reverse(acl_ne, acl_ne + final_dims);
std::reverse(acl_stride, acl_stride + final_dims);
aclTensor* acl_tensor = aclCreateTensor(
acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
elem_offset, format, &acl_storage_len, 1,
tensor->data);
aclTensor * acl_tensor = aclCreateTensor(acl_ne, final_dims, ggml_cann_type_mapping(tensor->type), acl_stride,
elem_offset, format, &acl_storage_len, 1, tensor->data);
return acl_tensor;
}
@@ -104,7 +105,8 @@ bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1) {
int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0,
const ggml_tensor * src1,
int64_t * bcast_src0_ne,
int64_t* bcast_src1_ne, size_t* bcast_src0_nb,
int64_t * bcast_src1_ne,
size_t * bcast_src0_nb,
size_t * bcast_src1_nb) {
GGML_ASSERT(ggml_can_repeat(src1, src0));
int bcast_dim_cnt = 0;
@@ -119,21 +121,26 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0,
// Need to add an extra dim.
bcast_src0_ne[bcast_dim_cnt] = nr;
bcast_src1_ne[bcast_dim_cnt] = 1;
bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] *
bcast_src0_ne[bcast_dim_cnt - 1];
bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] *
bcast_src1_ne[bcast_dim_cnt - 1];
bcast_src0_nb[bcast_dim_cnt] = bcast_src0_nb[bcast_dim_cnt - 1] * bcast_src0_ne[bcast_dim_cnt - 1];
bcast_src1_nb[bcast_dim_cnt] = bcast_src1_nb[bcast_dim_cnt - 1] * bcast_src1_ne[bcast_dim_cnt - 1];
bcast_dim_cnt++;
}
}
return bcast_dim_cnt;
}
int64_t ggml_cann_get_mulmat_bcast_shape(
const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb) {
int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne,
const int64_t * weight_ne,
const int64_t * dst_ne,
const size_t * input_nb,
const size_t * weight_nb,
const size_t * dst_nb,
int64_t * bcast_input_ne,
int64_t * bcast_weight_ne,
int64_t * bcast_dst_ne,
size_t * bcast_input_nb,
size_t * bcast_weight_nb,
size_t * bcast_dst_nb) {
// input and dst shoule in same shape, except first two dims.
GGML_ASSERT(input_ne[2] == dst_ne[2]);
GGML_ASSERT(input_ne[3] == dst_ne[3]);
@@ -169,13 +176,9 @@ int64_t ggml_cann_get_mulmat_bcast_shape(
bcast_input_ne[bcast_dim_cnt] = input_ne[i] / nr;
bcast_dst_ne[bcast_dim_cnt] = dst_ne[i] / nr;
bcast_weight_ne[bcast_dim_cnt] = weight_ne[i];
bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] *
bcast_input_ne[bcast_dim_cnt - 1];
bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] *
bcast_dst_ne[bcast_dim_cnt - 1];
bcast_weight_nb[bcast_dim_cnt] =
bcast_weight_nb[bcast_dim_cnt - 1] *
bcast_weight_ne[bcast_dim_cnt - 1];
bcast_input_nb[bcast_dim_cnt] = bcast_input_nb[bcast_dim_cnt - 1] * bcast_input_ne[bcast_dim_cnt - 1];
bcast_dst_nb[bcast_dim_cnt] = bcast_dst_nb[bcast_dim_cnt - 1] * bcast_dst_ne[bcast_dim_cnt - 1];
bcast_weight_nb[bcast_dim_cnt] = bcast_weight_nb[bcast_dim_cnt - 1] * bcast_weight_ne[bcast_dim_cnt - 1];
bcast_dim_cnt++;
}
}

55
ggml/src/ggml-cann/acl_tensor.h Executable file → Normal file
View File

@@ -62,8 +62,10 @@ aclDataType ggml_cann_type_mapping(ggml_type type);
* @param offset Offset in bytes for the ACL tensor data. Defaults to 0.
* @return Pointer to the created ACL tensor.
*/
aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = nullptr,
size_t* nb = nullptr, int64_t dims = 0,
aclTensor * ggml_cann_create_tensor(const ggml_tensor * tensor,
int64_t * ne = nullptr,
size_t * nb = nullptr,
int64_t dims = 0,
aclFormat format = ACL_FORMAT_ND,
size_t offset = 0);
@@ -88,8 +90,11 @@ aclTensor* ggml_cann_create_tensor(const ggml_tensor* tensor, int64_t* ne = null
* @return Pointer to the created ACL tensor.
*/
template <typename TYPE>
aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
TYPE type_size, int64_t* ne, TYPE* nb,
aclTensor * ggml_cann_create_tensor(void * data_ptr,
aclDataType dtype,
TYPE type_size,
int64_t * ne,
TYPE * nb,
int64_t dims,
aclFormat format = ACL_FORMAT_ND,
size_t offset = 0) {
@@ -110,8 +115,7 @@ aclTensor* ggml_cann_create_tensor(void* data_ptr, aclDataType dtype,
std::reverse(tmp_stride, tmp_stride + dims);
aclTensor * acl_tensor =
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size,
format, &acl_storage_len, 1, data_ptr);
aclCreateTensor(tmp_ne, dims, dtype, tmp_stride, offset / type_size, format, &acl_storage_len, 1, data_ptr);
return acl_tensor;
}
@@ -187,9 +191,12 @@ bool ggml_cann_need_bcast(const ggml_tensor* t0, const ggml_tensor* t1);
* dim1 in a inserted dim, should add nb for dim1,
* and all other nb moves to next in order.
*/
int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* src1,
int64_t* bcast_ne_src0, int64_t* bcast_ne_src1,
size_t* bcast_nb_src0, size_t* bcast_nb_src1);
int64_t ggml_cann_get_bcast_shape(const ggml_tensor * src0,
const ggml_tensor * src1,
int64_t * bcast_ne_src0,
int64_t * bcast_ne_src1,
size_t * bcast_nb_src0,
size_t * bcast_nb_src1);
// Bcast macro to avoid duplicate code.
#define BCAST_SHAPE(src0, src1) \
@@ -197,9 +204,8 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* sr
int64_t bcast_##src1##_ne[GGML_MAX_DIMS * 2]; \
size_t bcast_##src0##_nb[GGML_MAX_DIMS * 2]; \
size_t bcast_##src1##_nb[GGML_MAX_DIMS * 2]; \
int64_t bcast_dims = ggml_cann_get_bcast_shape( \
src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, bcast_##src0##_nb, \
bcast_##src1##_nb);
int64_t bcast_dims = ggml_cann_get_bcast_shape(src0, src1, bcast_##src0##_ne, bcast_##src1##_ne, \
bcast_##src0##_nb, bcast_##src1##_nb);
#define BCAST_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
@@ -233,11 +239,18 @@ int64_t ggml_cann_get_bcast_shape(const ggml_tensor* src0, const ggml_tensor* sr
* before cast dim.
* @sa ggml_cann_get_bcast_shape
*/
int64_t ggml_cann_get_mulmat_bcast_shape(
const int64_t* input_ne, const int64_t* weight_ne, const int64_t* dst_ne,
const size_t* input_nb, const size_t* weight_nb, const size_t* dst_nb,
int64_t* bcast_input_ne, int64_t* bcast_weight_ne, int64_t* bcast_dst_ne,
size_t* bcast_input_nb, size_t* bcast_weight_nb, size_t* bcast_dst_nb);
int64_t ggml_cann_get_mulmat_bcast_shape(const int64_t * input_ne,
const int64_t * weight_ne,
const int64_t * dst_ne,
const size_t * input_nb,
const size_t * weight_nb,
const size_t * dst_nb,
int64_t * bcast_input_ne,
int64_t * bcast_weight_ne,
int64_t * bcast_dst_ne,
size_t * bcast_input_nb,
size_t * bcast_weight_nb,
size_t * bcast_dst_nb);
// Bcast macro to avoid duplicate code.
#define BCAST_MUL_MAT_SHAPE(input, weight, dst) \
@@ -248,11 +261,9 @@ int64_t ggml_cann_get_mulmat_bcast_shape(
size_t bcast_##weight##_nb[GGML_MAX_DIMS * 2]; \
size_t bcast_##dst##_nb[GGML_MAX_DIMS * 2]; \
int64_t bcast_dims = ggml_cann_get_mulmat_bcast_shape( \
input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, \
bcast_##input##_ne, bcast_##weight##_ne, bcast_##dst##_ne, \
bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
input->ne, weight->ne, dst->ne, input->nb, weight->nb, dst->nb, bcast_##input##_ne, bcast_##weight##_ne, \
bcast_##dst##_ne, bcast_##input##_nb, bcast_##weight##_nb, bcast_##dst##_nb);
#define BCAST_MUL_MAT_PARAM(tensor) \
bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
#define BCAST_MUL_MAT_PARAM(tensor) bcast_##tensor##_ne, bcast_##tensor##_nb, bcast_dims
#endif // CANN_ACL_TENSOR_H

1134
ggml/src/ggml-cann/aclnn_ops.cpp Executable file → Normal file

File diff suppressed because it is too large Load Diff

195
ggml/src/ggml-cann/aclnn_ops.h Executable file → Normal file
View File

@@ -274,8 +274,7 @@ void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
* @param dst The destination tensor where the upsampled values will be stored.
* dst->op is `GGML_OP_UPSCALE`.
*/
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context& ctx,
ggml_tensor* dst);
void ggml_cann_upsample_nearest2d(ggml_backend_cann_context & ctx, ggml_tensor * dst);
/**
* @brief Pads a ggml tensor to match the dimensions of the destination tensor
@@ -509,8 +508,10 @@ void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
* @param acl_src1 The second source tensor.
* @param acl_dst The destination tensor where the result will be stored.
*/
void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
void aclnn_add(ggml_backend_cann_context & ctx,
aclTensor * acl_src0,
aclTensor * acl_src1,
aclTensor * acl_dst = nullptr);
/**
* @brief Sub two tensors element-wise and stores the result in a destination
@@ -527,8 +528,10 @@ void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
* @param acl_src1 The second source tensor.
* @param acl_dst The destination tensor where the result will be stored.
*/
void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
void aclnn_sub(ggml_backend_cann_context & ctx,
aclTensor * acl_src0,
aclTensor * acl_src1,
aclTensor * acl_dst = nullptr);
/**
* @brief Performs element-wise multiplication of two tensors and stores the
@@ -546,8 +549,10 @@ void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
* @param acl_other The second tensor for element-wise multiplication.
* @param acl_dst The destination tensor where the result will be stored.
*/
void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
aclTensor* acl_other, aclTensor* acl_dst = nullptr);
void aclnn_mul(ggml_backend_cann_context & ctx,
aclTensor * acl_src,
aclTensor * acl_other,
aclTensor * acl_dst = nullptr);
/**
* @brief Matrix division, optionally in-place.
@@ -567,8 +572,10 @@ void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
* @param inplace Flag indicating whether to perform the operation in-place on
* `acl_src`.
*/
void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
aclTensor* acl_other, aclTensor* acl_dst = nullptr);
void aclnn_div(ggml_backend_cann_context & ctx,
aclTensor * acl_src,
aclTensor * acl_other,
aclTensor * acl_dst = nullptr);
/**
* @brief Applies element-wise cosine function to the elements of a tensor.
@@ -584,8 +591,7 @@ void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
* @param acl_dst The destination tensor where the cosine results will be
* stored.
*/
void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
aclTensor* acl_dst);
void aclnn_cos(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
/**
* @brief Applies element-wise sine function to the elements of a tensor.
@@ -602,8 +608,7 @@ void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
* @param acl_src The source tensor on which the sine function will be applied.
* @param acl_dst The destination tensor where the sine results will be stored.
*/
void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
aclTensor* acl_dst);
void aclnn_sin(ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst);
/**
* @brief Prepares broadcast-compatible ACL tensors for two input tensors and one
@@ -621,8 +626,12 @@ void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
* @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
* @param acl_dst Output pointer to the created ACL tensor corresponding to dst.
*/
void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst,
aclTensor ** acl_src0, aclTensor ** acl_src1, aclTensor ** acl_dst);
void bcast_shape(ggml_tensor * src0,
ggml_tensor * src1,
ggml_tensor * dst,
aclTensor ** acl_src0,
aclTensor ** acl_src1,
aclTensor ** acl_dst);
/**
* @brief Computes the 1D transposed convolution (deconvolution) of a ggml
@@ -750,47 +759,34 @@ using any_acl_resource = std::unique_ptr<void, std::function<void(void*)>>;
*
* @tparam T ACL resource type.
*/
template<typename T>
struct acl_resource_traits;
template <typename T> struct acl_resource_traits;
/**
* @brief Specialization for aclTensor, defines how to destroy an aclTensor resource.
*/
template<>
struct acl_resource_traits<aclTensor> {
static void destroy(void* p) {
ACL_CHECK(aclDestroyTensor(static_cast<aclTensor*>(p)));
}
template <> struct acl_resource_traits<aclTensor> {
static void destroy(void * p) { ACL_CHECK(aclDestroyTensor(static_cast<aclTensor *>(p))); }
};
/**
* @brief Specialization for aclIntArray, defines how to destroy an aclIntArray resource.
*/
template<>
struct acl_resource_traits<aclIntArray> {
static void destroy(void* p) {
ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray*>(p)));
}
template <> struct acl_resource_traits<aclIntArray> {
static void destroy(void * p) { ACL_CHECK(aclDestroyIntArray(static_cast<aclIntArray *>(p))); }
};
/**
* @brief Specialization for aclScalar, defines how to destroy an aclScalar resource.
*/
template<>
struct acl_resource_traits<aclScalar> {
static void destroy(void* p) {
ACL_CHECK(aclDestroyScalar(static_cast<aclScalar*>(p)));
}
template <> struct acl_resource_traits<aclScalar> {
static void destroy(void * p) { ACL_CHECK(aclDestroyScalar(static_cast<aclScalar *>(p))); }
};
/**
* @brief Specialization for aclTensorList, defines how to destroy an aclTensorList resource.
*/
template<>
struct acl_resource_traits<aclTensorList> {
static void destroy(void* p) {
ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList*>(p)));
}
template <> struct acl_resource_traits<aclTensorList> {
static void destroy(void * p) { ACL_CHECK(aclDestroyTensorList(static_cast<aclTensorList *>(p))); }
};
/**
@@ -800,14 +796,8 @@ struct acl_resource_traits<aclTensorList> {
* @param ptr Raw pointer to ACL resource.
* @return any_acl_resource Smart pointer that handles destruction.
*/
template<typename T>
any_acl_resource make_acl_resource(T* ptr) {
return any_acl_resource(
static_cast<void*>(ptr),
[](void* p) {
acl_resource_traits<T>::destroy(p);
}
);
template <typename T> any_acl_resource make_acl_resource(T * ptr) {
return any_acl_resource(static_cast<void *>(ptr), [](void * p) { acl_resource_traits<T>::destroy(p); });
}
/**
@@ -817,8 +807,7 @@ any_acl_resource make_acl_resource(T* ptr) {
* @param vec Target vector to hold ACL resources.
* @param args Raw pointers to ACL resources.
*/
template<typename... Args>
void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
template <typename... Args> void register_acl_resources(std::vector<any_acl_resource> & vec, Args *... args) {
(vec.emplace_back(make_acl_resource(args)), ...);
}
@@ -827,17 +816,18 @@ void register_acl_resources(std::vector<any_acl_resource>& vec, Args*... args) {
*/
class aclnn_task : public cann_task {
public:
aclnn_task(aclnn_func_t aclnn_func, void * workspace_addr,
uint64_t workspace_size, aclOpExecutor * executor,
aclnn_task(aclnn_func_t aclnn_func,
void * workspace_addr,
uint64_t workspace_size,
aclOpExecutor * executor,
aclrtStream stream) :
aclnn_func_(aclnn_func),
workspace_addr_(workspace_addr),
workspace_size_(workspace_size),
executor_(executor),
stream_(stream) {}
virtual void run_task() override {
ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_));
}
virtual void run_task() override { ACL_CHECK(aclnn_func_(workspace_addr_, workspace_size_, executor_, stream_)); }
private:
aclnn_func_t aclnn_func_;
void * workspace_addr_;
@@ -851,13 +841,9 @@ class aclnn_task : public cann_task {
*/
class release_resource_task : public cann_task {
public:
release_resource_task(std::vector<any_acl_resource>&& resources){
resource_ = std::move(resources);
}
release_resource_task(std::vector<any_acl_resource> && resources) { resource_ = std::move(resources); }
virtual void run_task() override {
resource_.clear();
}
virtual void run_task() override { resource_.clear(); }
private:
std::vector<any_acl_resource> resource_;
};
@@ -867,13 +853,14 @@ private:
*/
class async_memcpy_task : public cann_task {
public:
async_memcpy_task(void* dst, const void* src, size_t size,
aclrtMemcpyKind kind, aclrtStream stream)
: dst_(dst), src_(src), size_(size), kind_(kind), stream_(stream) {}
async_memcpy_task(void * dst, const void * src, size_t size, aclrtMemcpyKind kind, aclrtStream stream) :
dst_(dst),
src_(src),
size_(size),
kind_(kind),
stream_(stream) {}
virtual void run_task() override {
ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_));
}
virtual void run_task() override { ACL_CHECK(aclrtMemcpyAsync(dst_, size_, src_, size_, kind_, stream_)); }
private:
void * dst_;
const void * src_;
@@ -887,12 +874,13 @@ private:
*/
class async_memset_task : public cann_task {
public:
async_memset_task(void* buffer, size_t size, int32_t value, aclrtStream stream)
: buffer_(buffer), size_(size), value_(value), stream_(stream) {}
async_memset_task(void * buffer, size_t size, int32_t value, aclrtStream stream) :
buffer_(buffer),
size_(size),
value_(value),
stream_(stream) {}
virtual void run_task() override {
ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_));
}
virtual void run_task() override { ACL_CHECK(aclrtMemsetAsync(buffer_, size_, value_, size_, stream_)); }
private:
void * buffer_;
size_t size_;
@@ -931,8 +919,7 @@ class async_memset_task : public cann_task {
} \
if (CTX.async_mode) { \
auto task = \
std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, \
executor, CTX.stream()); \
std::make_unique<aclnn_task>(aclnn##OP_NAME, workspaceAddr, workspaceSize, executor, CTX.stream()); \
CTX.task_queue.submit_task(std::move(task)); \
} else { \
ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, CTX.stream())); \
@@ -947,8 +934,7 @@ class async_memset_task : public cann_task {
* @param ctx Backend context which manages task submission and async mode.
* @param args Pointers to ACL resources to be released.
*/
template <typename... Args>
void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
template <typename... Args> void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... args) {
std::vector<any_acl_resource> resources;
register_acl_resources(resources, std::forward<Args>(args)...);
if (ctx.async_mode) {
@@ -966,8 +952,11 @@ void ggml_cann_release_resources(ggml_backend_cann_context & ctx, Args &&... arg
* @param len Size of memory to copy (in bytes).
* @param kind Type of memory copy (host-to-device, device-to-host, etc).
*/
inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
const void * src, size_t len, aclrtMemcpyKind kind) {
inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx,
void * dst,
const void * src,
size_t len,
aclrtMemcpyKind kind) {
if (ctx.async_mode) {
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx.stream());
ctx.task_queue.submit_task(std::move(task));
@@ -976,8 +965,11 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context & ctx, void * dst,
}
}
inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
const void * src, size_t len, aclrtMemcpyKind kind) {
inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx,
void * dst,
const void * src,
size_t len,
aclrtMemcpyKind kind) {
if (ctx->async_mode) {
auto task = std::make_unique<async_memcpy_task>(dst, const_cast<void *>(src), len, kind, ctx->stream());
ctx->task_queue.submit_task(std::move(task));
@@ -994,8 +986,7 @@ inline void ggml_cann_async_memcpy(ggml_backend_cann_context * ctx, void * dst,
* @param size Size of the memory buffer (in bytes).
* @param value Value to set in the buffer.
*/
inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer,
size_t size, int value) {
inline void ggml_cann_async_memset(ggml_backend_cann_context & ctx, void * buffer, size_t size, int value) {
if (ctx.async_mode) {
auto task = std::make_unique<async_memset_task>(buffer, size, value, ctx.stream());
ctx.task_queue.submit_task(std::move(task));
@@ -1043,16 +1034,10 @@ void ggml_cann_mul_mat_id(ggml_backend_cann_context& ctx, ggml_tensor* dst);
*/
static bool is_matmul_weight(const ggml_tensor * tensor) {
std::string name = ggml_get_name(tensor);
static const std::unordered_set<std::string> weight_suffixes{
"output.weight",
"attn_q.weight",
"attn_k.weight",
"attn_v.weight",
"attn_output.weight",
"ffn_gate.weight",
"ffn_up.weight",
"ffn_down.weight"
};
static const std::unordered_set<std::string> weight_suffixes{ "output.weight", "attn_q.weight",
"attn_k.weight", "attn_v.weight",
"attn_output.weight", "ffn_gate.weight",
"ffn_up.weight", "ffn_down.weight" };
for (const auto & suffix : weight_suffixes) {
if (name.find(suffix) != std::string::npos) {
@@ -1078,8 +1063,7 @@ static bool is_matmul_weight(const ggml_tensor* tensor) {
* @param ctx The CANN backend context used to manage execution and resources.
* @param dst The destination tensor.
*/
template <auto binary_op>
void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
template <auto binary_op> void ggml_cann_binary_op(ggml_backend_cann_context & ctx, ggml_tensor * dst) {
ggml_tensor * src0 = dst->src[0];
ggml_tensor * src1 = dst->src[1];
@@ -1094,7 +1078,6 @@ void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
ggml_cann_release_resources(ctx, acl_src0, acl_src1, acl_dst);
}
/**
* @brief Applies a unary operation to an input tensor using the CANN backend.
*
@@ -1138,9 +1121,9 @@ template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
*
* @see GGML_CANN_CALL_OP_UNARY
*/
void ggml_cann_op_unary(
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
ggml_backend_cann_context& ctx, ggml_tensor* dst);
void ggml_cann_op_unary(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
ggml_backend_cann_context & ctx,
ggml_tensor * dst);
/**
* @brief Applies a gated (GLU-style) unary operation using the CANN backend.
@@ -1172,9 +1155,9 @@ void ggml_cann_op_unary(
*
* @see GGML_CANN_CALL_OP_UNARY_GATED
*/
void ggml_cann_op_unary_gated(
std::function<void(ggml_backend_cann_context&, aclTensor*, aclTensor*)> unary_op,
ggml_backend_cann_context& ctx, ggml_tensor* dst);
void ggml_cann_op_unary_gated(std::function<void(ggml_backend_cann_context &, aclTensor *, aclTensor *)> unary_op,
ggml_backend_cann_context & ctx,
ggml_tensor * dst);
/**
* @brief Helper macro to call a unary ACL operator via ggml_cann_op_unary.
@@ -1199,14 +1182,11 @@ void ggml_cann_op_unary_gated(
*/
#define GGML_CANN_CALL_OP_UNARY(OP_NAME) \
do { \
auto lambda = [](ggml_backend_cann_context& ctx, \
aclTensor* acl_src, \
aclTensor* acl_dst) { \
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
}; \
ggml_cann_op_unary(lambda, ctx, dst); \
} \
while (0)
} while (0)
/**
* @brief Helper macro to call a gated unary ACL operator via ggml_cann_op_unary_gated.
@@ -1231,13 +1211,10 @@ void ggml_cann_op_unary_gated(
*/
#define GGML_CANN_CALL_OP_UNARY_GATED(OP_NAME) \
do { \
auto lambda = [](ggml_backend_cann_context& ctx, \
aclTensor* acl_src, \
aclTensor* acl_dst) { \
auto lambda = [](ggml_backend_cann_context & ctx, aclTensor * acl_src, aclTensor * acl_dst) { \
GGML_CANN_CALL_ACLNN_OP(ctx, OP_NAME, acl_src, acl_dst); \
}; \
ggml_cann_op_unary_gated(lambda, ctx, dst); \
} \
while (0)
} while (0)
#endif // CANN_ACLNN_OPS

45
ggml/src/ggml-cann/common.h Executable file → Normal file
View File

@@ -56,8 +56,7 @@
* @param line The line number at which the error occurred.
* @param msg The error message.
*/
[[noreturn]] void ggml_cann_error(const char* stmt, const char* func,
const char* file, int line, const char* msg);
[[noreturn]] void ggml_cann_error(const char * stmt, const char * func, const char * file, int line, const char * msg);
/**
* @brief Checks the result of a CANN function call and invokes the error
@@ -96,8 +95,7 @@ struct ggml_cann_device_info {
size_t total_vram; /**< Total video RAM available on the device. */
};
cann_device_info devices[GGML_CANN_MAX_DEVICES] =
{}; /**< Array of CANN device information. */
cann_device_info devices[GGML_CANN_MAX_DEVICES] = {}; /**< Array of CANN device information. */
};
const ggml_cann_device_info & ggml_cann_info();
@@ -163,9 +161,7 @@ struct ggml_cann_pool_alloc {
* @param pool Reference to the memory pool.
* @param size Size of the memory block to allocate.
*/
ggml_cann_pool_alloc(ggml_cann_pool& pool, size_t size) : pool(&pool) {
alloc(size);
}
ggml_cann_pool_alloc(ggml_cann_pool & pool, size_t size) : pool(&pool) { alloc(size); }
/**
* @brief Destructor that frees the allocated memory block.
@@ -244,9 +240,13 @@ public:
* @param capacity Queue capacity. Must be a power of 2.
* @param device Target device ID (used for context setting).
*/
explicit cann_task_queue(size_t capacity, int32_t device)
: buffer_(capacity), capacity_(capacity), head_(0), tail_(0),
running_(false), device_(device) {
explicit cann_task_queue(size_t capacity, int32_t device) :
buffer_(capacity),
capacity_(capacity),
head_(0),
tail_(0),
running_(false),
device_(device) {
GGML_ASSERT((capacity & (capacity - 1)) == 0 && "capacity must be power of 2");
mask_ = capacity_ - 1;
}
@@ -286,7 +286,6 @@ public:
running_ = true;
thread_ = std::thread(&cann_task_queue::execute, this);
}
}
/**
@@ -380,9 +379,7 @@ struct ggml_cann_graph_lru_cache {
std::list<ggml_cann_graph *> cache_list; /**< List storing cached graphs as raw pointers. */
ggml_cann_graph_lru_cache() {
capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12"));
}
ggml_cann_graph_lru_cache() { capacity = parse_integer(get_env("GGML_CANN_GRAPH_CACHE_CAPACITY").value_or("12")); }
/**
* @brief Push a new graph to the front of the cache.
@@ -421,9 +418,7 @@ struct ggml_cann_graph_lru_cache {
/**
* @brief Destructor that clears the cache and frees all cached graphs.
*/
~ggml_cann_graph_lru_cache() {
clear();
}
~ggml_cann_graph_lru_cache() { clear(); }
};
#endif // USE_ACL_GRAPH
@@ -493,19 +488,18 @@ struct ggml_backend_cann_context {
* @brief Constructor for initializing the context with a given device.
* @param device Device ID.
*/
explicit ggml_backend_cann_context(int device)
: device(device), name("CANN" + std::to_string(device)), task_queue(1024, device) {
explicit ggml_backend_cann_context(int device) :
device(device),
name("CANN" + std::to_string(device)),
task_queue(1024, device) {
ggml_cann_set_device(device);
description = aclrtGetSocName();
async_mode = parse_bool(get_env("GGML_CANN_ASYNC_MODE").value_or(""));
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__,
device, async_mode ? "ON" : "OFF");
GGML_LOG_INFO("%s: device %d async operator submission is %s\n", __func__, device, async_mode ? "ON" : "OFF");
#ifdef USE_ACL_GRAPH
acl_graph_mode = parse_bool(get_env("GGML_CANN_ACL_GRAPH").value_or("on"));
GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n",
__func__, device,
acl_graph_mode ? "GRAPH" : "EAGER",
GGML_LOG_INFO("%s: device %d execution mode is %s (%s)\n", __func__, device, acl_graph_mode ? "GRAPH" : "EAGER",
acl_graph_mode ? "acl graph enabled" : "acl graph disabled");
#endif
}
@@ -549,8 +543,7 @@ struct ggml_backend_cann_context {
aclrtStream stream() { return stream(0); }
// TODO: each stream should have a memory pool.
std::unique_ptr<ggml_cann_pool>
mem_pool; /**< Memory pool for the device. */
std::unique_ptr<ggml_cann_pool> mem_pool; /**< Memory pool for the device. */
/**
* @brief Create a new memory pool for a given device.

511
ggml/src/ggml-cann/ggml-cann.cpp Executable file → Normal file

File diff suppressed because it is too large Load Diff