CANN: Refactor to reduce duplicate code (#12731)

* CANN: Refactor to reduce duplicate code
* CANN: fix review comment
[File diff suppressed because it is too large]

@@ -31,20 +31,25 @@
  * IN THE SOFTWARE.
  */
 
-#include <aclnnop/aclnn_add.h>
+#include <aclnnop/aclnn_abs.h>
+#include <aclnnop/aclnn_neg.h>
+#include <aclnnop/aclnn_exp.h>
 #include <aclnnop/aclnn_arange.h>
 #include <aclnnop/aclnn_argsort.h>
 #include <aclnnop/aclnn_cat.h>
 #include <aclnnop/aclnn_clamp.h>
-#include <aclnnop/aclnn_div.h>
 #include <aclnnop/aclnn_gelu.h>
+#include <aclnnop/aclnn_gelu_v2.h>
+#include <aclnnop/aclnn_sigmoid.h>
 #include <aclnnop/aclnn_hardsigmoid.h>
 #include <aclnnop/aclnn_hardswish.h>
 #include <aclnnop/aclnn_leaky_relu.h>
-#include <aclnnop/aclnn_mul.h>
 #include <aclnnop/aclnn_relu.h>
 #include <aclnnop/aclnn_silu.h>
 #include <aclnnop/aclnn_tanh.h>
+#include <aclnnop/aclnn_sqrt.h>
+#include <aclnnop/aclnn_sin.h>
+#include <aclnnop/aclnn_cos.h>
 #include "acl_tensor.h"
 #include "common.h"
 
@@ -63,23 +68,6 @@
  */
 void ggml_cann_repeat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
-/**
- * @brief   Adds two ggml tensors using the CANN backend.
- *
- * @details This function performs an element-wise addition of two tensors. In
- *          case the tensors do not have the same shape, one or both tensors
- *          will be broadcasted to match the shape of the other before the
- *          addition is performed.The formula for the operation is given by:
- *          \f[
- *              \text{dst} = \text{acl_src0} + \alpha \cdot \text{acl_src1}
- *          \f]
- *
- * @param ctx The CANN context used for operations.
- * @param dst The ggml tensor representing the destination, result of the
- *            addition is stored at dst->data, and dst->op is `GGML_OP_ADD`
- */
-void ggml_cann_add(ggml_backend_cann_context& ctx, ggml_tensor* dst);
-
 /**
  * @brief   Applies the Leaky ReLU activation function to a tensor using the CANN
  *          backend.
@@ -131,19 +119,6 @@ void ggml_cann_concat(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  */
 void ggml_cann_arange(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
-/**
- * @brief   Computes the square of the elements of a ggml tensor using the CANN
- *          backend.
- * @details The function sets the second source tensor of the destination
- *          tensor `dst` to be equal to the first source tensor. This is
- *          effectively squaring the elements since the multiplication becomes
- *          `element * element`.
- * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the squared values will be stored,
- *            which dst->op is `GGML_OP_SQR`.
- */
-void ggml_cann_sqr(ggml_backend_cann_context& ctx, ggml_tensor* dst);
-
 /**
  * @brief   Applies a clamp operation to the elements of a ggml tensor using the
  *          CANN backend.
@@ -275,6 +250,20 @@ void ggml_cann_acc(ggml_backend_cann_context& ctx, ggml_tensor* dst);
  */
 void ggml_cann_sum_rows(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
+/**
+ * @brief   Computes the sum of elements in a ggml tensor.
+ *
+ * @details This function performs a reduction sum operation along the last
+ *          dimension of the input tensor `src`. The result of the sum is stored
+ *          in the destination tensor `dst`.
+ *
+ * @param ctx The CANN context used for operations.
+ * @param dst The destination tensor where the reduced values will be stored.
+ *
+ */
+
+void ggml_cann_sum(ggml_backend_cann_context& ctx, ggml_tensor* dst);
+
 /**
  * @brief   Upsamples a ggml tensor using nearest neighbor interpolation using
  *          the CANN backend.
@@ -500,128 +489,247 @@ void ggml_cann_rope(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 void ggml_cann_argmax(ggml_backend_cann_context& ctx, ggml_tensor* dst);
 
-/**
- * @brief   Computes the cosine of each element in a ggml tensor using the CANN backend.
- *
- * @details This function applies the cosine function element-wise to the input tensor.
- *          The computed cosine values are stored in the destination tensor `dst`.
- *          The operation is optimized using the CANN backend for improved performance.
- *
- * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the cosine values will be stored.
- *            dst->op is `GGML_OP_COS`.
- */
-void ggml_cann_cos(ggml_backend_cann_context& ctx, ggml_tensor* dst);
-
-/**
- * @brief   Computes the sine of each element in a ggml tensor using the CANN backend.
- *
- * @details This function applies the sine function element-wise to the input tensor.
- *          The computed sine values are stored in the destination tensor `dst`.
- *          The operation is optimized using the CANN backend for improved performance.
- *
- * @param ctx The CANN context used for operations.
- * @param dst The destination tensor where the sine values will be stored.
- *            dst->op is `GGML_OP_SIN`.
- */
-void ggml_cann_sin(ggml_backend_cann_context& ctx, ggml_tensor* dst);
-
-template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
-                                       aclTensor*, uint64_t*, aclOpExecutor**),
-          aclnnStatus execute(void*, uint64_t, aclOpExecutor*, aclrtStream)>
-void ggml_cann_mul_div(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+/**
+ * @brief Adds two tensors element-wise and stores the result in a destination
+ * tensor.
+ *
+ * This function performs the operation:
+ * \f[
+ *    dst = acl\_src0 + \alpha \times acl\_src1
+ * \f]
+ * where alpha is a scalar value and defaults to 1.0f.
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src0 The first source tensor.
+ * @param acl_src1 The second source tensor.
+ * @param acl_dst The destination tensor where the result will be stored.
+ */
+void aclnn_add(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
+    aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
+
+/**
+ * @brief Subtracts two tensors element-wise and stores the result in a
+ * destination tensor.
+ *
+ * This function performs the operation:
+ * \f[
+ *    dst = acl\_src0 - \alpha \times acl\_src1
+ * \f]
+ * where alpha is a scalar value and defaults to 1.0f.
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src0 The first source tensor.
+ * @param acl_src1 The second source tensor.
+ * @param acl_dst The destination tensor where the result will be stored.
+ */
+void aclnn_sub(ggml_backend_cann_context& ctx, aclTensor* acl_src0,
+    aclTensor* acl_src1, aclTensor* acl_dst = nullptr);
+
+/**
+ * @brief Performs element-wise multiplication of two tensors and stores the
+ * result in a destination tensor.
+ *
+ * This function performs element-wise multiplication of the tensors `acl_src`
+ * and `acl_other` and stores the result in the destination tensor `acl_dst`.
+ * The operation is defined as:
+ * \f[
+ *     \text{acl\_dst}_i = \text{acl\_src}_i \times \text{acl\_other}_i
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src The first tensor for element-wise multiplication.
+ * @param acl_other The second tensor for element-wise multiplication.
+ * @param acl_dst The destination tensor where the result will be stored.
+ */
+void aclnn_mul(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+    aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+
+/**
+ * @brief Element-wise division, optionally in-place.
+ *
+ * This function divides each element of the source tensor `acl_src` by the
+ * corresponding element of `acl_other` and stores the result in the
+ * destination tensor `acl_dst`. If `acl_dst` is null, the result is written
+ * in place to `acl_src`. The operation is defined as:
+ * \f[
+ *     \text{dst}_i = \frac{\text{acl\_src}_i}{\text{acl\_other}_i}
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src Numerator tensor.
+ * @param acl_other Denominator tensor.
+ * @param acl_dst The destination tensor where the result will be stored, or
+ * null to operate in place on `acl_src`.
+ */
+void aclnn_div(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+    aclTensor* acl_other, aclTensor* acl_dst = nullptr);
+
+/**
+ * @brief Applies the cosine function element-wise to the elements of a tensor.
+ *
+ * This function computes the cosine of each element in the source tensor
+ * `acl_src` and stores the result in the destination tensor `acl_dst`.
+ * The operation is defined as:
+ * \f[
+ *     \text{acl\_dst}_i = \cos(\text{acl\_src}_i)
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src The source tensor on which the cosine function will be
+ * applied.
+ * @param acl_dst The destination tensor where the cosine results will be
+ * stored.
+ */
+void aclnn_cos(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+    aclTensor* acl_dst);
+
+/**
+ * @brief Applies the sine function element-wise to the elements of a tensor.
+ *
+ * This function computes the sine of each element in the source tensor
+ * `acl_src` and stores the result in the destination tensor `acl_dst`.
+ * The operation is defined as:
+ * \f[
+ *     \text{acl\_dst}_i = \sin(\text{acl\_src}_i)
+ * \f]
+ *
+ * @param ctx The context for the CANN backend operations.
+ * @param acl_src The source tensor on which the sine function will be applied.
+ * @param acl_dst The destination tensor where the sine results will be stored.
+ */
+void aclnn_sin(ggml_backend_cann_context& ctx, aclTensor* acl_src,
+    aclTensor* acl_dst);
+
+/**
+ * @brief Launches an asynchronous task using the memory allocator.
+ *
+ * This macro submits an asynchronous task on the specified stream.
+ * The task uses memory allocated by the allocator. It is guaranteed
+ * that the memory will not be accessed by other tasks until this task
+ * completes, due to the sequential execution order within the same stream.
+ *
+ * @param OP_NAME aclnn operator name.
+ * @param args Additional arguments required by the task.
+ *
+ * @note
+ * Memory from the allocator will be "freed" immediately and can be
+ * reallocated to other pointers. However, it won't be accessed by any
+ * other task before this asynchronous task ends, because all tasks in the
+ * same stream are executed in queue order.
+ */
+#define GGML_CANN_CALL_ACLNN_OP(OP_NAME, ...)                                                \
+    do {                                                                                     \
+        uint64_t        workspaceSize = 0;                                                   \
+        aclOpExecutor * executor;                                                            \
+        void *          workspaceAddr = nullptr;                                             \
+                                                                                             \
+        ACL_CHECK(aclnn##OP_NAME##GetWorkspaceSize(__VA_ARGS__, &workspaceSize, &executor)); \
+                                                                                             \
+        if (workspaceSize > 0) {                                                             \
+            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);             \
+            workspaceAddr = workspace_allocator.get();                                       \
+        }                                                                                    \
+        ACL_CHECK(aclnn##OP_NAME(workspaceAddr, workspaceSize, executor, ctx.stream()));     \
+    } while (0)
+
+/**
+ * @brief Prepares broadcast-compatible ACL tensors for two input tensors and one output tensor.
+ *
+ * This function checks whether broadcasting is needed between `src0` and `src1`.
+ * If broadcasting is required, it calculates the proper shapes and creates
+ * ACL tensors with broadcast parameters. Otherwise, it directly creates ACL tensors
+ * based on the original tensor shapes.
+ *
+ * @param src0     The first input tensor (reference shape).
+ * @param src1     The second input tensor (possibly broadcasted).
+ * @param dst      The destination/output tensor.
+ * @param acl_src0 Output pointer to the created ACL tensor corresponding to src0.
+ * @param acl_src1 Output pointer to the created ACL tensor corresponding to src1.
+ * @param acl_dst  Output pointer to the created ACL tensor corresponding to dst.
+ */
+void bcast_shape(ggml_tensor * src0, ggml_tensor * src1, ggml_tensor * dst, aclTensor ** acl_src0,
+                 aclTensor ** acl_src1, aclTensor ** acl_dst);
+
+/**
+ * @brief Applies an element-wise operation to two input tensors using the
+ * CANN backend.
+ *
+ * This templated function takes a binary operator and applies it to two source tensors
+ * associated with the destination tensor. The function handles broadcasting as needed.
+ *
+ * @tparam binary_op A callable object (e.g., lambda or function pointer) representing
+ *         the binary operation to be performed. It must take three arguments:
+ *         (ggml_backend_cann_context&, aclTensor*, aclTensor*, aclTensor*).
+ *
+ * @param ctx The CANN backend context used to manage execution and resources.
+ * @param dst The destination tensor.
+ */
+template <auto binary_op>
+void ggml_cann_binary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src0 = dst->src[0];
     ggml_tensor* src1 = dst->src[1];
-    GGML_ASSERT(ggml_can_repeat(src1, src0) && ggml_are_same_shape(src0, dst));
 
     aclTensor* acl_src0;
     aclTensor* acl_src1;
     aclTensor* acl_dst;
 
     // Need bcast
-    if (!ggml_are_same_shape(src0, src1) && ggml_cann_need_bcast(src0, src1)) {
-        BCAST_SHAPE(src0, src1)
-        acl_src0 = ggml_cann_create_tensor(src0, BCAST_PARAM(src0));
-        acl_src1 = ggml_cann_create_tensor(src1, BCAST_PARAM(src1));
-        acl_dst = ggml_cann_create_tensor(dst, BCAST_PARAM(src0));
-    } else {
-        acl_src0 = ggml_cann_create_tensor(src0);
-        acl_src1 = ggml_cann_create_tensor(src1);
-        acl_dst = ggml_cann_create_tensor(dst);
-    }
-
-    uint64_t workspaceSize = 0;
-    aclOpExecutor* executor;
-    void* workspaceAddr = nullptr;
-
-    ACL_CHECK(getWorkspaceSize(acl_src0, acl_src1, acl_dst, &workspaceSize,
-                               &executor));
-    if (workspaceSize > 0) {
-        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
-        workspaceAddr = workspace_allocator.get();
-    }
-
-    aclrtStream main_stream = ctx.stream();
-    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
+    bcast_shape(src0, src1, dst, &acl_src0, &acl_src1, &acl_dst);
+    binary_op(ctx, acl_src0, acl_src1, acl_dst);
 
     ACL_CHECK(aclDestroyTensor(acl_src0));
     ACL_CHECK(aclDestroyTensor(acl_src1));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
-// Activation functions template.
-template <aclnnStatus getWorkspaceSize(const aclTensor*, aclTensor*, uint64_t*,
-                                       aclOpExecutor**),
-          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
-                              const aclrtStream)>
-void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
+/**
+ * @brief Applies a unary operation to an input tensor using the CANN backend.
+ *
+ * This templated function applies a unary operator to the source tensor of
+ * `dst` and stores the result in the destination tensor.
+ *
+ * @tparam unary_op A callable with the signature:
+ *         void(ggml_backend_cann_context&, aclTensor*, aclTensor*)
+ *         where the first aclTensor is the source and the second is the destination.
+ *
+ * @param ctx The CANN backend context for managing resources and execution.
+ * @param dst The destination tensor. Its src[0] is treated as the input tensor.
+ */
+template <void unary_op(ggml_backend_cann_context&, aclTensor*, aclTensor*)>
+void ggml_cann_unary_op(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
     ggml_tensor* src = dst->src[0];
 
     aclTensor* acl_src = ggml_cann_create_tensor(src);
     aclTensor* acl_dst = ggml_cann_create_tensor(dst);
 
-    uint64_t workspaceSize = 0;
-    aclOpExecutor* executor;
-    void* workspaceAddr = nullptr;
-
-    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
-    if (workspaceSize > 0) {
-        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
-        workspaceAddr = workspace_allocator.get();
-    }
-
-    aclrtStream main_stream = ctx.stream();
-    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
+    unary_op(ctx, acl_src, acl_dst);
 
     ACL_CHECK(aclDestroyTensor(acl_src));
     ACL_CHECK(aclDestroyTensor(acl_dst));
 }
 
-// Activation functions template for const aclTensors.
-template <aclnnStatus getWorkspaceSize(const aclTensor*, const aclTensor*,
-                                       uint64_t*, aclOpExecutor**),
-          aclnnStatus execute(void*, uint64_t, aclOpExecutor*,
-                              const aclrtStream)>
-void ggml_cann_activation(ggml_backend_cann_context& ctx, ggml_tensor* dst) {
-    ggml_tensor* src = dst->src[0];
-
-    aclTensor* acl_src = ggml_cann_create_tensor(src);
-    aclTensor* acl_dst = ggml_cann_create_tensor(dst);
-
-    uint64_t workspaceSize = 0;
-    aclOpExecutor* executor;
-    void* workspaceAddr = nullptr;
-
-    ACL_CHECK(getWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));
-    if (workspaceSize > 0) {
-        ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
-        workspaceAddr = workspace_allocator.get();
-    }
-
-    aclrtStream main_stream = ctx.stream();
-    ACL_CHECK(execute(workspaceAddr, workspaceSize, executor, main_stream));
-
-    ACL_CHECK(aclDestroyTensor(acl_src));
-    ACL_CHECK(aclDestroyTensor(acl_dst));
-}
+/**
+ * @brief Helper macro to invoke a unary ACL operation using ggml_cann_unary_op.
+ *
+ * This macro defines an inline lambda wrapping a specific ACL operation name,
+ * and passes it to the templated ggml_cann_unary_op function. It simplifies
+ * calling unary ops by hiding the lambda boilerplate.
+ *
+ * Internally, the lambda will call:
+ * @code
+ * GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst);
+ * @endcode
+ *
+ * @param OP_NAME The name of the ACL unary operator to invoke via GGML_CANN_CALL_ACLNN_OP.
+ *
+ * @see ggml_cann_unary_op
+ * @see GGML_CANN_CALL_ACLNN_OP
+ */
+#define GGML_CANN_CALL_UNARY_OP(OP_NAME)                         \
+    do {                                                         \
+        auto lambda = [](auto ctx, auto acl_src, auto acl_dst) { \
+            GGML_CANN_CALL_ACLNN_OP(OP_NAME, acl_src, acl_dst);  \
+        };                                                       \
+        ggml_cann_unary_op<lambda>(ctx, dst);                    \
+    } while (0)
 
 #endif  // CANN_ACLNN_OPS
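The heart of the refactor is folding the two-phase aclnn calling convention (query the workspace size, then launch) into GGML_CANN_CALL_ACLNN_OP. Roughly, GGML_CANN_CALL_ACLNN_OP(Sin, acl_src, acl_dst) expands to the following, assuming the usual aclnnSinGetWorkspaceSize/aclnnSin pair from the CANN SDK (an expansion sketch, not text from the diff):

    do {
        uint64_t        workspaceSize = 0;
        aclOpExecutor * executor;
        void *          workspaceAddr = nullptr;

        // Phase 1: size the scratch workspace and build the executor.
        ACL_CHECK(aclnnSinGetWorkspaceSize(acl_src, acl_dst, &workspaceSize, &executor));

        if (workspaceSize > 0) {
            // The allocation is returned to the pool at the end of this scope,
            // but stream ordering guarantees the kernel consumes it first.
            ggml_cann_pool_alloc workspace_allocator(ctx.pool(), workspaceSize);
            workspaceAddr = workspace_allocator.get();
        }
        // Phase 2: launch the kernel asynchronously on the context's stream.
        ACL_CHECK(aclnnSin(workspaceAddr, workspaceSize, executor, ctx.stream()));
    } while (0);

This is exactly the boilerplate the old getWorkspaceSize/execute template parameters used to carry, which is why each operator wrapper now shrinks to a single line. The remaining hunks below are in the backend dispatcher.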
@@ -1300,47 +1300,59 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             ggml_cann_dup(ctx, dst);
             break;
         case GGML_OP_ADD:
-            ggml_cann_add(ctx, dst);
+        case GGML_OP_ADD1:
+            ggml_cann_binary_op<aclnn_add>(ctx, dst);
+            break;
+        case GGML_OP_SUB:
+            ggml_cann_binary_op<aclnn_sub>(ctx, dst);
             break;
         case GGML_OP_ACC:
             ggml_cann_acc(ctx, dst);
             break;
         case GGML_OP_MUL:
-            ggml_cann_mul_div<aclnnMulGetWorkspaceSize, aclnnMul>(ctx, dst);
+            ggml_cann_binary_op<aclnn_mul>(ctx, dst);
             break;
         case GGML_OP_DIV:
-            ggml_cann_mul_div<aclnnDivGetWorkspaceSize, aclnnDiv>(ctx, dst);
+            ggml_cann_binary_op<aclnn_div>(ctx, dst);
             break;
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(dst)) {
+                case GGML_UNARY_OP_ABS:
+                    GGML_CANN_CALL_UNARY_OP(Abs);
+                    break;
+                case GGML_UNARY_OP_NEG:
+                    GGML_CANN_CALL_UNARY_OP(Neg);
+                    break;
                 case GGML_UNARY_OP_GELU:
-                    ggml_cann_activation<aclnnGeluGetWorkspaceSize, aclnnGelu>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Gelu);
                     break;
                 case GGML_UNARY_OP_SILU:
-                    ggml_cann_activation<aclnnSiluGetWorkspaceSize, aclnnSilu>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Silu);
                     break;
-                // TODO: Use faster gelu??
-                case GGML_UNARY_OP_GELU_QUICK:
-                    ggml_cann_activation<aclnnGeluGetWorkspaceSize, aclnnGelu>(
-                        ctx, dst);
+                case GGML_UNARY_OP_GELU_QUICK: {
+                        auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {
+                            GGML_CANN_CALL_ACLNN_OP(GeluV2, acl_src, 0, acl_dst);
+                        };
+                        ggml_cann_unary_op<lambda>(ctx, dst);
+                    }
                     break;
                 case GGML_UNARY_OP_TANH:
-                    ggml_cann_activation<aclnnTanhGetWorkspaceSize, aclnnTanh>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Tanh);
                     break;
                 case GGML_UNARY_OP_RELU:
-                    ggml_cann_activation<aclnnReluGetWorkspaceSize, aclnnRelu>(
-                        ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Relu);
+                    break;
+                case GGML_UNARY_OP_SIGMOID:
+                    GGML_CANN_CALL_UNARY_OP(Sigmoid);
                     break;
                 case GGML_UNARY_OP_HARDSIGMOID:
-                    ggml_cann_activation<aclnnHardsigmoidGetWorkspaceSize,
-                                         aclnnHardsigmoid>(ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Hardsigmoid);
                     break;
                 case GGML_UNARY_OP_HARDSWISH:
-                    ggml_cann_activation<aclnnHardswishGetWorkspaceSize,
-                                         aclnnHardswish>(ctx, dst);
+                    GGML_CANN_CALL_UNARY_OP(Hardswish);
+                    break;
+                case GGML_UNARY_OP_EXP:
+                    GGML_CANN_CALL_UNARY_OP(Exp);
                     break;
                 default:
                     return false;
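With both macros in place, each unary case collapses to one line. Expanding GGML_CANN_CALL_UNARY_OP(Abs) by hand shows the two layers cooperating: the capture-less lambda becomes the template argument of ggml_cann_unary_op, which owns ACL tensor creation and destruction (expansion sketch):

    do {
        auto lambda = [](auto ctx, auto acl_src, auto acl_dst) {
            // Token-pastes into aclnnAbsGetWorkspaceSize / aclnnAbs.
            GGML_CANN_CALL_ACLNN_OP(Abs, acl_src, acl_dst);
        };
        // Wraps dst->src[0] and dst as aclTensors, invokes the lambda on the
        // context's stream, then destroys both aclTensors.
        ggml_cann_unary_op<lambda>(ctx, dst);
    } while (0);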
@@ -1382,7 +1394,12 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             ggml_cann_scale(ctx, dst);
             break;
         case GGML_OP_SQR:
-            ggml_cann_sqr(ctx, dst);
+            GGML_ASSERT(dst->src[1] == nullptr);
+            dst->src[1] = dst->src[0];
+            ggml_cann_binary_op<aclnn_mul>(ctx, dst);
+            break;
+        case GGML_OP_SQRT:
+            GGML_CANN_CALL_UNARY_OP(Sqrt);
             break;
         case GGML_OP_CLAMP:
             ggml_cann_clamp(ctx, dst);
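A note on the GGML_OP_SQR case above: rather than binding a dedicated square kernel, the single input is aliased as both operands of the existing multiply path, since

    \f[
        \text{dst}_i = \text{src0}_i \times \text{src0}_i
    \f]

The GGML_ASSERT makes the aliasing safe: GGML_OP_SQR carries only one source, so src[1] must be empty before it is borrowed as the second operand.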
@@ -1414,6 +1431,9 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
         case GGML_OP_POOL_2D:
             ggml_cann_pool2d(ctx, dst);
             break;
+        case GGML_OP_SUM:
+            ggml_cann_sum(ctx, dst);
+            break;
         case GGML_OP_SUM_ROWS:
             ggml_cann_sum_rows(ctx, dst);
             break;
@@ -1424,11 +1444,11 @@ static bool ggml_cann_compute_forward(ggml_backend_cann_context& ctx,
             ggml_cann_argmax(ctx, dst);
             break;
         case GGML_OP_COS:
-            ggml_cann_cos(ctx, dst);
+            ggml_cann_unary_op<aclnn_cos>(ctx, dst);
             break;
         case GGML_OP_SIN:
-            ggml_cann_sin(ctx, dst);
+            ggml_cann_unary_op<aclnn_sin>(ctx, dst);
             break;
         default:
             return false;
     }
@@ -1679,13 +1699,17 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
     switch (op->op) {
         case GGML_OP_UNARY:
             switch (ggml_get_unary_op(op)) {
+                case GGML_UNARY_OP_ABS:
+                case GGML_UNARY_OP_NEG:
                 case GGML_UNARY_OP_GELU:
                 case GGML_UNARY_OP_SILU:
                 case GGML_UNARY_OP_RELU:
+                case GGML_UNARY_OP_SIGMOID:
                 case GGML_UNARY_OP_HARDSIGMOID:
                 case GGML_UNARY_OP_HARDSWISH:
                 case GGML_UNARY_OP_GELU_QUICK:
                 case GGML_UNARY_OP_TANH:
+                case GGML_UNARY_OP_EXP:
                     return true;
                 default:
                     return false;
@@ -1784,6 +1808,7 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
             // value of paddingW should be at most half of kernelW
             return (p0 <= (k0 / 2)) && (p1 <= (k1 / 2));
         }
+        case GGML_OP_SUM:
         case GGML_OP_DUP:
         case GGML_OP_IM2COL:
         case GGML_OP_CONCAT:
@@ -1795,11 +1820,14 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
         case GGML_OP_TRANSPOSE:
         case GGML_OP_NORM:
         case GGML_OP_ADD:
+        case GGML_OP_ADD1:
+        case GGML_OP_SUB:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
         case GGML_OP_RMS_NORM:
         case GGML_OP_SCALE:
         case GGML_OP_SQR:
+        case GGML_OP_SQRT:
         case GGML_OP_CLAMP:
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_SOFT_MAX:
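The supports_op additions are what let the graph scheduler route the new ops to the NPU at all; the same hook can be probed through the public backend API (sketch; backend and node are hypothetical handles set up elsewhere):

    // True if the CANN backend accepts this node, e.g. a GGML_OP_SUM created
    // with ggml_sum(); otherwise the scheduler falls back to another backend.
    bool on_npu = ggml_backend_supports_op(backend, node);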
Author: hipudding