CUDA: fused add with rms_norm_mul

* Non-broadcast fuse works
* Add fused adds
* format
* Remove n_fuse from template params
* Address review comments
* Move template inside binbcast
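The idea behind the fused adds is to collapse a chain of consecutive element-wise adds into a single kernel launch, so intermediate sums never round-trip through global memory. A minimal sketch of that idea, assuming n_fuse adds over n_fuse + 1 same-shape operands (illustrative names and signature, not this change's actual kernel; n_fuse is a runtime argument here, mirroring its removal from the template params):

#include <cstdint>

// Illustrative only: one pass over n_fuse + 1 operands instead of
// n_fuse separate add kernels writing intermediates to global memory.
// srcs is a device array of n_fuse + 1 device pointers.
static __global__ void fused_add_sketch(const float * const * srcs, const int n_fuse,
                                        float * dst, const int64_t n) {
    const int64_t i = (int64_t) blockIdx.x*blockDim.x + threadIdx.x;
    if (i >= n) {
        return;
    }
    float acc = srcs[0][i];
    for (int j = 1; j <= n_fuse; ++j) {
        acc += srcs[j][i]; // each fused ADD contributes one more operand
    }
    dst[i] = acc;
}

// hypothetical launch:
//     fused_add_sketch<<<(n + 255)/256, 256, 0, stream>>>(srcs, n_fuse, dst, n);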
#include "common.cuh"
|
|
|
|
void ggml_cuda_op_repeat(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
void ggml_cuda_op_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
void ggml_cuda_op_sub(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
void ggml_cuda_op_mul(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
void ggml_cuda_op_div(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
|
|
void ggml_cuda_op_repeat_back(ggml_backend_cuda_context & ctx, ggml_tensor * dst);
|
|
|
|
void ggml_cuda_op_fused_add(ggml_backend_cuda_context & ctx, ggml_tensor * dst, int n_fuse);
|
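The extra n_fuse parameter on ggml_cuda_op_fused_add implies the caller has already identified a run of fusable adds in the compute graph. A hedged sketch of how such a run could be counted before dispatch (the helper and the direct use of ggml_cgraph fields are assumptions for illustration, not the repo's actual dispatch code):

#include "ggml.h"

// Counts how many consecutive GGML_OP_ADD nodes starting at index i each
// consume the previous node's result, i.e. form a chain a + b + c + ...
// that a single fused kernel could evaluate. Illustrative helper only.
static int count_fusable_adds(const struct ggml_cgraph * cgraph, const int i) {
    if (cgraph->nodes[i]->op != GGML_OP_ADD) {
        return 0;
    }
    int n_fuse = 1;
    while (i + n_fuse < cgraph->n_nodes) {
        const struct ggml_tensor * prev = cgraph->nodes[i + n_fuse - 1];
        const struct ggml_tensor * next = cgraph->nodes[i + n_fuse];
        if (next->op != GGML_OP_ADD || next->src[0] != prev) {
            break;
        }
        n_fuse++;
    }
    return n_fuse;
}

// A graph-evaluation loop could then do something like:
//     const int n_fuse = count_fusable_adds(cgraph, i);
//     if (n_fuse > 1) {
//         ggml_cuda_op_fused_add(ctx, cgraph->nodes[i + n_fuse - 1], n_fuse);
//         i += n_fuse - 1; // skip the adds the fused kernel already computed
//     }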