mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-12 10:47:01 +00:00
CUDA: fix bug in rms_norm fusion (#15660)
* CUDA: fix bug in rms_norm fusion * Fix bug for OP_REPEAT * Fix index for add
This commit is contained in:
@@ -2827,7 +2827,7 @@ static bool ggml_cuda_can_fuse(const struct ggml_cgraph * cgraph, int node_idx,
|
||||
const ggml_tensor *add = nullptr;
|
||||
|
||||
if (ops.size() == 3 && ops.begin()[2] == GGML_OP_ADD) {
|
||||
add = cgraph->nodes[node_idx+1];
|
||||
add = cgraph->nodes[node_idx+2];
|
||||
}
|
||||
|
||||
GGML_ASSERT(rms_norm->src[0]->type == GGML_TYPE_F32);
|
||||
|
||||
Reference in New Issue
Block a user