	train : make KQ_pos memory buffer permanent via dummy scale op
@@ -795,6 +795,8 @@ struct ggml_tensor * llama_build_train_graphs(
         ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36, one));
         // input gradient
         ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one));
+        // KQ_pos
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, one));
         GGML_ASSERT(t36->grad->data == NULL && !ggml_is_view(t36->grad));
         ggml_allocr_alloc(alloc, t36->grad);
         // gradient tensors (will be set to zero by ggml_graph_reset)
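
The added call works because of how the ggml graph allocator assigns memory: only tensors that appear as (or feed into) nodes of the expanded graph are guaranteed to keep their buffers for the whole lifetime of the graph; anything else may be reused as scratch space. Scaling KQ_pos by 1.0 is numerically a no-op, but it inserts a node that depends on KQ_pos, so the allocator treats its buffer as permanent. Below is a minimal sketch of the idiom, assuming the ggml API of this commit's era where ggml_scale_inplace takes the scale as a 1-element f32 tensor (newer ggml revisions take a plain float instead); the helper name is hypothetical:

    #include "ggml.h"

    // Hypothetical helper: make `t` part of the computation graph via a dummy
    // scale-by-1.0 so the graph allocator keeps its buffer alive instead of
    // reusing it for intermediate results.
    static void keep_buffer_permanent(struct ggml_context * ctx,
                                      struct ggml_cgraph  * gb,
                                      struct ggml_tensor  * t,
                                      struct ggml_tensor  * one) { // 1-element f32 tensor holding 1.0f
        // scaling by 1.0 leaves the values untouched, but it creates a graph
        // node that references `t`; expanding the graph with that node is what
        // the allocator inspects when deciding which buffers must stay allocated
        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t, one));
    }

    // usage, mirroring the diff above:
    //   keep_buffer_permanent(ctx, gb, KQ_pos, one);

The same idiom is already applied to t36 and t36->grad a few lines earlier in llama_build_train_graphs, which is why the dummy scale for KQ_pos is added right alongside them.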
Author: Georgi Gerganov