	train : make KQ_pos memory buffer permanent via dummy scale op
@@ -795,6 +795,8 @@ struct ggml_tensor * llama_build_train_graphs(
         ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36, one));
         // input gradient
         ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t36->grad, one));
+        // KQ_pos
+        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, KQ_pos, one));
         GGML_ASSERT(t36->grad->data == NULL && !ggml_is_view(t36->grad));
         ggml_allocr_alloc(alloc, t36->grad);
         // gradient tensors (will be set to zero by ggml_graph_reset)
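
The added call works because of how the ggml graph allocator assigns memory: only tensors that appear as (or feed into) nodes of the expanded graph are guaranteed to keep their buffers for the whole lifetime of the graph; anything else may be reused as scratch space. Scaling KQ_pos by 1.0 is numerically a no-op, but it inserts a node that depends on KQ_pos, so the allocator treats its buffer as permanent. Below is a minimal sketch of the idiom, assuming the ggml API of this commit's era where ggml_scale_inplace takes the scale as a 1-element f32 tensor (newer ggml revisions take a plain float instead); the helper name is hypothetical:

    #include "ggml.h"

    // Hypothetical helper: make `t` part of the computation graph via a dummy
    // scale-by-1.0 so the graph allocator keeps its buffer alive instead of
    // reusing it for intermediate results.
    static void keep_buffer_permanent(struct ggml_context * ctx,
                                      struct ggml_cgraph  * gb,
                                      struct ggml_tensor  * t,
                                      struct ggml_tensor  * one) { // 1-element f32 tensor holding 1.0f
        // scaling by 1.0 leaves the values untouched, but it creates a graph
        // node that references `t`; expanding the graph with that node is what
        // the allocator inspects when deciding which buffers must stay allocated
        ggml_build_forward_expand(gb, ggml_scale_inplace(ctx, t, one));
    }

    // usage, mirroring the diff above:
    //   keep_buffer_permanent(ctx, gb, KQ_pos, one);

The same idiom is already applied to t36 and t36->grad a few lines earlier in llama_build_train_graphs, which is why the dummy scale for KQ_pos is added right alongside them.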
Author: Georgi Gerganov