	llama-graph : use ggml_repeat_4d (#13998)
Author: Xuan-Son Nguyen
@@ -769,9 +769,8 @@ ggml_tensor * llm_graph_context::build_moe_ffn(
     cur = ggml_reshape_3d(ctx0, cur, n_embd, 1, n_tokens);
 
     if (weight_before_ffn) {
-        // TODO: this is a workaround as we don't yet have a repeat op that takes custom dim (ggml_repeat_4d)
-        ggml_tensor * repeated = ggml_new_tensor_3d(ctx0, cur->type, n_embd, n_expert_used, n_tokens);
-        repeated = ggml_repeat(ctx0, cur, repeated); // [n_embd, n_expert_used, n_tokens]
+        // repeat cur to [n_embd, n_expert_used, n_tokens]
+        ggml_tensor * repeated = ggml_repeat_4d(ctx0, cur, n_embd, n_expert_used, n_tokens, 1);
         cur = ggml_mul(ctx0, repeated, weights);
         cb(cur, "ffn_moe_weighted", il);
     }
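For context, the sketch below (not part of the commit) shows the pattern the change simplifies: instead of allocating a target tensor with ggml_new_tensor_3d and broadcasting into it with ggml_repeat, a single ggml_repeat_4d call states the target shape directly. It assumes the ggml_repeat_4d(ctx, a, ne0, ne1, ne2, ne3) signature visible in the diff; the tensor sizes are illustrative placeholders, not values from llama.cpp.

// minimal standalone sketch of the ggml_repeat_4d pattern used in the diff
#include <stdio.h>
#include "ggml.h"

int main(void) {
    // illustrative sizes, not taken from llama.cpp
    const int64_t n_embd        = 8;
    const int64_t n_expert_used = 2;
    const int64_t n_tokens      = 4;

    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    // cur: [n_embd, 1, n_tokens], weights: [1, n_expert_used, n_tokens]
    struct ggml_tensor * cur     = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, n_embd, 1, n_tokens);
    struct ggml_tensor * weights = ggml_new_tensor_3d(ctx, GGML_TYPE_F32, 1, n_expert_used, n_tokens);

    // before the commit: ggml_new_tensor_3d(...) followed by ggml_repeat(...)
    // after the commit:  one call that names the target shape explicitly
    struct ggml_tensor * repeated = ggml_repeat_4d(ctx, cur, n_embd, n_expert_used, n_tokens, 1);
    struct ggml_tensor * out      = ggml_mul(ctx, repeated, weights);

    // out has the broadcast shape [n_embd, n_expert_used, n_tokens, 1]
    printf("out: [%lld, %lld, %lld, %lld]\n",
           (long long) out->ne[0], (long long) out->ne[1],
           (long long) out->ne[2], (long long) out->ne[3]);

    ggml_free(ctx);
    return 0;
}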