Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
			
		
		
		
	llama : replace (permute + reshape + view_1d) with (view_3d) (#2538)
ggml-ci
This commit is contained in:
		
							
								
								
									
										16
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -1609,11 +1609,11 @@ static struct ggml_cgraph * llama_build_graph( | ||||
|             ggml_set_name(Q, "Q"); | ||||
|  | ||||
|             struct ggml_tensor * K = | ||||
|-                ggml_permute(ctx0, | ||||
|-                        ggml_reshape_3d(ctx0, | ||||
|-                            ggml_view_1d(ctx0, kv_self.k, (n_past + N)*n_embd_gqa, il*n_ctx*ggml_element_size(kv_self.k)*n_embd_gqa), | ||||
|-                            n_embd_head, n_head_kv, n_past + N), | ||||
|-                        0, 2, 1, 3); | ||||
|+                ggml_view_3d(ctx0, kv_self.k, | ||||
|+                        n_embd_head, n_past + N, n_head_kv, | ||||
|+                        ggml_element_size(kv_self.k)*n_embd_gqa, | ||||
|+                        ggml_element_size(kv_self.k)*n_embd_head, | ||||
|+                        ggml_element_size(kv_self.k)*n_embd_gqa*n_ctx*il); | ||||
|             offload_func_kq(K); | ||||
|             ggml_set_name(K, "K"); | ||||
|  | ||||
| @@ -1642,9 +1642,9 @@ static struct ggml_cgraph * llama_build_graph( | ||||
|             struct ggml_tensor * V = | ||||
|                 ggml_view_3d(ctx0, kv_self.v, | ||||
|                         n_past + N, n_embd_head, n_head_kv, | ||||
|-                        n_ctx*ggml_element_size(kv_self.v), | ||||
|-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_head, | ||||
|-                        n_ctx*ggml_element_size(kv_self.v)*n_embd_gqa*il); | ||||
|+                        ggml_element_size(kv_self.v)*n_ctx, | ||||
|+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_head, | ||||
|+                        ggml_element_size(kv_self.v)*n_ctx*n_embd_gqa*il); | ||||
|             offload_func_v(V); | ||||
|             ggml_set_name(V, "V"); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov