	Fix Metal backend broken from the allocator changes (#2455)
* fix Metal backend broken from the allocator changes
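With the new graph allocator, intermediate tensors such as `cur` can have their buffers reused, so reading `cur` back from the Metal backend after the graph run no longer reliably yields the model output. The change below looks up the output nodes (`result_output` and `result_norm`) by name before the graph is evaluated and copies exactly those tensors back from the Metal buffers with `ggml_metal_get_tensor`, fetching the embeddings only when the caller actually requested them.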
Author: slaren

1 changed file with 10 additions and 7 deletions: llama.cpp (17 changes)
--- a/llama.cpp
+++ b/llama.cpp
@@ -1812,6 +1812,12 @@ static bool llama_eval_internal(
     // otherwise, the threads are spin-lock waiting for the BLAS calls and are degrading the performance
     n_threads = N >= 32 && ggml_cpu_has_blas() && !ggml_cpu_has_gpublas() ? 1 : n_threads;
 
+    struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
+    struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
+
+    LLAMA_ASSERT(strcmp(res->name, "result_output") == 0);
+    LLAMA_ASSERT(strcmp(embeddings->name, "result_norm") == 0);
+
 #if GGML_USE_MPI
     const int64_t n_layer = hparams.n_layer;
     ggml_mpi_graph_compute_pre(lctx.ctx_mpi, gf, n_layer);
@@ -1825,7 +1831,10 @@ static bool llama_eval_internal(
         //}
         ggml_metal_set_n_cb     (lctx.ctx_metal, n_threads);
         ggml_metal_graph_compute(lctx.ctx_metal, gf);
-        ggml_metal_get_tensor   (lctx.ctx_metal, cur);
+        ggml_metal_get_tensor   (lctx.ctx_metal, res);
+        if (!lctx.embedding.empty()) {
+            ggml_metal_get_tensor(lctx.ctx_metal, embeddings);
+        }
     } else {
         // IMPORTANT:
         // Since we don't have efficient Matrix x Matrix Metal multiplication yet, we fallback to vanilla
@@ -1856,12 +1865,6 @@ static bool llama_eval_internal(
     // update kv token count
     lctx.kv_self.n = n_past + N;
 
-    struct ggml_tensor * res = gf->nodes[gf->n_nodes - 1];
-    struct ggml_tensor * embeddings = gf->nodes[gf->n_nodes - 2];
-
-    LLAMA_ASSERT(strcmp(res->name, "result_output") == 0);
-    LLAMA_ASSERT(strcmp(embeddings->name, "result_norm") == 0);
-
     if (cgraph_fname) {
         ggml_graph_export(gf, cgraph_fname);
     }
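For context, a minimal sketch of the read-back pattern the patch settles on, with the hunks above as the ground truth: the graph outputs are located by their ggml names and only those tensors are copied out of the Metal buffers after compute. The `find_graph_output` helper and the `want_embeddings` flag are illustrative names, not part of llama.cpp; the ggml/ggml-metal calls are the ones used in the patch.

// Illustrative sketch only -- not the actual llama.cpp code.
#include <string.h>
#include "ggml.h"
#include "ggml-metal.h"

// Walk the graph backwards and return the node carrying the requested name.
static struct ggml_tensor * find_graph_output(struct ggml_cgraph * gf, const char * name) {
    for (int i = gf->n_nodes - 1; i >= 0; --i) {
        if (strcmp(gf->nodes[i]->name, name) == 0) {
            return gf->nodes[i];
        }
    }
    return NULL;
}

static void eval_with_metal(struct ggml_metal_context * ctx_metal, struct ggml_cgraph * gf, bool want_embeddings) {
    // Resolve the named outputs before running the graph; with the allocator
    // reusing buffers, "last tensor touched" is no longer a valid proxy.
    struct ggml_tensor * res        = find_graph_output(gf, "result_output");
    struct ggml_tensor * embeddings = find_graph_output(gf, "result_norm");

    ggml_metal_graph_compute(ctx_metal, gf);

    // Copy only the tensors the caller will actually read back to host memory.
    ggml_metal_get_tensor(ctx_metal, res);
    if (want_embeddings) {
        ggml_metal_get_tensor(ctx_metal, embeddings);
    }
}

In the diff itself the outputs are taken directly as the last two graph nodes and their names are checked with LLAMA_ASSERT; the helper above just makes the by-name lookup explicit.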