Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)

	llama : do not print "offloading layers" message in CPU-only builds (#5416)

Changed files: llama.cpp (10 changed lines)

@@ -4209,8 +4209,7 @@ static bool llm_load_tensors(
         ctx_bufs.emplace_back(ctx, buf);
     }
 
-    // print memory requirements
-    {
+    if (llama_supports_gpu_offload()) {
         const int n_gpu = std::min(n_gpu_layers, int(hparams.n_layer));
 
         LLAMA_LOG_INFO("%s: offloading %d repeating layers to GPU\n", __func__, n_gpu);
@@ -4222,10 +4221,11 @@ static bool llm_load_tensors(
         const int max_offloadable_layers       = hparams.n_layer + 1;
 
         LLAMA_LOG_INFO("%s: offloaded %d/%d layers to GPU\n", __func__, std::min(n_gpu_layers, max_offloadable_layers), max_backend_supported_layers);
+    }
 
-        for (ggml_backend_buffer_t buf : model.bufs) {
-            LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
-        }
+    // print memory requirements
+    for (ggml_backend_buffer_t buf : model.bufs) {
+        LLAMA_LOG_INFO("%s: %10s buffer size = %8.2f MiB\n", __func__, ggml_backend_buffer_name(buf), ggml_backend_buffer_get_size(buf) / 1024.0 / 1024.0);
     }
 
     // populate tensors_by_name
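
The new guard calls llama_supports_gpu_offload(), the public helper in llama.h that reports whether the build was compiled with a GPU backend. Below is a minimal, hedged sketch (not part of this commit) of how application code can use the same call before requesting GPU layers; the layer count and the note about model loading are illustrative assumptions, not taken from the repository:

#include "llama.h"

#include <cstdio>

int main() {
    llama_model_params params = llama_model_default_params();

    // Same check the loader now uses: in a CPU-only build this returns false,
    // so no "offloading ... layers to GPU" messages are printed at load time.
    if (llama_supports_gpu_offload()) {
        params.n_gpu_layers = 99; // illustrative: offload as many layers as possible
    } else {
        fprintf(stderr, "CPU-only build: leaving n_gpu_layers at 0\n");
    }

    // ... pass `params` to the model-loading call of your llama.cpp version ...
    return 0;
}
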
Author: slaren