Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00).
Commit: llama : remove obsolete map for layer counting
(This commit is contained in the branches/tags listed on the original page.)
		| @@ -5469,9 +5469,6 @@ static struct ggml_cgraph * llama_build_graph( | |||||||
|     // check if we should build the worst-case graph (for memory measurement) |     // check if we should build the worst-case graph (for memory measurement) | ||||||
|     const bool worst_case = ggml_allocr_is_measure(lctx.alloc); |     const bool worst_case = ggml_allocr_is_measure(lctx.alloc); | ||||||
|  |  | ||||||
|     // count the number of times a tensor with a given name has been offloaded |  | ||||||
|     std::unordered_map<std::string, int> offload_n; |  | ||||||
|  |  | ||||||
|     // keep track of the input that has already been allocated |     // keep track of the input that has already been allocated | ||||||
|     bool alloc_inp_tokens   = false; |     bool alloc_inp_tokens   = false; | ||||||
|     bool alloc_inp_embd     = false; |     bool alloc_inp_embd     = false; | ||||||
| @@ -5654,7 +5651,7 @@ static struct ggml_cgraph * llama_build_graph( | |||||||
|                 break; |                 break; | ||||||
|             case OFFLOAD_FUNC: |             case OFFLOAD_FUNC: | ||||||
|                 if (n_gpu_layers < n_layer) { |                 if (n_gpu_layers < n_layer) { | ||||||
|                     if (offload_n[name]++ < i_gpu_start) { |                     if (il < i_gpu_start) { | ||||||
|                         func_e = OFFLOAD_FUNC_NOP; |                         func_e = OFFLOAD_FUNC_NOP; | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|   | |||||||
Page actions: Reference in New Issue · Block a user
Author: Georgi Gerganov