	llama : do a warm-up eval at start for better timings (#1824)
@@ -331,6 +331,13 @@ int main(int argc, char ** argv) {
 
     std::vector<llama_token> embd;
 
+    // do one empty run to warm up the model
+    {
+        const std::vector<llama_token> tmp = { llama_token_bos(), };
+        llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads);
+        llama_reset_timings(ctx);
+    }
+
     while ((n_remain != 0 && !is_antiprompt) || params.interactive) {
         // predict
         if (embd.size() > 0) {
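As context, here is a minimal standalone C++ sketch (not llama.cpp code; fake_eval, its lazily allocated scratch buffer, and the run counts are invented for illustration) of the measurement problem this patch addresses: the first evaluation often pays one-time costs (allocations, page faults, cold caches), so running once and discarding that measurement keeps those costs out of the reported averages.

#include <chrono>
#include <cstdio>
#include <vector>

// Stand-in for a model evaluation. Like many runtimes, it allocates its
// scratch buffer lazily, so the first call pays a one-time cost
// (allocation + page faults) that later calls do not.
static std::vector<float> scratch;

static void fake_eval() {
    if (scratch.empty()) {
        scratch.resize(16 * 1024 * 1024); // one-time setup on the first call
    }
    for (float & x : scratch) {
        x += 1.0f;
    }
}

static double timed_eval_ms() {
    const auto t0 = std::chrono::high_resolution_clock::now();
    fake_eval();
    const auto t1 = std::chrono::high_resolution_clock::now();
    return std::chrono::duration<double, std::milli>(t1 - t0).count();
}

int main() {
    // warm-up: run once and discard the measurement, mirroring the
    // llama_eval() + llama_reset_timings() pair in the patch above
    printf("warm-up (discarded): %8.2f ms\n", timed_eval_ms());

    double    total_ms = 0.0;
    const int n_runs   = 4;
    for (int i = 0; i < n_runs; ++i) {
        total_ms += timed_eval_ms();
    }
    printf("avg over %d runs:    %8.2f ms\n", n_runs, total_ms / n_runs);
    return 0;
}

In the patch itself, evaluating a single BOS token serves the same purpose: it forces the model's buffers to be touched once before the first real evaluation, and llama_reset_timings() then drops that warm-up run from the timing stats reported at exit.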
Georgi Gerganov