Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-31 08:51:55 +00:00.
			
		
		
		
	perplexity : fix ETA by warming up the model with an empty run
This commit is contained in:
		| @@ -752,6 +752,14 @@ std::tuple<struct llama_model *, struct llama_context *> llama_init_from_gpt_par | |||||||
|         params.logit_bias[llama_token_eos(lctx)] = -INFINITY; |         params.logit_bias[llama_token_eos(lctx)] = -INFINITY; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|  |     { | ||||||
|  |         LOG("warming up the model with an empty run\n"); | ||||||
|  |  | ||||||
|  |         const std::vector<llama_token> tmp = { llama_token_bos(lctx), }; | ||||||
|  |         llama_eval(lctx, tmp.data(), tmp.size(), 0, params.n_threads); | ||||||
|  |         llama_reset_timings(lctx); | ||||||
|  |     } | ||||||
|  |  | ||||||
|     return std::make_tuple(model, lctx); |     return std::make_tuple(model, lctx); | ||||||
| } | } | ||||||
|  |  | ||||||
|   | |||||||
| @@ -492,14 +492,6 @@ int main(int argc, char ** argv) { | |||||||
|     std::vector<llama_token> embd; |     std::vector<llama_token> embd; | ||||||
|     std::vector<llama_token> embd_guidance; |     std::vector<llama_token> embd_guidance; | ||||||
|  |  | ||||||
|     { |  | ||||||
|         LOG("warming up the model with an empty run\n"); |  | ||||||
|  |  | ||||||
|         const std::vector<llama_token> tmp = { llama_token_bos(ctx), }; |  | ||||||
|         llama_eval(ctx, tmp.data(), tmp.size(), 0, params.n_threads); |  | ||||||
|         llama_reset_timings(ctx); |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     while ((n_remain != 0 && !is_antiprompt) || params.interactive) { |     while ((n_remain != 0 && !is_antiprompt) || params.interactive) { | ||||||
|         // predict |         // predict | ||||||
|         if (embd.size() > 0) { |         if (embd.size() > 0) { | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov