Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
	parallel : add option to load external prompt file (#3416)
* Enable external file and add datestamp
* Add name of external file at end
* Upload ToK2024
* Delete ToK2024.txt
* Experiments with jeopardy
* Move ParallelQuestions to /prompts and rename
* Interim commit
* Interim commit
* Final revision
* Remove trailing whitespace
* remove cmake_all.sh
* Remove cmake_all.sh
* Changed .gitignore
* Improved reporting and new question files.
* Corrected typo
* More LLM questions
* Update LLM-questions.txt
* Yet more LLM-questions
* Remove jeopardy results file
* Reinstate original jeopardy.sh
* Update examples/parallel/parallel.cpp

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
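The headline change lets the parallel example read its questions from an external prompt file (one prompt per line) instead of relying only on the built-in list. The sketch below shows that general pattern, not the commit's actual code: the load_prompts helper, the fallback prompt, and the split-on-newline behavior are all assumptions for illustration.

    #include <fstream>
    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical helper: read one prompt per line from an external file.
    // Returns an empty vector when the file cannot be opened, so the caller
    // can fall back to its built-in prompts.
    static std::vector<std::string> load_prompts(const std::string & path) {
        std::vector<std::string> prompts;
        std::ifstream fin(path);
        if (!fin) {
            return prompts;
        }
        std::string line;
        while (std::getline(fin, line)) {
            if (!line.empty()) {
                prompts.push_back(line);
            }
        }
        return prompts;
    }

    int main(int argc, char ** argv) {
        const std::string path = argc > 1 ? argv[1] : "";
        std::vector<std::string> prompts = load_prompts(path);
        if (prompts.empty()) {
            prompts = { "What is the capital of France?" }; // built-in default
        }
        std::cout << "loaded " << prompts.size() << " prompt(s)\n";
    }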
This commit is contained in:

    llama.cpp (10 changed lines)
@@ -8219,14 +8219,14 @@ void llama_print_timings(struct llama_context * ctx) {
     const llama_timings timings = llama_get_timings(ctx);
 
     LLAMA_LOG_INFO("\n");
-    LLAMA_LOG_INFO("%s:        load time = %8.2f ms\n", __func__, timings.t_load_ms);
-    LLAMA_LOG_INFO("%s:      sample time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s:        load time = %10.2f ms\n", __func__, timings.t_load_ms);
+    LLAMA_LOG_INFO("%s:      sample time = %10.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample);
-    LLAMA_LOG_INFO("%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s: prompt eval time = %10.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval);
-    LLAMA_LOG_INFO("%s:        eval time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
+    LLAMA_LOG_INFO("%s:        eval time = %10.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n",
             __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval);
-    LLAMA_LOG_INFO("%s:       total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
+    LLAMA_LOG_INFO("%s:       total time = %10.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms));
 }
 
 void llama_reset_timings(struct llama_context * ctx) {
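The llama.cpp hunk is purely a printf width bump in llama_print_timings: %8.2f reserves eight characters for the millisecond figures, which overflows once a timing exceeds 99999.99 ms (about 100 seconds) and pushes the report out of column alignment, while %10.2f stays aligned up to 9999999.99 ms. The per-token figures on the unchanged continuation lines are derived on the fly: t / n gives ms per token and 1e3 / t * n gives tokens per second. A standalone illustration of the width behavior (the timing value is invented):

    #include <cstdio>

    int main() {
        // A long-running session can easily accumulate > 100 s of eval time.
        const double t_eval_ms = 123456.78;

        // Old format: the value needs 9 characters, so the 8-wide field
        // expands and misaligns everything after it.
        printf("old: eval time = %8.2f ms\n",  t_eval_ms);

        // New format: a 10-wide field still has room, columns stay aligned.
        printf("new: eval time = %10.2f ms\n", t_eval_ms);
    }

Small values simply gain two extra leading spaces under %10.2f, so short runs still line up.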
Author: pudepiedj