mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	llama : uniform variable names + struct init
This commit is contained in:
		| @@ -915,14 +915,14 @@ static json format_timings(llama_server_context &llama) | ||||
|  | ||||
|     return json{ | ||||
|         {"prompt_n", timings.n_eval}, | ||||
|         {"prompt_ms", timings.prompt_eval_time_ms}, | ||||
|         {"prompt_per_token_ms", timings.prompt_eval_time_ms / timings.n_p_eval}, | ||||
|         {"prompt_per_second", 1e3 / timings.prompt_eval_time_ms * timings.n_p_eval}, | ||||
|         {"prompt_ms", timings.t_p_eval_ms}, | ||||
|         {"prompt_per_token_ms", timings.t_p_eval_ms / timings.n_p_eval}, | ||||
|         {"prompt_per_second", 1e3 / timings.t_p_eval_ms * timings.n_p_eval}, | ||||
|  | ||||
|         {"predicted_n", timings.n_eval}, | ||||
|         {"predicted_ms", timings.eval_time_ms}, | ||||
|         {"predicted_per_token_ms", timings.eval_time_ms / timings.n_eval}, | ||||
|         {"predicted_per_second", 1e3 / timings.eval_time_ms * timings.n_eval}, | ||||
|         {"predicted_ms", timings.t_eval_ms}, | ||||
|         {"predicted_per_token_ms", timings.t_eval_ms / timings.n_eval}, | ||||
|         {"predicted_per_second", 1e3 / timings.t_eval_ms * timings.n_eval}, | ||||
|     }; | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										36
									
								
								llama.cpp
									
									
									
									
									
								
							
							
						
						
									
										36
									
								
								llama.cpp
									
									
									
									
									
								
							| @@ -3480,34 +3480,34 @@ llama_token llama_token_nl() { | ||||
|     return 13; | ||||
| } | ||||
|  | ||||
| llama_timings llama_get_timings(struct llama_context * ctx) { | ||||
|     llama_timings timings; | ||||
| struct llama_timings llama_get_timings(struct llama_context * ctx) { | ||||
|     struct llama_timings result = { | ||||
|         /*.t_start_ms  =*/ 1e-3 * ctx->t_start_us, | ||||
|         /*.t_end_ms    =*/ 1.00 * ggml_time_ms(), | ||||
|         /*.t_load_ms   =*/ 1e-3 * ctx->t_load_us, | ||||
|         /*.t_sample_ms =*/ 1e-3 * ctx->t_sample_us, | ||||
|         /*.t_p_eval_ms =*/ 1e-3 * ctx->t_p_eval_us, | ||||
|         /*.t_eval_ms   =*/ 1e-3 * ctx->t_eval_us, | ||||
|  | ||||
|     timings.t_end_ms = ggml_time_ms(); | ||||
|     timings.t_start_ms = 1e-3 * ctx->t_start_us; | ||||
|     timings.load_time_ms = 1e-3 * ctx->t_load_us; | ||||
|     timings.sample_time_ms = 1e-3 * ctx->t_sample_us; | ||||
|     timings.prompt_eval_time_ms = 1e-3 * ctx->t_p_eval_us; | ||||
|     timings.eval_time_ms = 1e-3 * ctx->t_eval_us; | ||||
|         /*.n_sample =*/ std::max(1, ctx->n_sample), | ||||
|         /*.n_p_eval =*/ std::max(1, ctx->n_p_eval), | ||||
|         /*.n_eval   =*/ std::max(1, ctx->n_eval), | ||||
|     }; | ||||
|  | ||||
|     timings.n_sample = std::max(1, ctx->n_sample); | ||||
|     timings.n_p_eval = std::max(1, ctx->n_p_eval); | ||||
|     timings.n_eval = std::max(1, ctx->n_eval); | ||||
|  | ||||
|     return timings; | ||||
|     return result; | ||||
| } | ||||
|  | ||||
| void llama_print_timings(struct llama_context * ctx) { | ||||
|     llama_timings timings = llama_get_timings(ctx); | ||||
|     const llama_timings timings = llama_get_timings(ctx); | ||||
|  | ||||
|     fprintf(stderr, "\n"); | ||||
|     fprintf(stderr, "%s:        load time = %8.2f ms\n", __func__, timings.load_time_ms); | ||||
|     fprintf(stderr, "%s:        load time = %8.2f ms\n", __func__, timings.t_load_ms); | ||||
|     fprintf(stderr, "%s:      sample time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n", | ||||
|             __func__, timings.sample_time_ms, timings.n_sample, timings.sample_time_ms / timings.n_sample, 1e3 / timings.sample_time_ms * timings.n_sample); | ||||
|             __func__, timings.t_sample_ms, timings.n_sample, timings.t_sample_ms / timings.n_sample, 1e3 / timings.t_sample_ms * timings.n_sample); | ||||
|     fprintf(stderr, "%s: prompt eval time = %8.2f ms / %5d tokens (%8.2f ms per token, %8.2f tokens per second)\n", | ||||
|             __func__, timings.prompt_eval_time_ms, timings.n_p_eval, timings.prompt_eval_time_ms / timings.n_p_eval, 1e3 / timings.prompt_eval_time_ms * timings.n_p_eval); | ||||
|             __func__, timings.t_p_eval_ms, timings.n_p_eval, timings.t_p_eval_ms / timings.n_p_eval, 1e3 / timings.t_p_eval_ms * timings.n_p_eval); | ||||
|     fprintf(stderr, "%s:        eval time = %8.2f ms / %5d runs   (%8.2f ms per token, %8.2f tokens per second)\n", | ||||
|             __func__, timings.eval_time_ms, timings.n_eval, timings.eval_time_ms / timings.n_eval, 1e3 / timings.eval_time_ms * timings.n_eval); | ||||
|             __func__, timings.t_eval_ms, timings.n_eval, timings.t_eval_ms / timings.n_eval, 1e3 / timings.t_eval_ms * timings.n_eval); | ||||
|     fprintf(stderr, "%s:       total time = %8.2f ms\n", __func__, (timings.t_end_ms - timings.t_start_ms)); | ||||
| } | ||||
|  | ||||
|   | ||||
							
								
								
									
										10
									
								
								llama.h
									
									
									
									
									
								
							
							
						
						
									
										10
									
								
								llama.h
									
									
									
									
									
								
							| @@ -138,10 +138,10 @@ extern "C" { | ||||
|     struct llama_timings { | ||||
|         double t_start_ms; | ||||
|         double t_end_ms; | ||||
|         double load_time_ms; | ||||
|         double sample_time_ms; | ||||
|         double prompt_eval_time_ms; | ||||
|         double eval_time_ms; | ||||
|         double t_load_ms; | ||||
|         double t_sample_ms; | ||||
|         double t_p_eval_ms; | ||||
|         double t_eval_ms; | ||||
|  | ||||
|         int32_t n_sample; | ||||
|         int32_t n_p_eval; | ||||
| @@ -345,7 +345,7 @@ extern "C" { | ||||
|     LLAMA_API llama_token llama_sample_token(struct llama_context * ctx, llama_token_data_array * candidates); | ||||
|  | ||||
|     // Performance information | ||||
|     LLAMA_API llama_timings llama_get_timings(struct llama_context * ctx); | ||||
|     LLAMA_API struct llama_timings llama_get_timings(struct llama_context * ctx); | ||||
|     LLAMA_API void llama_print_timings(struct llama_context * ctx); | ||||
|     LLAMA_API void llama_reset_timings(struct llama_context * ctx); | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov