Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)

Commit: llama-bench : log benchmark progress (#9287)

* llama-bench : add optional progress messages

This commit is contained in:
@@ -249,6 +249,7 @@ struct cmd_params {
     ggml_sched_priority prio;
     int delay;
     bool verbose;
+    bool progress;
     output_formats output_format;
     output_formats output_format_stderr;
 };
@@ -280,6 +281,7 @@ static const cmd_params cmd_params_defaults = {
     /* prio                 */ GGML_SCHED_PRIO_NORMAL,
     /* delay                */ 0,
     /* verbose              */ false,
+    /* progress             */ false,
     /* output_format        */ MARKDOWN,
     /* output_format_stderr */ NONE,
 };
@@ -319,6 +321,7 @@ static void print_usage(int /* argc */, char ** argv) {
     printf("  -o, --output <csv|json|jsonl|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format));
     printf("  -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr));
     printf("  -v, --verbose                             (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0");
+    printf("  --progress                                (default: %s)\n", cmd_params_defaults.progress ? "1" : "0");
     printf("\n");
     printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n");
 }
@@ -364,6 +367,7 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
     params.numa = cmd_params_defaults.numa;
     params.prio = cmd_params_defaults.prio;
     params.delay = cmd_params_defaults.delay;
+    params.progress = cmd_params_defaults.progress;
 
     for (int i = 1; i < argc; i++) {
         arg = argv[i];
@@ -616,6 +620,8 @@ static cmd_params parse_cmd_params(int argc, char ** argv) {
             invalid_param = !output_format_from_str(argv[i], params.output_format_stderr);
         } else if (arg == "-v" || arg == "--verbose") {
             params.verbose = true;
+        } else if (arg == "--progress") {
+            params.progress = true;
         } else {
             invalid_param = true;
             break;
@@ -1523,7 +1529,13 @@ int main(int argc, char ** argv) {
     llama_model * lmodel = nullptr;
     const cmd_params_instance * prev_inst = nullptr;
 
+    int params_idx = 0;
+    auto params_count = params_instances.size();
     for (const auto & inst : params_instances) {
+        params_idx ++;
+        if (params.progress) {
+            fprintf(stderr, "llama-bench: benchmark %d/%ld: starting\n", params_idx, params_count);
+        }
         // keep the same model between tests when possible
         if (!lmodel || !prev_inst || !inst.equal_mparams(*prev_inst)) {
             if (lmodel) {
@@ -1556,7 +1568,7 @@ int main(int argc, char ** argv) {
 
         struct ggml_threadpool_params tpp = ggml_threadpool_params_default(t.n_threads);
         if (!parse_cpu_mask(t.cpu_mask, tpp.cpumask)) {
-            LOG_TEE("%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
+            fprintf(stderr, "%s: failed to parse cpu-mask: %s\n", __func__, t.cpu_mask.c_str());
             exit(1);
         }
         tpp.strict_cpu = t.cpu_strict;
@@ -1565,7 +1577,7 @@ int main(int argc, char ** argv) {
 
         struct ggml_threadpool* threadpool = ggml_threadpool_new(&tpp);
         if (!threadpool) {
-            LOG_TEE("%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
+            fprintf(stderr, "%s: threadpool create failed : n_threads %d\n", __func__, tpp.n_threads);
             exit(1);
         }
 
@@ -1573,10 +1585,16 @@ int main(int argc, char ** argv) {
 
         // warmup run
         if (t.n_prompt > 0) {
+            if (params.progress) {
+                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup prompt run\n", params_idx, params_count);
+            }
             //test_prompt(ctx, std::min(t.n_batch, std::min(t.n_prompt, 32)), 0, t.n_batch, t.n_threads);
             test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
         }
         if (t.n_gen > 0) {
+            if (params.progress) {
+                fprintf(stderr, "llama-bench: benchmark %d/%ld: warmup generation run\n", params_idx, params_count);
+            }
             test_gen(ctx, 1, 0, t.n_threads);
         }
 
@@ -1586,9 +1604,15 @@ int main(int argc, char ** argv) {
             uint64_t t_start = get_time_ns();
 
             if (t.n_prompt > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%ld: prompt run %d/%d\n", params_idx, params_count, i + 1, params.reps);
+                }
                 test_prompt(ctx, t.n_prompt, 0, t.n_batch, t.n_threads);
             }
             if (t.n_gen > 0) {
+                if (params.progress) {
+                    fprintf(stderr, "llama-bench: benchmark %d/%ld: generation run %d/%d\n", params_idx, params_count, i + 1, params.reps);
+                }
                 test_gen(ctx, t.n_gen, t.n_prompt, t.n_threads);
             }
 
Reference in New Issue
Block a user

Author: Aarni Koskela