mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-29 08:41:22 +00:00 
			
		
		
		
	llama-bench : add JSONL (NDJSON) output mode (#9288)
* llama-bench : add JSONL (NDJSON) output mode
* llama-bench : update usage docs
This commit is contained in:
		| @@ -14,7 +14,8 @@ Performance testing tool for llama.cpp. | ||||
|     1. [Markdown](#markdown) | ||||
|     2. [CSV](#csv) | ||||
|     3. [JSON](#json) | ||||
|     4. [SQL](#sql) | ||||
|     4. [JSONL](#jsonl) | ||||
|     5. [SQL](#sql) | ||||
|  | ||||
| ## Syntax | ||||
|  | ||||
| @@ -26,13 +27,17 @@ options: | ||||
|   -m, --model <filename>                    (default: models/7B/ggml-model-q4_0.gguf) | ||||
|   -p, --n-prompt <n>                        (default: 512) | ||||
|   -n, --n-gen <n>                           (default: 128) | ||||
|   -pg <pp,tg>                         (default: 512,128) | ||||
|   -pg <pp,tg>                               (default: ) | ||||
|   -b, --batch-size <n>                      (default: 2048) | ||||
|   -ub, --ubatch-size <n>                    (default: 512) | ||||
|   -ctk, --cache-type-k <t>                  (default: f16) | ||||
|   -ctv, --cache-type-v <t>                  (default: f16) | ||||
|   -t, --threads <n>                   (default: 16) | ||||
|   -t, --threads <n>                         (default: 8) | ||||
|   -C, --cpu-mask <hex,hex>                  (default: 0x0) | ||||
|   --cpu-strict <0|1>                        (default: 0) | ||||
|   --poll <0...100>                          (default: 50) | ||||
|   -ngl, --n-gpu-layers <n>                  (default: 99) | ||||
|   -rpc, --rpc <rpc_servers>                 (default: ) | ||||
|   -sm, --split-mode <none|layer|row>        (default: layer) | ||||
|   -mg, --main-gpu <i>                       (default: 0) | ||||
|   -nkvo, --no-kv-offload <0|1>              (default: 0) | ||||
| @@ -42,7 +47,10 @@ options: | ||||
|   -embd, --embeddings <0|1>                 (default: 0) | ||||
|   -ts, --tensor-split <ts0/ts1/..>          (default: 0) | ||||
|   -r, --repetitions <n>                     (default: 5) | ||||
|   -o, --output <csv|json|md|sql>      (default: md) | ||||
|   --prio <0|1|2|3>                          (default: 0) | ||||
|   --delay <0...N> (seconds)                 (default: 0) | ||||
|   -o, --output <csv|json|jsonl|md|sql>      (default: md) | ||||
|   -oe, --output-err <csv|json|jsonl|md|sql> (default: none) | ||||
|   -v, --verbose                             (default: 0) | ||||
|  | ||||
| Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times. | ||||
| @@ -238,6 +246,19 @@ $ ./llama-bench -o json | ||||
| ] | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ### JSONL | ||||
|  | ||||
| ```sh | ||||
| $ ./llama-bench -o jsonl | ||||
| ``` | ||||
|  | ||||
| ```json lines | ||||
| {"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":512,"n_gen":0,"test_time":"2023-09-23T12:09:57Z","avg_ns":212365953,"stddev_ns":985423,"avg_ts":2410.974041,"stddev_ts":11.163766,"samples_ns":[213837238,211635853,212328053,211329715,212698907],"samples_ts":[2394.34,2419.25,2411.36,2422.75,2407.16]} | ||||
| {"build_commit":"3469684","build_number":1275,"cuda":true,"metal":false,"gpu_blas":true,"blas":true,"cpu_info":"13th Gen Intel(R) Core(TM) i9-13900K","gpu_info":"NVIDIA GeForce RTX 3090 Ti","model_filename":"models/7B/ggml-model-q4_0.gguf","model_type":"llama 7B mostly Q4_0","model_size":3825065984,"model_n_params":6738415616,"n_batch":512,"n_threads":16,"f16_kv":true,"n_gpu_layers":99,"main_gpu":0,"mul_mat_q":true,"tensor_split":"0.00","n_prompt":0,"n_gen":128,"test_time":"2023-09-23T12:09:59Z","avg_ns":977425219,"stddev_ns":9268593,"avg_ts":130.965708,"stddev_ts":1.238924,"samples_ns":[984472709,974901233,989474741,970729355,967548060],"samples_ts":[130.019,131.295,129.362,131.86,132.293]} | ||||
| ``` | ||||
|  | ||||
|  | ||||
| ### SQL | ||||
|  | ||||
| SQL output is suitable for importing into a SQLite database. The output can be piped into the `sqlite3` command line tool to add the results to a database. | ||||
|   | ||||
| @@ -171,13 +171,14 @@ static std::string get_gpu_info() { | ||||
| } | ||||
|  | ||||
| // command line params | ||||
| enum output_formats {NONE, CSV, JSON, MARKDOWN, SQL}; | ||||
| enum output_formats {NONE, CSV, JSON, JSONL, MARKDOWN, SQL}; | ||||
|  | ||||
| static const char * output_format_str(output_formats format) { | ||||
|     switch (format) { | ||||
|         case NONE:     return "none"; | ||||
|         case CSV:      return "csv"; | ||||
|         case JSON:     return "json"; | ||||
|         case JSONL:    return "jsonl"; | ||||
|         case MARKDOWN: return "md"; | ||||
|         case SQL:      return "sql"; | ||||
|         default: GGML_ABORT("invalid output format"); | ||||
| @@ -191,6 +192,8 @@ static bool output_format_from_str(const std::string & s, output_formats & forma | ||||
|         format = CSV; | ||||
|     } else if (s == "json") { | ||||
|         format = JSON; | ||||
|     } else if (s == "jsonl") { | ||||
|         format = JSONL; | ||||
|     } else if (s == "md") { | ||||
|         format = MARKDOWN; | ||||
|     } else if (s == "sql") { | ||||
| @@ -308,8 +311,8 @@ static void print_usage(int /* argc */, char ** argv) { | ||||
|     printf("  -r, --repetitions <n>                     (default: %d)\n", cmd_params_defaults.reps); | ||||
|     printf("  --prio <0|1|2|3>                          (default: %d)\n", cmd_params_defaults.prio); | ||||
|     printf("  --delay <0...N> (seconds)                 (default: %d)\n", cmd_params_defaults.delay); | ||||
|     printf("  -o, --output <csv|json|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); | ||||
|     printf("  -oe, --output-err <csv|json|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr)); | ||||
|     printf("  -o, --output <csv|json|jsonl|md|sql>      (default: %s)\n", output_format_str(cmd_params_defaults.output_format)); | ||||
|     printf("  -oe, --output-err <csv|json|jsonl|md|sql> (default: %s)\n", output_format_str(cmd_params_defaults.output_format_stderr)); | ||||
|     printf("  -v, --verbose                             (default: %s)\n", cmd_params_defaults.verbose ? "1" : "0"); | ||||
|     printf("\n"); | ||||
|     printf("Multiple values can be given for each parameter by separating them with ',' or by specifying the parameter multiple times.\n"); | ||||
| @@ -1074,10 +1077,8 @@ struct csv_printer : public printer { | ||||
|     } | ||||
| }; | ||||
|  | ||||
| struct json_printer : public printer { | ||||
|     bool first = true; | ||||
|  | ||||
|     static std::string escape_json(const std::string & value) { | ||||
| static std::string escape_json(const std::string & value) { | ||||
|     std::string escaped; | ||||
|     for (auto c : value) { | ||||
|         if (c == '"') { | ||||
| @@ -1093,9 +1094,9 @@ struct json_printer : public printer { | ||||
|         } | ||||
|     } | ||||
|     return escaped; | ||||
|     } | ||||
| } | ||||
|  | ||||
|     static std::string format_value(const std::string & field, const std::string & value) { | ||||
| static std::string format_json_value(const std::string & field, const std::string & value) { | ||||
|     switch (test::get_field_type(field)) { | ||||
|         case test::STRING: | ||||
|             return "\"" + escape_json(value) + "\""; | ||||
| @@ -1104,7 +1105,10 @@ struct json_printer : public printer { | ||||
|         default: | ||||
|             return value; | ||||
|     } | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct json_printer : public printer { | ||||
|     bool first = true; | ||||
|  | ||||
|     void print_header(const cmd_params & params) override { | ||||
|         fprintf(fout, "[\n"); | ||||
| @@ -1114,7 +1118,7 @@ struct json_printer : public printer { | ||||
|     void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) { | ||||
|         assert(fields.size() == values.size()); | ||||
|         for (size_t i = 0; i < fields.size(); i++) { | ||||
|             fprintf(fout, "    \"%s\": %s,\n", fields.at(i).c_str(), format_value(fields.at(i), values.at(i)).c_str()); | ||||
|             fprintf(fout, "    \"%s\": %s,\n", fields.at(i).c_str(), format_json_value(fields.at(i), values.at(i)).c_str()); | ||||
|         } | ||||
|     } | ||||
|  | ||||
| @@ -1137,6 +1141,25 @@ struct json_printer : public printer { | ||||
|     } | ||||
| }; | ||||
|  | ||||
|  | ||||
| // Printer for JSON Lines (NDJSON) output: each test becomes one JSON object on its own line. | ||||
| struct jsonl_printer : public printer { | ||||
|     // Emits every field as a `"name": value, ` pair. Each pair, including the last one, | ||||
|     // is followed by ", ", so the caller has to write at least one more member afterwards. | ||||
|     void print_fields(const std::vector<std::string> & fields, const std::vector<std::string> & values) { | ||||
|         assert(fields.size() == values.size()); | ||||
|         const size_t n_fields = fields.size(); | ||||
|         for (size_t idx = 0; idx != n_fields; ++idx) { | ||||
|             const std::string & key      = fields.at(idx); | ||||
|             const std::string   rendered = format_json_value(key, values.at(idx)); | ||||
|             fprintf(fout, "\"%s\": %s, ", key.c_str(), rendered.c_str()); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Writes one complete record for `t`: the regular fields first, then the raw | ||||
|     // sample arrays, and flushes right away so the line is handed to the stream at once. | ||||
|     void print_test(const test & t) override { | ||||
|         fprintf(fout, "{"); | ||||
|         print_fields(test::get_fields(), t.get_values()); | ||||
|  | ||||
|         const auto ns_list = join(t.samples_ns, ", "); | ||||
|         fprintf(fout, "\"samples_ns\": [ %s ],", ns_list.c_str()); | ||||
|  | ||||
|         const auto ts_list = join(t.get_ts(), ", "); | ||||
|         fprintf(fout, "\"samples_ts\": [ %s ]", ts_list.c_str()); | ||||
|  | ||||
|         fprintf(fout, "}\n"); | ||||
|         fflush(fout); | ||||
|     } | ||||
| }; | ||||
|  | ||||
| struct markdown_printer : public printer { | ||||
|     std::vector<std::string> fields; | ||||
|  | ||||
| @@ -1437,6 +1460,8 @@ static std::unique_ptr<printer> create_printer(output_formats format) { | ||||
|             return std::unique_ptr<printer>(new csv_printer()); | ||||
|         case JSON: | ||||
|             return std::unique_ptr<printer>(new json_printer()); | ||||
|         case JSONL: | ||||
|             return std::unique_ptr<printer>(new jsonl_printer()); | ||||
|         case MARKDOWN: | ||||
|             return std::unique_ptr<printer>(new markdown_printer()); | ||||
|         case SQL: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Aarni Koskela
					Aarni Koskela