Mirror of https://github.com/ggml-org/llama.cpp.git
	common : add missing env var for speculative (#10801)
@@ -2083,35 +2083,35 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params, int value) {
             params.speculative.n_max = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MAX"));
     add_opt(common_arg(
         {"--draft-min", "--draft-n-min"}, "N",
         string_format("minimum number of draft tokens to use for speculative decoding (default: %d)", params.speculative.n_min),
         [](common_params & params, int value) {
             params.speculative.n_min = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_LOOKUP, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_MIN"));
     add_opt(common_arg(
         {"--draft-p-split"}, "P",
         string_format("speculative decoding split probability (default: %.1f)", (double)params.speculative.p_split),
         [](common_params & params, const std::string & value) {
             params.speculative.p_split = std::stof(value);
         }
-    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}));
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE}).set_env("LLAMA_ARG_DRAFT_P_SPLIT"));
     add_opt(common_arg(
         {"--draft-p-min"}, "P",
         string_format("minimum speculative decoding probability (greedy) (default: %.1f)", (double)params.speculative.p_min),
         [](common_params & params, const std::string & value) {
             params.speculative.p_min = std::stof(value);
         }
-    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_DRAFT_P_MIN"));
     add_opt(common_arg(
         {"-cd", "--ctx-size-draft"}, "N",
         string_format("size of the prompt context for the draft model (default: %d, 0 = loaded from model)", params.speculative.n_ctx),
         [](common_params & params, int value) {
             params.speculative.n_ctx = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_CTX_SIZE_DRAFT"));
     add_opt(common_arg(
         {"-devd", "--device-draft"}, "<dev1,dev2,..>",
         "comma-separated list of devices to use for offloading the draft model (none = don't offload)\n"
@@ -2131,14 +2131,14 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
                 fprintf(stderr, "warning: consult docs/build.md for compilation instructions\n");
             }
         }
-    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_N_GPU_LAYERS_DRAFT"));
     add_opt(common_arg(
         {"-md", "--model-draft"}, "FNAME",
         "draft model for speculative decoding (default: unused)",
         [](common_params & params, const std::string & value) {
             params.speculative.model = value;
         }
-    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
 
     return ctx_arg;
 }
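The change attaches environment-variable names (LLAMA_ARG_DRAFT_MAX, LLAMA_ARG_DRAFT_MIN, LLAMA_ARG_DRAFT_P_SPLIT, LLAMA_ARG_DRAFT_P_MIN, LLAMA_ARG_CTX_SIZE_DRAFT, LLAMA_ARG_N_GPU_LAYERS_DRAFT, LLAMA_ARG_MODEL_DRAFT) to the speculative-decoding options via set_env, so they can be supplied from the environment as well as from CLI flags. The snippet below is a minimal, self-contained sketch of that general pattern, not the actual llama.cpp parser: the Arg struct, apply_env helper, int-only handler, and main are illustrative assumptions; only the env-var name and the set_env idea come from the diff above.

// Sketch: an option carries CLI spellings plus an optional env-var name,
// and the env value is fed through the same handler the CLI path would use.
#include <cstdlib>
#include <functional>
#include <string>
#include <utility>
#include <vector>

struct Arg {
    std::vector<std::string> flags;    // CLI spellings, e.g. {"--draft-max"}
    std::string              env;      // optional env var, e.g. "LLAMA_ARG_DRAFT_MAX"
    std::function<void(int)> handler;  // stores the parsed value into the params struct

    Arg & set_env(std::string name) { env = std::move(name); return *this; }
};

// Apply env-var values before parsing argv, so an explicit CLI flag can still override them.
static void apply_env(std::vector<Arg> & args) {
    for (auto & arg : args) {
        if (arg.env.empty()) {
            continue; // this option has no env-var fallback
        }
        if (const char * val = std::getenv(arg.env.c_str())) {
            arg.handler(std::stoi(val)); // reuse the CLI handler for the env value
        }
    }
}

int main() {
    int draft_max = 16; // stands in for params.speculative.n_max

    std::vector<Arg> args;
    args.push_back(Arg());
    args.back().flags   = {"--draft-max"};
    args.back().handler = [&](int v) { draft_max = v; };
    args.back().set_env("LLAMA_ARG_DRAFT_MAX");

    apply_env(args); // with LLAMA_ARG_DRAFT_MAX=8 exported, draft_max becomes 8
    return 0;
}

With the commit applied, setting LLAMA_ARG_DRAFT_MAX=8 in the environment should behave like passing --draft-max 8 on the command line, matching the other LLAMA_ARG_* variables the common argument parser already recognizes; an explicit flag typically still takes precedence over the env value.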
Author: Xuan Son Nguyen