	Merge branch 'master' into finelayer
@@ -977,6 +977,10 @@ static bool common_params_parse_ex(int argc, char ** argv, common_params_context
         for (auto & seq_breaker : params.sampling.dry_sequence_breakers) {
             string_process_escapes(seq_breaker);
         }
+        for (auto & pair : params.speculative.replacements) {
+            string_process_escapes(pair.first);
+            string_process_escapes(pair.second);
+        }
     }
 
     if (!params.kv_overrides.empty()) {
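The new loop escape-processes both sides of each replacement pair added by --spec-replace later in this diff. A minimal, self-contained sketch of what that amounts to is below; unescape_sketch only handles "\n" and "\t" for illustration (the real string_process_escapes in common/ covers more cases), and the replacement pair shown is hypothetical.

// Sketch only: simplified stand-in for string_process_escapes applied to a
// --spec-replace pair, mirroring the loop added above.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

static void unescape_sketch(std::string & s) {
    std::string out;
    for (size_t i = 0; i < s.size(); ++i) {
        if (s[i] == '\\' && i + 1 < s.size()) {
            char c = s[++i];
            if (c == 'n') { out += '\n'; continue; }
            if (c == 't') { out += '\t'; continue; }
            out += '\\'; // unknown escape: keep it verbatim
        }
        out += s[i];
    }
    s = std::move(out);
}

int main() {
    // hypothetical pair as it would arrive from the command line
    std::vector<std::pair<std::string, std::string>> replacements = {
        { "<|target_eot|>\\n", "<|draft_eot|>\\n" },
    };
    for (auto & pair : replacements) {
        unescape_sketch(pair.first);
        unescape_sketch(pair.second);
    }
    printf("%s", replacements[0].first.c_str());
    return 0;
}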
@@ -2092,6 +2096,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.no_kv_offload = true;
         }
     ).set_env("LLAMA_ARG_NO_KV_OFFLOAD"));
+    add_opt(common_arg(
+        {"-nr", "--no-repack"},
+        "disable weight repacking",
+        [](common_params & params) {
+            params.no_extra_bufts = true;
+        }
+    ).set_env("LLAMA_ARG_NO_REPACK"));
     add_opt(common_arg(
         {"-ctk", "--cache-type-k"}, "TYPE",
         string_format(
@@ -2370,6 +2381,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             }
         }
     ));
+    add_opt(common_arg(
+        {"--cpu-moe"},
+        "use CPU for Mixture of Experts (MoE) weights",
+        [](common_params & params) {
+            params.tensor_buft_overrides.push_back({"\\.ffn_up_exps\\.weight$",   ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_down_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+            params.tensor_buft_overrides.push_back({"\\.ffn_gate_exps\\.weight$", ggml_backend_cpu_buffer_type()});
+        }
+    ).set_env("LLAMA_ARG_CPU_MOE"));
     add_opt(common_arg(
         {"-ngl", "--gpu-layers", "--n-gpu-layers"}, "N",
         "number of layers to store in VRAM",
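--cpu-moe registers three tensor-buffer-type overrides keyed by regex, pinning the expert FFN weights to the CPU buffer type. The sketch below only illustrates the regex side of that, matching the patterns against typical GGUF tensor names with std::regex; the tensor names are examples and the actual override plumbing in llama.cpp's buffer selection is not shown.

// Sketch: which tensor names the --cpu-moe patterns would catch.
#include <cstdio>
#include <regex>
#include <string>
#include <vector>

int main() {
    const std::vector<std::string> patterns = {
        "\\.ffn_up_exps\\.weight$",
        "\\.ffn_down_exps\\.weight$",
        "\\.ffn_gate_exps\\.weight$",
    };
    const std::vector<std::string> tensors = {
        "blk.17.ffn_up_exps.weight",   // expert tensor -> kept on CPU
        "blk.17.ffn_norm.weight",      // not matched   -> default placement
    };
    for (const auto & name : tensors) {
        bool on_cpu = false;
        for (const auto & pat : patterns) {
            if (std::regex_search(name, std::regex(pat))) { on_cpu = true; break; }
        }
        printf("%-28s -> %s\n", name.c_str(), on_cpu ? "CPU buffer" : "default buffer");
    }
    return 0;
}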
@@ -2628,6 +2648,15 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.n_out_freq = value;
         }
     ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
+    add_opt(common_arg(
+        {"--output-format"}, "{gguf,dat}",
+        string_format("output format for imatrix file (default: %s)", params.imat_dat > 0 ? "dat" : "gguf"),
+        [](common_params & params, const std::string & value) {
+            /**/ if (value == "gguf") { params.imat_dat = -1; }
+            else if (value == "dat")  { params.imat_dat = 1;  }
+            else { throw std::invalid_argument("invalid output format"); }
+        }
+    ).set_examples({LLAMA_EXAMPLE_IMATRIX}));
     add_opt(common_arg(
         {"--save-frequency"}, "N",
         string_format("save an imatrix copy every N iterations (default: %d)", params.n_save_freq),
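The option encodes the imatrix output format as a signed tri-state on params.imat_dat: +1 forces the legacy .dat format, -1 forces GGUF, and the help string treats anything non-positive as GGUF. A small sketch of that convention, assuming 0 means "no explicit choice" (an inference from the default string, not stated in the diff):

// Sketch of the tri-state convention implied by --output-format.
#include <cstdio>
#include <stdexcept>
#include <string>

static int parse_output_format(const std::string & value) {
    if (value == "gguf") return -1;
    if (value == "dat")  return  1;
    throw std::invalid_argument("invalid output format");
}

int main() {
    int imat_dat = 0;                       // presumably: no explicit format chosen
    imat_dat = parse_output_format("dat");  // e.g. user passed --output-format dat
    printf("format label: %s\n", imat_dat > 0 ? "dat" : "gguf");
    return 0;
}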
@@ -3250,6 +3279,13 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
             params.speculative.model.path = value;
         }
     ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}).set_env("LLAMA_ARG_MODEL_DRAFT"));
+    add_opt(common_arg(
+        {"--spec-replace"}, "TARGET", "DRAFT",
+        "translate the string in TARGET into DRAFT if the draft model and main model are not compatible",
+        [](common_params & params, const std::string & tgt, const std::string & dft) {
+            params.speculative.replacements.push_back({ tgt, dft });
+        }
+    ).set_examples({LLAMA_EXAMPLE_SPECULATIVE, LLAMA_EXAMPLE_SERVER}));
     add_opt(common_arg(
         {"-ctkd", "--cache-type-k-draft"}, "TYPE",
         string_format(
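--spec-replace collects TARGET/DRAFT string pairs so text produced under the target model's vocabulary can be translated before the draft model sees it. Below is a hedged sketch of what such a translation could look like as plain substring substitution; the pair shown is hypothetical and the real speculative-decoding code path in llama.cpp is not reproduced here.

// Sketch: apply --spec-replace pairs to a piece of target-model text.
#include <cstdio>
#include <string>
#include <utility>
#include <vector>

static std::string apply_replacements(
        std::string text,
        const std::vector<std::pair<std::string, std::string>> & replacements) {
    for (const auto & [tgt, dft] : replacements) {
        size_t pos = 0;
        while ((pos = text.find(tgt, pos)) != std::string::npos) {
            text.replace(pos, tgt.size(), dft);
            pos += dft.size();
        }
    }
    return text;
}

int main() {
    // hypothetical pair: the two models spell their end-of-turn marker differently
    std::vector<std::pair<std::string, std::string>> replacements = {
        { "<|im_end|>", "<|endoftext|>" },
    };
    const std::string out = apply_replacements("Hello!<|im_end|>", replacements);
    printf("%s\n", out.c_str());
    return 0;
}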
@@ -3439,28 +3475,11 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         }
     ).set_examples({LLAMA_EXAMPLE_SERVER}));
 
-    // diffusion parameters
     add_opt(common_arg(
         { "--diffusion-steps" }, "N",
         string_format("number of diffusion steps (default: %d)", params.diffusion.steps),
         [](common_params & params, int value) { params.diffusion.steps = value; }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-eps" }, "F",
-        string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
-        [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-algorithm" }, "N",
-        string_format("diffusion algorithm: 0=ORIGIN, 1=MASKGIT_PLUS, 2=TOPK_MARGIN, 3=ENTROPY (default: %d)",
-                      params.diffusion.algorithm),
-        [](common_params & params, int value) { params.diffusion.algorithm = value; }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
-    add_opt(common_arg(
-        { "--diffusion-alg-temp" }, "F",
-        string_format("algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
-        [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
-    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
     add_opt(common_arg(
         { "--diffusion-visual" },
         string_format("enable visual diffusion mode (show progressive generation) (default: %s)",
@@ -3468,6 +3487,40 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
         [](common_params & params) { params.diffusion.visual_mode = true; }
     ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
 
+    add_opt(common_arg(
+        { "--diffusion-eps" }, "F",
+        string_format("epsilon for timesteps (default: %.6f)", (double) params.diffusion.eps),
+        [](common_params & params, const std::string & value) { params.diffusion.eps = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-algorithm" }, "N",
+        string_format("diffusion algorithm: 0=ORIGIN, 1=ENTROPY_BASED, 2=MARGIN_BASED, 3=RANDOM, 4=LOW_CONFIDENCE (default: %d)",
+                      params.diffusion.algorithm),
+        [](common_params & params, int value) { params.diffusion.algorithm = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-alg-temp" }, "F",
+        string_format("dream algorithm temperature (default: %.3f)", (double) params.diffusion.alg_temp),
+        [](common_params & params, const std::string & value) { params.diffusion.alg_temp = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+
+    add_opt(common_arg(
+        { "--diffusion-block-length" }, "N",
+        string_format("llada block length for generation (default: %d)", params.diffusion.block_length),
+        [](common_params & params, int value) { params.diffusion.block_length = value; }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-cfg-scale" }, "F",
+        string_format("llada classifier-free guidance scale (default: %.3f)", (double) params.diffusion.cfg_scale),
+        [](common_params & params, const std::string & value) { params.diffusion.cfg_scale = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+    add_opt(common_arg(
+        { "--diffusion-add-gumbel-noise" }, "F",
+        string_format("add gumbel noise to the logits if temp > 0.0 (default: %s)", params.diffusion.add_gumbel_noise ? "true" : "false"),
+        [](common_params & params, const std::string & value) { params.diffusion.add_gumbel_noise = std::stof(value); }
+    ).set_examples({ LLAMA_EXAMPLE_DIFFUSION }));
+
+
     add_opt(
         common_arg({ "-lr", "--learning-rate-initial" }, "ALPHA",
                    string_format(
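Among the new diffusion options, --diffusion-add-gumbel-noise toggles perturbing the logits with Gumbel noise when the sampling temperature is above zero. The sketch below shows the textbook Gumbel-max trick (g = -log(-log(U)) added to temperature-scaled logits); the exact formula used by the llama.cpp diffusion example may differ, and the logit values here are made up.

// Sketch: Gumbel-noise perturbation of logits, the kind of behaviour
// --diffusion-add-gumbel-noise enables for temp > 0.
#include <cmath>
#include <cstdio>
#include <random>
#include <vector>

int main() {
    std::vector<float> logits = { 2.0f, 1.5f, 0.1f };
    const float temp = 0.8f;

    std::mt19937 rng(1234);
    std::uniform_real_distribution<float> uniform(1e-9f, 1.0f);

    size_t best     = 0;
    float  best_val = -INFINITY;
    for (size_t i = 0; i < logits.size(); ++i) {
        const float gumbel = -std::log(-std::log(uniform(rng))); // g = -log(-log(U))
        const float noisy  = logits[i] / temp + gumbel;
        if (noisy > best_val) { best_val = noisy; best = i; }
        printf("token %zu: logit %.2f -> noisy %.2f\n", i, logits[i], noisy);
    }
    printf("sampled token: %zu\n", best);
    return 0;
}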