gguf : deduplicate (#2629)

* gguf : better type names
* dedup : CPU + Metal is working
* ggml : fix warnings about unused results
* llama.cpp : fix line feed and compiler warning
* llama : fix strncpy warning + note token_to_str does not write null
* llama : restore the original load/save session implementation
  (will migrate this to GGUF in the future)
* convert-llama-h5-to-gguf.py : support alt ctx param name
* ggml : assert when using ggml_mul with non-F32 src1
* examples : dedup simple

---------

Co-authored-by: klosax <131523366+klosax@users.noreply.github.com>
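The "fix strncpy warning" item touches the usual strncpy pitfall: when the source is at least as long as the destination, strncpy copies exactly n bytes and writes no terminating null, which is also why the message notes that token_to_str does not write null. The following is a minimal, self-contained sketch of that behaviour and the conventional manual termination; it is illustrative only and not code from this commit.

// Illustrative sketch (not code from this commit): strncpy copies at most n
// bytes and does NOT null-terminate the destination when the source is at
// least n bytes long -- the same caveat the commit notes for token_to_str.
#include <cstdio>
#include <cstring>

int main() {
    const char * piece = "a token string longer than the buffer";
    char buf[8];

    // Copies exactly sizeof(buf) bytes here; buf has no trailing '\0' yet.
    std::strncpy(buf, piece, sizeof(buf));

    // The caller has to terminate (or carry the length) explicitly.
    buf[sizeof(buf) - 1] = '\0';

    std::printf("%s\n", buf);
    return 0;
}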
@@ -170,18 +170,6 @@ bool gpt_params_parse(int argc, char ** argv, gpt_params & params) {
                 break;
             }
             params.n_ctx = std::stoi(argv[i]);
-        } else if (arg == "-gqa" || arg == "--gqa") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.n_gqa = std::stoi(argv[i]);
-        } else if (arg == "-eps" || arg == "--rms-norm-eps") {
-            if (++i >= argc) {
-                invalid_param = true;
-                break;
-            }
-            params.rms_norm_eps = std::stof(argv[i]);
         } else if (arg == "--rope-freq-base") {
             if (++i >= argc) {
                 invalid_param = true;
@@ -546,8 +534,6 @@ void gpt_print_usage(int /*argc*/, char ** argv, const gpt_params & params) {
     fprintf(stdout, "  -n N, --n-predict N   number of tokens to predict (default: %d, -1 = infinity, -2 = until context filled)\n", params.n_predict);
     fprintf(stdout, "  -c N, --ctx-size N    size of the prompt context (default: %d)\n", params.n_ctx);
     fprintf(stdout, "  -b N, --batch-size N  batch size for prompt processing (default: %d)\n", params.n_batch);
-    fprintf(stdout, "  -gqa N, --gqa N       grouped-query attention factor (TEMP!!! use 8 for LLaMAv2 70B) (default: %d)\n", params.n_gqa);
-    fprintf(stdout, "  -eps N, --rms-norm-eps N rms norm eps (TEMP!!! use 1e-5 for LLaMAv2) (default: %.1e)\n", params.rms_norm_eps);
     fprintf(stdout, "  --top-k N             top-k sampling (default: %d, 0 = disabled)\n", params.top_k);
     fprintf(stdout, "  --top-p N             top-p sampling (default: %.1f, 1.0 = disabled)\n", (double)params.top_p);
     fprintf(stdout, "  --tfs N               tail free sampling, parameter z (default: %.1f, 1.0 = disabled)\n", (double)params.tfs_z);
@@ -638,8 +624,6 @@ struct llama_context_params llama_context_params_from_gpt_params(const gpt_param
 
     lparams.n_ctx           = params.n_ctx;
     lparams.n_batch         = params.n_batch;
-    lparams.n_gqa           = params.n_gqa;
-    lparams.rms_norm_eps    = params.rms_norm_eps;
     lparams.n_gpu_layers    = params.n_gpu_layers;
     lparams.main_gpu        = params.main_gpu;
     lparams.tensor_split    = params.tensor_split;