mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	common : add -hfd option for the draft model (#11318)
* common : add -hfd option for the draft model
* cont : fix env var
* cont : more fixes
This commit is contained in:
		| @@ -1728,13 +1728,16 @@ struct server_context { | ||||
|         add_bos_token = llama_vocab_get_add_bos(vocab); | ||||
|         has_eos_token = llama_vocab_eos(vocab) != LLAMA_TOKEN_NULL; | ||||
|  | ||||
|-        if (!params_base.speculative.model.empty()) { | ||||
|+        if (!params_base.speculative.model.empty() || !params_base.speculative.hf_repo.empty()) { | ||||
|             SRV_INF("loading draft model '%s'\n", params_base.speculative.model.c_str()); | ||||
|  | ||||
|             auto params_dft = params_base; | ||||
|  | ||||
|             params_dft.devices      = params_base.speculative.devices; | ||||
|+            params_dft.hf_file      = params_base.speculative.hf_file; | ||||
|+            params_dft.hf_repo      = params_base.speculative.hf_repo; | ||||
|             params_dft.model        = params_base.speculative.model; | ||||
|+            params_dft.model_url    = params_base.speculative.model_url; | ||||
|             params_dft.n_ctx        = params_base.speculative.n_ctx == 0 ? params_base.n_ctx / params_base.n_parallel : params_base.speculative.n_ctx; | ||||
|             params_dft.n_gpu_layers = params_base.speculative.n_gpu_layers; | ||||
|             params_dft.n_parallel   = 1; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov