mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	speculative : refactor and add a simpler example (#10362)

* speculative : refactor and add a simpler example

ggml-ci

* speculative : clean-up and add comments and TODOs [no ci]

* speculative : manage context in common_speculative

ggml-ci

* speculative : simplify

ggml-ci

* speculative : simplify (cont)

ggml-ci

* speculative : add --draft-min CLI arg

* speculative : minor fixup

* make : build fixes

* speculative : do not redraft previous drafts

ggml-ci

* speculative : fix the draft sampling

ggml-ci

* speculative : fix compile warning

* common : refactor args

ggml-ci

* common : change defaults [no ci]

* common : final touches

ggml-ci
This commit is contained in:
		| @@ -22,7 +22,7 @@ int main(int argc, char ** argv){ | ||||
|     common_init(); | ||||
|  | ||||
|     // max. number of additional tokens to draft if match is found | ||||
|  -  const int n_draft = params.n_draft; | ||||
|  +  const int n_draft = params.speculative.n_max; | ||||
|  | ||||
|     const bool dump_kv_cache = params.dump_kv_cache; | ||||
|  | ||||
| @@ -102,7 +102,7 @@ int main(int argc, char ** argv){ | ||||
|  | ||||
|     bool has_eos = false; | ||||
|  | ||||
|  -  struct common_sampler * smpl = common_sampler_init(model, params.sparams); | ||||
|  +  struct common_sampler * smpl = common_sampler_init(model, params.sampling); | ||||
|  | ||||
|     std::vector<llama_token> draft; | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov