Mirror of https://github.com/ggml-org/llama.cpp.git — synced 2025-10-31 08:51:55 +00:00
	common : add llama.vim preset for Qwen2.5 Coder (#11945)
This commit adds presets for llama.vim to use the default Qwen 2.5 Coder models. The motivation for this change is to make it easier to start a server suitable for use with the llama.vim plugin. For example, the server can be started with a command like the following:

```console
$ llama-server --fim-qwen-1.5b-default
```

Refs: https://github.com/ggml-org/llama.cpp/issues/10932
This commit is contained in:
		| @@ -2502,5 +2502,53 @@ common_params_context common_params_parser_init(common_params & params, llama_ex | |||||||
|         } |         } | ||||||
|     ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); |     ).set_examples({LLAMA_EXAMPLE_EMBEDDING, LLAMA_EXAMPLE_SERVER})); | ||||||
|  |  | ||||||
|  |     add_opt(common_arg( | ||||||
|  |         {"--fim-qwen-1.5b-default"}, | ||||||
|  |         string_format("use default Qwen 2.5 Coder 1.5B (note: can download weights from the internet)"), | ||||||
|  |         [](common_params & params) { | ||||||
|  |             params.hf_repo = "ggml-org/Qwen2.5-Coder-1.5B-Q8_0-GGUF"; | ||||||
|  |             params.hf_file = "qwen2.5-coder-1.5b-q8_0.gguf"; | ||||||
|  |             params.port = 8012; | ||||||
|  |             params.n_gpu_layers = 99; | ||||||
|  |             params.flash_attn = true; | ||||||
|  |             params.n_ubatch = 1024; | ||||||
|  |             params.n_batch = 1024; | ||||||
|  |             params.n_ctx = 0; | ||||||
|  |             params.n_cache_reuse = 256; | ||||||
|  |         } | ||||||
|  |     ).set_examples({LLAMA_EXAMPLE_SERVER})); | ||||||
|  |  | ||||||
|  |     add_opt(common_arg( | ||||||
|  |         {"--fim-qwen-3b-default"}, | ||||||
|  |         string_format("use default Qwen 2.5 Coder 3B (note: can download weights from the internet)"), | ||||||
|  |         [](common_params & params) { | ||||||
|  |             params.hf_repo = "ggml-org/Qwen2.5-Coder-3B-Q8_0-GGUF"; | ||||||
|  |             params.hf_file = "qwen2.5-coder-3b-q8_0.gguf"; | ||||||
|  |             params.port = 8012; | ||||||
|  |             params.n_gpu_layers = 99; | ||||||
|  |             params.flash_attn = true; | ||||||
|  |             params.n_ubatch = 1024; | ||||||
|  |             params.n_batch = 1024; | ||||||
|  |             params.n_ctx = 0; | ||||||
|  |             params.n_cache_reuse = 256; | ||||||
|  |         } | ||||||
|  |     ).set_examples({LLAMA_EXAMPLE_SERVER})); | ||||||
|  |  | ||||||
|  |     add_opt(common_arg( | ||||||
|  |         {"--fim-qwen-7b-default"}, | ||||||
|  |         string_format("use default Qwen 2.5 Coder 7B (note: can download weights from the internet)"), | ||||||
|  |         [](common_params & params) { | ||||||
|  |             params.hf_repo = "ggml-org/Qwen2.5-Coder-7B-Q8_0-GGUF"; | ||||||
|  |             params.hf_file = "qwen2.5-coder-7b-q8_0.gguf"; | ||||||
|  |             params.port = 8012; | ||||||
|  |             params.n_gpu_layers = 99; | ||||||
|  |             params.flash_attn = true; | ||||||
|  |             params.n_ubatch = 1024; | ||||||
|  |             params.n_batch = 1024; | ||||||
|  |             params.n_ctx = 0; | ||||||
|  |             params.n_cache_reuse = 256; | ||||||
|  |         } | ||||||
|  |     ).set_examples({LLAMA_EXAMPLE_SERVER})); | ||||||
|  |  | ||||||
|     return ctx_arg; |     return ctx_arg; | ||||||
| } | } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Daniel Bevenius
					Daniel Bevenius