mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	CUDA: mul_mat_q=true llama_context_params default (#2912)
This commit is contained in:
@@ -5287,7 +5287,7 @@ struct llama_context_params llama_context_default_params() {
         /*.progress_callback           =*/ nullptr,
         /*.progress_callback_user_data =*/ nullptr,
         /*.low_vram                    =*/ false,
-        /*.mul_mat_q                   =*/ false,
+        /*.mul_mat_q                   =*/ true,
         /*.f16_kv                      =*/ true,
         /*.logits_all                  =*/ false,
         /*.vocab_only                  =*/ false,
		Reference in New Issue
	
	Block a user
Johannes Gäßler