Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	llama : shorten quantization descriptions
@@ -14,103 +14,27 @@ struct quant_option {
 };
 
 static const std::vector<struct quant_option> QUANT_OPTIONS = {
-    {
-        "Q4_0",
-        LLAMA_FTYPE_MOSTLY_Q4_0,
-        " 3.50G, +0.2499 ppl @ 7B - small, very high quality loss - legacy, prefer using Q3_K_M",
-    },
-    {
-        "Q4_1",
-        LLAMA_FTYPE_MOSTLY_Q4_1,
-        " 3.90G, +0.1846 ppl @ 7B - small, substantial quality loss - legacy, prefer using Q3_K_L",
-    },
-    {
-        "Q5_0",
-        LLAMA_FTYPE_MOSTLY_Q5_0,
-        " 4.30G, +0.0796 ppl @ 7B - medium, balanced quality - legacy, prefer using Q4_K_M",
-    },
-    {
-        "Q5_1",
-        LLAMA_FTYPE_MOSTLY_Q5_1,
-        " 4.70G, +0.0415 ppl @ 7B - medium, low quality loss - legacy, prefer using Q5_K_M",
-    },
+    { "Q4_0",   LLAMA_FTYPE_MOSTLY_Q4_0,   " 3.50G, +0.2499 ppl @ 7B", },
+    { "Q4_1",   LLAMA_FTYPE_MOSTLY_Q4_1,   " 3.90G, +0.1846 ppl @ 7B", },
+    { "Q5_0",   LLAMA_FTYPE_MOSTLY_Q5_0,   " 4.30G, +0.0796 ppl @ 7B", },
+    { "Q5_1",   LLAMA_FTYPE_MOSTLY_Q5_1,   " 4.70G, +0.0415 ppl @ 7B", },
 #ifdef GGML_USE_K_QUANTS
-    {
-        "Q2_K",
-        LLAMA_FTYPE_MOSTLY_Q2_K,
-        " 2.67G, +0.8698 ppl @ 7B - smallest, extreme quality loss - not recommended",
-    },
-    {
-        "Q3_K",
-        LLAMA_FTYPE_MOSTLY_Q3_K_M,
-        "alias for Q3_K_M"
-    },
-    {
-        "Q3_K_S",
-        LLAMA_FTYPE_MOSTLY_Q3_K_S,
-        " 2.75G, +0.5505 ppl @ 7B - very small, very high quality loss",
-    },
-    {
-        "Q3_K_M",
-        LLAMA_FTYPE_MOSTLY_Q3_K_M,
-        " 3.06G, +0.2437 ppl @ 7B - very small, very high quality loss",
-    },
-    {
-        "Q3_K_L",
-        LLAMA_FTYPE_MOSTLY_Q3_K_L,
-        " 3.35G, +0.1803 ppl @ 7B - small, substantial quality loss",
-    },
-    {
-        "Q4_K",
-        LLAMA_FTYPE_MOSTLY_Q4_K_M,
-        "alias for Q4_K_M",
-    },
-    {
-        "Q4_K_S",
-        LLAMA_FTYPE_MOSTLY_Q4_K_S,
-        " 3.56G, +0.1149 ppl @ 7B - small, significant quality loss",
-    },
-    {
-        "Q4_K_M",
-        LLAMA_FTYPE_MOSTLY_Q4_K_M,
-        " 3.80G, +0.0535 ppl @ 7B - medium, balanced quality - *recommended*",
-    },
-    {
-        "Q5_K",
-        LLAMA_FTYPE_MOSTLY_Q5_K_M,
-        "alias for Q5_K_M",
-    },
-    {
-        "Q5_K_S",
-        LLAMA_FTYPE_MOSTLY_Q5_K_S,
-        " 4.33G, +0.0353 ppl @ 7B - large, low quality loss - *recommended*",
-    },
-    {
-        "Q5_K_M",
-        LLAMA_FTYPE_MOSTLY_Q5_K_M,
-        " 4.45G, +0.0142 ppl @ 7B - large, very low quality loss - *recommended*",
-    },
-    {
-        "Q6_K",
-        LLAMA_FTYPE_MOSTLY_Q6_K,
-        " 5.15G, +0.0044 ppl @ 7B - very large, extremely low quality loss",
-    },
+    { "Q2_K",   LLAMA_FTYPE_MOSTLY_Q2_K,   " 2.67G, +0.8698 ppl @ 7B", },
+    { "Q3_K",   LLAMA_FTYPE_MOSTLY_Q3_K_M, "alias for Q3_K_M" },
+    { "Q3_K_S", LLAMA_FTYPE_MOSTLY_Q3_K_S, " 2.75G, +0.5505 ppl @ 7B", },
+    { "Q3_K_M", LLAMA_FTYPE_MOSTLY_Q3_K_M, " 3.06G, +0.2437 ppl @ 7B", },
+    { "Q3_K_L", LLAMA_FTYPE_MOSTLY_Q3_K_L, " 3.35G, +0.1803 ppl @ 7B", },
+    { "Q4_K",   LLAMA_FTYPE_MOSTLY_Q4_K_M, "alias for Q4_K_M", },
+    { "Q4_K_S", LLAMA_FTYPE_MOSTLY_Q4_K_S, " 3.56G, +0.1149 ppl @ 7B", },
+    { "Q4_K_M", LLAMA_FTYPE_MOSTLY_Q4_K_M, " 3.80G, +0.0535 ppl @ 7B", },
+    { "Q5_K",   LLAMA_FTYPE_MOSTLY_Q5_K_M, "alias for Q5_K_M", },
+    { "Q5_K_S", LLAMA_FTYPE_MOSTLY_Q5_K_S, " 4.33G, +0.0353 ppl @ 7B", },
+    { "Q5_K_M", LLAMA_FTYPE_MOSTLY_Q5_K_M, " 4.45G, +0.0142 ppl @ 7B", },
+    { "Q6_K",   LLAMA_FTYPE_MOSTLY_Q6_K,   " 5.15G, +0.0044 ppl @ 7B", },
 #endif
-    {
-        "Q8_0",
-        LLAMA_FTYPE_MOSTLY_Q8_0,
-        " 6.70G, +0.0004 ppl @ 7B - very large, extremely low quality loss - not recommended",
-    },
-    {
-        "F16",
-        LLAMA_FTYPE_MOSTLY_F16,
-        "13.00G              @ 7B - extremely large, virtually no quality loss - not recommended",
-    },
-    {
-        "F32",
-        LLAMA_FTYPE_ALL_F32,
-        "26.00G              @ 7B - absolutely huge, lossless - not recommended",
-    },
+    { "Q8_0",   LLAMA_FTYPE_MOSTLY_Q8_0,   " 6.70G, +0.0004 ppl @ 7B", },
+    { "F16",    LLAMA_FTYPE_MOSTLY_F16,    "13.00G              @ 7B", },
+    { "F32",    LLAMA_FTYPE_ALL_F32,       "26.00G              @ 7B", },
 };
 
 
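The net effect of the change: each QUANT_OPTIONS entry now carries only the quantized model size and the perplexity delta at 7B, folded onto a single line, while the subjective quality and recommendation notes are dropped. The sketch below is a minimal, self-contained illustration (not the actual llama.cpp code) of how a table like this can be walked to print the shortened descriptions in a --help-style listing; the simplified stand-in enum and the exact output format are assumptions made for the example.

// Minimal sketch, assuming a simplified stand-in for the file-type enum;
// the real table in llama.cpp uses the LLAMA_FTYPE_MOSTLY_* values shown
// in the diff above.
#include <cstdio>
#include <string>
#include <vector>

enum example_ftype { EX_Q4_0, EX_Q4_K_M, EX_Q8_0 }; // illustrative only

struct quant_option {
    std::string   name;  // user-facing type name, e.g. "Q4_K_M"
    example_ftype ftype; // internal file-type id
    std::string   desc;  // shortened description: size and ppl delta only
};

static const std::vector<quant_option> QUANT_OPTIONS = {
    { "Q4_0",   EX_Q4_0,   " 3.50G, +0.2499 ppl @ 7B" },
    { "Q4_K_M", EX_Q4_K_M, " 3.80G, +0.0535 ppl @ 7B" },
    { "Q8_0",   EX_Q8_0,   " 6.70G, +0.0004 ppl @ 7B" },
};

int main() {
    // One line per quantization type, similar to a usage listing.
    for (const auto & q : QUANT_OPTIONS) {
        printf("  %-8s : %s\n", q.name.c_str(), q.desc.c_str());
    }
    return 0;
}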
Author: Georgi Gerganov