mirror of https://github.com/ggml-org/llama.cpp.git
	bitnet : replace 1.58b with b1.58, as in the paper
Author: Francis Couture-Harpin
@@ -300,7 +300,7 @@ class Model:
 
                 if self.ftype != gguf.LlamaFileType.ALL_F32 and extra_f16 and not extra_f32:
                     # TODO: cleaner model-specific per-tensor types
-                    # NOTE: Q1_3 is only relevant for BitNet 1.58b
+                    # NOTE: Q1_3 is only relevant for BitNet b1.58
                     if (
                         self.ftype == gguf.LlamaFileType.MOSTLY_Q1_3
                         and gguf.can_quantize_to_q1_3(data)
@@ -26,8 +26,8 @@ static const std::vector<struct quant_option> QUANT_OPTIONS = {
     { "IQ2_M",  LLAMA_FTYPE_MOSTLY_IQ2_M,  " 2.7  bpw quantization",            },
     { "IQ1_S",  LLAMA_FTYPE_MOSTLY_IQ1_S,  " 1.56 bpw quantization",            },
     { "IQ1_M",  LLAMA_FTYPE_MOSTLY_IQ1_M,  " 1.75 bpw quantization",            },
-    { "Q1_3",   LLAMA_FTYPE_MOSTLY_Q1_3,   " 1.63 bpw for BitNet 1.58b",        },
-    { "Q2_2",   LLAMA_FTYPE_MOSTLY_Q2_2,   " 2.00 bpw for BitNet 1.58b",        },
+    { "Q1_3",   LLAMA_FTYPE_MOSTLY_Q1_3,   " 1.63 bpw for BitNet b1.58",        },
+    { "Q2_2",   LLAMA_FTYPE_MOSTLY_Q2_2,   " 2.00 bpw for BitNet b1.58",        },
     { "Q2_K",   LLAMA_FTYPE_MOSTLY_Q2_K,   " 2.96G, +3.5199 ppl @ Llama-3-8B",  },
     { "Q2_K_S", LLAMA_FTYPE_MOSTLY_Q2_K_S, " 2.96G, +3.1836 ppl @ Llama-3-8B",  },
     { "IQ3_XXS",LLAMA_FTYPE_MOSTLY_IQ3_XXS," 3.06 bpw quantization",            },
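The bits-per-weight figures in this table follow directly from the block layouts. As a sanity check, here is a small standalone C sketch; the 13-byte block size is an assumption derived from the block_q1_3 definition in the next hunk (12 packed bytes covering 60 elements, plus one more byte for the remaining 4):

#include <stdio.h>

int main(void) {
    // Assumed layout: one Q1_3 block covers 64 weights (QK1_3) in 13 bytes:
    // 12 bytes * 5 elements/byte + 1 byte * 4 elements = 64 elements.
    const int weights_per_block = 64;
    const int bytes_per_block   = 13;
    printf("Q1_3: %.3f bpw\n", (double)(bytes_per_block * 8) / weights_per_block);
    // Prints 1.625, which the table above rounds to "1.63 bpw".
    return 0;
}

These description strings also double as the allowed-types listing that the quantize example prints in its usage output.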
@@ -137,7 +137,7 @@ typedef sycl::half2 ggml_half2;
 
 #endif // GGML_COMMON_DECL_CUDA || GGML_COMMON_DECL_HIP
 
-// 1.625 bpw for BitNet 1.58b models
+// 1.625 bpw for BitNet b1.58 models
 #define QK1_3 64
 typedef struct {
     uint8_t q[(QK1_3 - 4*QK1_3/64)/5]; // 5 elements per byte (3^5 = 243 < 256)
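The q array packs five ternary weights per byte as base-3 digits, which is why 3^5 = 243 < 256 makes the scheme work. Below is a minimal illustrative pack/unpack pair, not the exact bit layout ggml uses (the real format also stores 4 of every 64 elements in a separate byte, per the 4*QK1_3/64 term above):

#include <assert.h>
#include <stdint.h>

// Map each weight w in {-1, 0, +1} to a digit d = w + 1 in {0, 1, 2},
// then combine five digits into one byte: d0 + 3*d1 + 9*d2 + 27*d3 + 81*d4.
// The maximum value is 2*(1+3+9+27+81) = 242, which fits in a uint8_t.
static uint8_t pack5(const int8_t w[5]) {
    uint8_t b = 0;
    for (int i = 4; i >= 0; --i) {
        assert(-1 <= w[i] && w[i] <= 1);
        b = (uint8_t)(3*b + (w[i] + 1));
    }
    return b;
}

// Inverse: peel off base-3 digits, lowest-order first.
static void unpack5(uint8_t b, int8_t w[5]) {
    for (int i = 0; i < 5; ++i) {
        w[i] = (int8_t)(b % 3) - 1;
        b /= 3;
    }
}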
@@ -3366,7 +3366,7 @@ size_t quantize_q2_2(const float * restrict src, void * restrict dst, int64_t nr
     return nrow * row_size;
 }
 
-// ====================== 1.625 bpw (de)-quantization (BitNet 1.58b)
+// ====================== 1.625 bpw (de)-quantization (BitNet b1.58)
 
 void quantize_row_q1_3_reference(const float * restrict x, block_q1_3 * restrict y, int64_t k) {
     assert(k % QK1_3 == 0);
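quantize_row_q1_3_reference is the scalar reference path that turns a row of k floats into packed ternary blocks. Here is a minimal sketch of the core idea, assuming the incoming BitNet b1.58 weights are already ternary-valued floats and reusing the hypothetical pack5 helper from the previous sketch; the trailing 4 elements of each real block_q1_3 are omitted for brevity:

// Quantize the first 60 of a block's 64 weights: round each float to the
// nearest of {-1, 0, +1}, then pack five weights per byte.
static void quantize_block_ternary_sketch(const float *x, uint8_t q[12]) {
    for (int i = 0; i < 12; ++i) {
        int8_t w[5];
        for (int j = 0; j < 5; ++j) {
            const float v = x[5*i + j];
            w[j] = (int8_t)(v > 0.5f ? 1 : (v < -0.5f ? -1 : 0));
        }
        q[i] = pack5(w);
    }
}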
@@ -4186,8 +4186,8 @@ static std::string llama_model_ftype_name(llama_ftype ftype) {
         case LLAMA_FTYPE_ALL_F32:     return "all F32";
         case LLAMA_FTYPE_MOSTLY_F16:  return "F16";
         case LLAMA_FTYPE_MOSTLY_BF16: return "BF16";
-        case LLAMA_FTYPE_MOSTLY_Q1_3: return "Q1_3 - 1.625 bpw for BitNet 1.58b";
-        case LLAMA_FTYPE_MOSTLY_Q2_2: return "Q2_2 - 2.000 bpw for BitNet 1.58b";
+        case LLAMA_FTYPE_MOSTLY_Q1_3: return "Q1_3 - 1.625 bpw for BitNet b1.58";
+        case LLAMA_FTYPE_MOSTLY_Q2_2: return "Q2_2 - 2.000 bpw for BitNet b1.58";
         case LLAMA_FTYPE_MOSTLY_Q4_0: return "Q4_0";
         case LLAMA_FTYPE_MOSTLY_Q4_1: return "Q4_1";
         case LLAMA_FTYPE_MOSTLY_Q4_1_SOME_F16: