mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-02 09:12:03 +00:00
ggml-quants : 1.625 bpw ternary packing for BitNet 1.58b
This commit is contained in:
@@ -1023,6 +1023,8 @@ class GGMLQuantizationType(IntEnum):
|
||||
F64 = 28
|
||||
IQ1_M = 29
|
||||
BF16 = 30
|
||||
Q2_2 = 31
|
||||
Q1_3 = 32
|
||||
|
||||
|
||||
# TODO: add GGMLFileType from ggml_ftype in ggml.h
|
||||
@@ -1064,6 +1066,8 @@ class LlamaFileType(IntEnum):
|
||||
MOSTLY_IQ4_XS = 30 # except 1d tensors
|
||||
MOSTLY_IQ1_M = 31 # except 1d tensors
|
||||
MOSTLY_BF16 = 32 # except 1d tensors
|
||||
MOSTLY_Q2_2 = 33 # except 1d tensors
|
||||
MOSTLY_Q1_3 = 34 # except 1d tensors
|
||||
|
||||
GUESSED = 1024 # not specified in the model file
|
||||
|
||||
@@ -1137,6 +1141,8 @@ GGML_QUANT_SIZES: dict[GGMLQuantizationType, tuple[int, int]] = {
|
||||
GGMLQuantizationType.F64: (1, 8),
|
||||
GGMLQuantizationType.IQ1_M: (256, QK_K // 8 + QK_K // 16 + QK_K // 32),
|
||||
GGMLQuantizationType.BF16: (1, 2),
|
||||
GGMLQuantizationType.Q2_2: (32, 8),
|
||||
GGMLQuantizationType.Q1_3: (64, 12 + 1),
|
||||
}
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user