ggml-quants : handle imatrix for MXFP4

This commit is contained in:
Francis Couture-Harpin
2025-08-11 22:02:53 -04:00
parent be48528b06
commit d9b625edb6
3 changed files with 483 additions and 6 deletions

View File

@@ -670,8 +670,10 @@ class MXFP4(__Quant, qtype=GGMLQuantizationType.MXFP4):
d = abs(blocks).max(axis=-1, keepdims=True)
with np.errstate(divide="ignore"):
e = np.where(d > 0, np.floor(np.log2(d)) - 2 + 127, 0).astype(np.uint8)
scale = (d / np.float32(4)).view(np.uint32)
# round away from zero: carry the top mantissa bit (0x00400000) into the exponent field
scale += (scale & np.uint32(0x00400000)) << 1
e = ((scale >> 23) & np.uint32(0xFF)).astype(np.uint8)
d = cls.e8m0_to_fp32_half(e)