mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
ggml-quants : handle zero amax for MXFP4
This commit is contained in:
@@ -288,7 +288,7 @@ void quantize_row_mxfp4_ref(const float * GGML_RESTRICT x, block_mxfp4 * GGML_RE
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
- const uint8_t e = (uint8_t) (floorf(log2f(amax)) - 2 + 127);
+ const uint8_t e = amax > 0.0f ? (uint8_t) (floorf(log2f(amax)) - 2 + 127) : 0;
|
||||||
|
|
||||||
const float d = GGML_E8M0_TO_FP32_HALF(e);
|
const float d = GGML_E8M0_TO_FP32_HALF(e);
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user