convert : avoid dequantizing mxfp4 for GPT-OSS (#16756)
@@ -8943,6 +8943,13 @@ class SmolLM3Model(LlamaModel):
 class GptOssModel(TextModel):
     model_arch = gguf.MODEL_ARCH.GPT_OSS
 
+    # TODO: remove once MXFP4 is supported more generally
+    def dequant_model(self):
+        quant_config = self.hparams.get("quantization_config")
+        if quant_config is not None and quant_config.get("quant_method") == "mxfp4":
+            return
+        return super().dequant_model()
+
     def transform_nibble_layout(self, tensor):
         assert tensor.dtype == torch.uint8
         assert tensor.shape[-1] == 16
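
For context: MXFP4 (the OCP Microscaling FP4 format) packs two 4-bit E2M1 values per byte and shares one E8M0 power-of-two scale across each block of 32 elements, which is why transform_nibble_layout asserts a last dimension of 16 bytes (32 nibbles = one block). Below is a minimal illustrative decoder for a single block; it is not code from this commit, and the helper name dequant_mxfp4_block and the low-nibble-first packing order are assumptions.

import torch

# FP4 (E2M1) code points per the OCP Microscaling spec:
# 1 sign bit, 2 exponent bits, 1 mantissa bit -> 16 values.
FP4_VALUES = torch.tensor(
    [0.0, 0.5, 1.0, 1.5, 2.0, 3.0, 4.0, 6.0,
     -0.0, -0.5, -1.0, -1.5, -2.0, -3.0, -4.0, -6.0])

def dequant_mxfp4_block(packed: torch.Tensor, scale_e8m0: int) -> torch.Tensor:
    # One MX block: 16 packed bytes -> 32 floats sharing one scale.
    assert packed.dtype == torch.uint8 and packed.numel() == 16
    lo = packed & 0x0F                  # assumed: even elements in low nibbles
    hi = packed >> 4                    # assumed: odd elements in high nibbles
    codes = torch.stack((lo, hi), dim=-1).flatten()  # re-interleave to 32 codes
    scale = 2.0 ** (scale_e8m0 - 127)   # E8M0: a biased power-of-two exponent
    return FP4_VALUES[codes.long()] * scale

# Example: decode one block with scale 2^0 = 1.
# vals = dequant_mxfp4_block(torch.randint(0, 256, (16,), dtype=torch.uint8), 127)

The point of the dequant_model() override above is to skip exactly this expansion: when quant_method is "mxfp4", the converter keeps the packed uint8 tensors as-is and only reorders nibbles into ggml's block layout, avoiding a dequantize-to-float / requantize round trip.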