convert : for FP8, use scale type to decide auto type
@@ -376,11 +376,13 @@ class ModelBase:
                         weight_name = name.removesuffix("_scale_inv")
                         w = self.model_tensors[weight_name]
                         s = self.model_tensors[name]
+                        # TODO: change to FP8 once natively supported
+                        auto_qtype = s.auto_qtype if s.auto_qtype is not gguf.GGMLQuantizationType.F32 else gguf.GGMLQuantizationType.BF16
                         self.model_tensors[weight_name] = ModelTensorInfo(
                             load=lambda w=w, s=s: dequant_simple(w.load(), s.load()),
                             size=w.size,
                             src_type=w.src_type,
-                            auto_qtype=gguf.GGMLQuantizationType.BF16, # TODO: change to FP8 once natively supported
+                            auto_qtype=auto_qtype,
                         )
                         tensors_to_remove.append(name)
             elif quant_method == "gptq":
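
For context, here is a minimal sketch of the rule this commit introduces, assuming the gguf-py package the converter already uses (pick_auto_qtype is a hypothetical helper name, not code from the repository):

    import gguf

    def pick_auto_qtype(scale_auto_qtype: gguf.GGMLQuantizationType) -> gguf.GGMLQuantizationType:
        # The dequantized weight inherits the scale tensor's auto type; an F32
        # scale still falls back to BF16 because FP8 output is not natively
        # supported yet (the TODO in the diff).
        if scale_auto_qtype is gguf.GGMLQuantizationType.F32:
            return gguf.GGMLQuantizationType.BF16
        return scale_auto_qtype

    assert pick_auto_qtype(gguf.GGMLQuantizationType.F32) is gguf.GGMLQuantizationType.BF16
    assert pick_auto_qtype(gguf.GGMLQuantizationType.F16) is gguf.GGMLQuantizationType.F16

In the converter itself this decision is inlined as the conditional expression added just before the ModelTensorInfo construction.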
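
The load lambda defers dequantization until the tensor is actually read. dequant_simple itself lies outside this hunk; the numpy sketch below shows one plausible shape for such a helper, assuming a per-block "_scale_inv" factor that is multiplied in during dequantization (the 128x128 block layout is an illustrative assumption, not the repository's code):

    import numpy as np

    def dequant_blockwise(weight: np.ndarray, scale_inv: np.ndarray, block: int = 128) -> np.ndarray:
        # Upcast the quantized weight, then multiply each (block x block) tile
        # by its corresponding entry of the per-block factor tensor.
        out = weight.astype(np.float32)
        for i in range(scale_inv.shape[0]):
            for j in range(scale_inv.shape[1]):
                out[i * block:(i + 1) * block, j * block:(j + 1) * block] *= scale_inv[i, j]
        return out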