Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
	convert : remove unused field ModelTensorInfo.src_qtype
@@ -65,8 +65,7 @@ class ModelTensorInfo:
     load: Callable[[], Tensor]
     size: int  # in elements
     src_type: str
-    src_qtype: gguf.GGMLQuantizationType | None = None
-    dst_qtype: gguf.GGMLQuantizationType | None = None
+    auto_qtype: gguf.GGMLQuantizationType | None = None
 
 
 class ModelBase:
@@ -139,17 +138,17 @@ class ModelBase:
             # find out the most common type
             hist: dict[gguf.GGMLQuantizationType, int] = {}
             for t in self.model_tensors.values():
-                if t.dst_qtype is not None:
-                    if t.dst_qtype not in hist:
-                        hist[t.dst_qtype] = 0
-                    hist[t.dst_qtype] += t.size
+                if t.auto_qtype is not None:
+                    if t.auto_qtype not in hist:
+                        hist[t.auto_qtype] = 0
+                    hist[t.auto_qtype] += t.size
             max_qtype = gguf.GGMLQuantizationType.F32
             max_size = 0
             for qtype, size in hist.items():
                 if size > max_size:
                     max_qtype = qtype
                     max_size = size
-            # TODO: add more type if they're used as dst_qtypes
+            # TODO: add more type if they're used as auto_qtype
             if max_qtype == gguf.GGMLQuantizationType.F32:
                 self.ftype = gguf.LlamaFileType.ALL_F32
             elif max_qtype == gguf.GGMLQuantizationType.F16:
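The hunk above implements the file-type guess: every tensor that carries an auto_qtype votes with its element count, and the most common type decides self.ftype. Below is a minimal standalone sketch of that selection; it assumes only the gguf Python package, and guess_dominant_qtype is an illustrative helper, not a function in the conversion script.

import gguf

def guess_dominant_qtype(tensors: list[tuple[int, gguf.GGMLQuantizationType | None]]) -> gguf.GGMLQuantizationType:
    # Each entry is (size in elements, auto_qtype); types vote with their size.
    hist: dict[gguf.GGMLQuantizationType, int] = {}
    for size, auto_qtype in tensors:
        if auto_qtype is not None:
            hist[auto_qtype] = hist.get(auto_qtype, 0) + size
    # Fall back to F32 when no tensor carries an auto_qtype, as in the hunk above.
    return max(hist, key=hist.get, default=gguf.GGMLQuantizationType.F32)

# A large F16 weight matrix outweighs a small F32 norm tensor.
print(guess_dominant_qtype([
    (4096 * 4096, gguf.GGMLQuantizationType.F16),
    (4096, gguf.GGMLQuantizationType.F32),
]).name)  # F16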
@@ -200,8 +199,7 @@ class ModelBase:
                     load=lambda r=remote_tensor: LazyTorchTensor.from_remote_tensor(r),
                     size=math.prod(remote_tensor.shape),
                     src_type=str(dtype),
-                    src_qtype=qtype,
-                    dst_qtype=qtype,
+                    auto_qtype=qtype,
                 )
 
             return tensors
@@ -265,8 +263,7 @@ class ModelBase:
                         load=data_gen,
                         size=size,
                         src_type=str(dtype),
-                        src_qtype=qtype,
-                        dst_qtype=qtype,
+                        auto_qtype=qtype,
                     )
 
         # verify tensor name presence and identify potentially missing files
@@ -370,8 +367,7 @@ class ModelBase:
                             load=lambda w=w, s=s: dequant_bitnet(w.load(), s.load()),
                             size=w.size,
                             src_type="bitnet",
-                            src_qtype=gguf.GGMLQuantizationType.F32,
-                            dst_qtype=gguf.GGMLQuantizationType.TQ1_0,
+                            auto_qtype=gguf.GGMLQuantizationType.TQ1_0,
                         )
                         tensors_to_remove.append(name)
             elif quant_method == "fp8":
@@ -384,8 +380,7 @@ class ModelBase:
                             load=lambda w=w, s=s: dequant_simple(w.load(), s.load()),
                             size=w.size,
                             src_type=w.src_type,
-                            src_qtype=gguf.GGMLQuantizationType.F32,
-                            dst_qtype=gguf.GGMLQuantizationType.BF16, # TODO: change to FP8 once natively supported
+                            auto_qtype=gguf.GGMLQuantizationType.BF16, # TODO: change to FP8 once natively supported
                         )
                         tensors_to_remove.append(name)
             elif quant_method == "gptq":
@@ -403,8 +398,7 @@ class ModelBase:
                             ),
                             size=qweight.size,  # TODO: use more accurate value
                             src_type=f"GPTQ-{bits}bit",
-                            src_qtype=gguf.GGMLQuantizationType.F32,
-                            dst_qtype=gguf.GGMLQuantizationType.Q8_0 if bits == 8 else gguf.GGMLQuantizationType.Q4_1,
+                            auto_qtype=gguf.GGMLQuantizationType.Q8_0 if bits == 8 else gguf.GGMLQuantizationType.Q4_1,
                         )
                         tensors_to_remove += [
                             base_name + n
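The bitnet, fp8, and gptq branches above follow one pattern: the source weights are dequantized for loading, but a closer-fitting auto_qtype is recorded for when the output type is only guessed. A hedged condensation of that mapping follows; auto_qtype_for is an illustrative name of my own, not part of the script.

import gguf

def auto_qtype_for(quant_method: str, bits: int | None = None) -> gguf.GGMLQuantizationType:
    # Illustrative summary of the branches above, not a function in the conversion script.
    if quant_method == "bitnet":
        return gguf.GGMLQuantizationType.TQ1_0
    if quant_method == "fp8":
        return gguf.GGMLQuantizationType.BF16  # per the diff's TODO: FP8 once natively supported
    if quant_method == "gptq":
        return gguf.GGMLQuantizationType.Q8_0 if bits == 8 else gguf.GGMLQuantizationType.Q4_1
    raise ValueError(f"unhandled quant_method: {quant_method}")

print(auto_qtype_for("gptq", bits=4).name)  # Q4_1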
@@ -569,7 +563,7 @@ class ModelBase:
                 # No override (data_qtype is False), or wants to be quantized (data_qtype is True)
                 if isinstance(data_qtype, bool):
                     if self.ftype_guessed:
-                        data_qtype = old_qtype if tensor_info is None or tensor_info.dst_qtype is None else tensor_info.dst_qtype
+                        data_qtype = old_qtype if tensor_info is None or tensor_info.auto_qtype is None else tensor_info.auto_qtype
                     elif self.ftype == gguf.LlamaFileType.ALL_F32:
                         data_qtype = gguf.GGMLQuantizationType.F32
                     elif self.ftype == gguf.LlamaFileType.MOSTLY_F16:
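When the file type was guessed rather than explicitly requested, the branch above lets a tensor's own auto_qtype win and otherwise keeps the type the tensor was loaded with (old_qtype). A tiny sketch of just that branch, under the same assumptions as the previous examples:

import gguf

def pick_when_guessed(old_qtype: gguf.GGMLQuantizationType,
                      auto_qtype: gguf.GGMLQuantizationType | None) -> gguf.GGMLQuantizationType:
    # Mirrors the ftype_guessed branch: prefer the per-tensor auto_qtype when present.
    return old_qtype if auto_qtype is None else auto_qtype

print(pick_when_guessed(gguf.GGMLQuantizationType.F16, gguf.GGMLQuantizationType.Q8_0).name)  # Q8_0
print(pick_when_guessed(gguf.GGMLQuantizationType.F16, None).name)                            # F16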
@@ -8942,7 +8936,7 @@ class LazyTorchTensor(gguf.LazyBase):
             meta=gguf.LazyNumpyTensor.meta_with_dtype_and_shape(dtype, self.shape),
             args=(self,),
             func=(lambda s: s.numpy()),
-            ranges=self._ranges
+            ranges=self._ranges,
         )
 
     @classmethod
Author: Francis Couture-Harpin