	gguf-py : fix some metadata name extraction edge cases

* convert_lora : use the lora dir for the model card path
convert_hf_to_gguf.py

@@ -62,6 +62,7 @@ class Model:
     gguf_writer: gguf.GGUFWriter
     model_name: str | None
     metadata_override: Path | None
+    dir_model_card: Path
 
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
@@ -90,6 +91,7 @@ class Model:
         self.tensor_names = None
         self.metadata_override = metadata_override
         self.model_name = model_name
+        self.dir_model_card = dir_model  # overridden in convert_lora_to_gguf.py
 
         # Apply heuristics to figure out typical tensor encoding based on first layer tensor encoding type
         if self.ftype == gguf.LlamaFileType.GUESSED:
@@ -345,7 +347,7 @@ class Model:
 
         total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count()
 
-        self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, total_params)
+        self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, self.dir_model_card, total_params)
 
         # Fallback to model directory name if metadata name is still missing
         if self.metadata.name is None:
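The effect of the hunks above is that the model card source is decoupled from the weights directory: `dir_model_card` defaults to `dir_model` but can be redirected by a subclass. A minimal sketch of that override pattern (hypothetical class names and illustrative paths, not the actual converter code):

from pathlib import Path

class TinyModel:
    def __init__(self, dir_model: Path):
        self.dir_model = dir_model
        # The model card (README.md) is read from the model directory by
        # default; subclasses may point it elsewhere.
        self.dir_model_card = dir_model

class TinyLoraAdapter(TinyModel):
    def __init__(self, dir_base_model: Path, dir_lora: Path):
        super().__init__(dir_base_model)
        # LoRA conversion: weights come from the base model, but the
        # model card should come from the adapter's own directory.
        self.dir_model_card = dir_lora

m = TinyLoraAdapter(Path("models/base"), Path("models/my-lora"))
print(m.dir_model)       # models/base
print(m.dir_model_card)  # models/my-lora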
convert_lora_to_gguf.py

@@ -304,12 +304,6 @@ if __name__ == '__main__':
     # load base model
     logger.info(f"Loading base model: {dir_base_model.name}")
     hparams = Model.load_hparams(dir_base_model)
-
-    with open(lora_config, "r") as f:
-        lparams: dict[str, Any] = json.load(f)
-
-    alpha: float = lparams["lora_alpha"]
-
     with torch.inference_mode():
         try:
             model_class = Model.from_model_architecture(hparams["architectures"][0])
@@ -320,12 +314,21 @@ if __name__ == '__main__':
         class LoraModel(model_class):
             model_arch = model_class.model_arch
 
+            lora_alpha: float
+
+            def __init__(self, *args, dir_lora_model: Path, lora_alpha: float, **kwargs):
+
+                super().__init__(*args, **kwargs)
+
+                self.dir_model_card = dir_lora_model
+                self.lora_alpha = float(lora_alpha)
+
             def set_type(self):
                 self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
                 self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
 
             def set_gguf_parameters(self):
-                self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
+                self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, self.lora_alpha)
                 super().set_gguf_parameters()
 
             def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
@@ -368,6 +371,11 @@ if __name__ == '__main__':
                     yield (dest_name + ".lora_a", lora_a)
                     yield (dest_name + ".lora_b", lora_b)
 
+        with open(lora_config, "r") as f:
+            lparams: dict[str, Any] = json.load(f)
+
+        alpha: float = lparams["lora_alpha"]
+
         model_instance = LoraModel(
             dir_base_model,
             ftype,
@@ -376,6 +384,8 @@ if __name__ == '__main__':
             use_temp_file=False,
             eager=args.no_lazy,
             dry_run=args.dry_run,
+            dir_lora_model=dir_lora,
+            lora_alpha=alpha,
        )
 
         logger.info("Exporting model...")
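Note what changed here besides the model card path: `set_gguf_parameters` previously read a module-level `alpha` captured from the enclosing scope; it now reads `self.lora_alpha`, passed in explicitly as a constructor keyword. A standalone sketch of that keyword-forwarding pattern (hypothetical class names, not the converter itself):

class Base:
    def __init__(self, name: str):
        self.name = name

class Adapter(Base):
    # Subclass-specific keyword arguments are peeled off here; everything
    # else is forwarded to the parent initializer unchanged.
    def __init__(self, *args, lora_alpha: float, **kwargs):
        super().__init__(*args, **kwargs)
        self.lora_alpha = float(lora_alpha)

    def describe(self) -> str:
        # Reads the value from the instance instead of a module-level global.
        return f"{self.name}: alpha={self.lora_alpha}"

print(Adapter("demo", lora_alpha=16.0).describe())  # demo: alpha=16.0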
gguf-py/gguf/metadata.py

@@ -44,7 +44,7 @@ class Metadata:
     datasets: Optional[list[str]] = None
 
     @staticmethod
-    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
+    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, model_card_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
         # This grabs as many contextual authorship metadata as possible from the model repository
         # making any conversion as required to match the gguf kv store metadata format
         # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -52,11 +52,14 @@ class Metadata:
         # Create a new Metadata instance
         metadata = Metadata()
 
-        model_card = Metadata.load_model_card(model_path)
+        if model_card_path is None:
+            model_card_path = model_path
+
+        model_card = Metadata.load_model_card(model_card_path)
         hf_params = Metadata.load_hf_parameters(model_path)
 
         # heuristics
-        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
+        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_card_path, total_params)
 
         # Metadata Override File Provided
         # This is based on LLM_KV_NAMES mapping in llama.cpp
@@ -177,6 +180,12 @@ class Metadata:
             org_component = None
 
         name_parts: list[str] = model_full_name_component.split('-')
+
+        # Remove empty parts
+        for i in reversed(range(len(name_parts))):
+            if len(name_parts[i]) == 0:
+                del name_parts[i]
+
         name_types: list[
             set[Literal["basename", "size_label", "finetune", "version", "type"]]
         ] = [set() for _ in name_parts]
@@ -227,6 +236,13 @@ class Metadata:
                 if part.lower() == "lora":
                     name_parts[i] = "LoRA"
 
+        # Ignore word-based size labels when there is at least a number-based one present
+        if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
+            for n, t in zip(name_parts, name_types):
+                if "size_label" in t:
+                    if all(c.isalpha() for c in n):
+                        t.remove("size_label")
+
         at_start = True
         # Find the basename through the annotated name
         for part, t in zip(name_parts, name_types):
@@ -247,7 +263,8 @@ class Metadata:
                 break
 
         basename = "-".join(n for n, t in zip(name_parts, name_types) if "basename" in t) or None
-        size_label = "-".join(s for s, t in zip(name_parts, name_types) if "size_label" in t) or None
+        # Deduplicate size labels using order-preserving 'dict' ('set' seems to sort the keys)
+        size_label = "-".join(dict.fromkeys(s for s, t in zip(name_parts, name_types) if "size_label" in t).keys()) or None
         finetune = "-".join(f for f, t in zip(name_parts, name_types) if "finetune" in t) or None
         # TODO: should the basename version always be excluded?
         # TODO: should multiple versions be joined together?
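Two details of these hunks are worth spelling out. `dict.fromkeys` is used for deduplication because dicts preserve insertion order (a language guarantee since Python 3.7), whereas `set` iteration order is arbitrary. And purely alphabetic size labels are only dropped when a digit-bearing label is also present. A quick illustration with toy inputs:

# Order-preserving deduplication of size labels.
labels = ["7B", "7B", "A2.7B"]
print("-".join(dict.fromkeys(labels)))  # 7B-A2.7B  (first-seen order kept)

# Word-based size labels are ignored when a number-based one is present.
parts = ["mini", "7B"]
has_numeric = any(c.isdecimal() for p in parts for c in p)
kept = [p for p in parts if not (has_numeric and all(c.isalpha() for c in p))]
print(kept)  # ['7B']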
gguf-py/tests/test_metadata.py

@@ -54,7 +54,7 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("NousResearch/Meta-Llama-3-8B"),
                          ('Meta-Llama-3-8B', "NousResearch", 'Meta-Llama-3', None, None, '8B'))
 
-        # Can't detect all non standard form in a heuristically safe way... best to err in caution and output nothing...
+        # Non standard naming
         self.assertEqual(gguf.Metadata.get_model_id_components("Qwen1.5-MoE-A2.7B-Chat"),
                          ('Qwen1.5-MoE-A2.7B-Chat', None, 'Qwen1.5-MoE', 'Chat', None, 'A2.7B'))
 
@@ -71,7 +71,7 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("delphi-suite/stories-llama2-50k", 50 * 10**3),
                          ('stories-llama2-50k', 'delphi-suite', 'stories-llama2', None, None, '50K'))
 
-        # None standard and not easy to disambiguate
+        # Non standard and not easy to disambiguate
         self.assertEqual(gguf.Metadata.get_model_id_components("DeepSeek-Coder-V2-Lite-Instruct"),
                          ('DeepSeek-Coder-V2-Lite-Instruct', None, 'DeepSeek-Coder-V2-Lite', 'Instruct', None, None))
 
@@ -123,6 +123,20 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("bigscience/bloom-7b1-petals"),
                          ('bloom-7b1-petals', 'bigscience', 'bloom', 'petals', None, '7.1B'))
 
+        # Ignore full-text size labels when there are number-based ones, and deduplicate size labels
+        self.assertEqual(gguf.Metadata.get_model_id_components("MaziyarPanahi/GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1"),
+                         ('GreenNode-mini-7B-multilingual-v1olet-Mistral-7B-Instruct-v0.1', 'MaziyarPanahi', 'GreenNode-mini', 'multilingual-v1olet-Mistral-Instruct', 'v0.1', '7B'))
+
+        # Version at the end with a long basename
+        self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/Mistral-Nemo-Base-2407"),
+                         ('Mistral-Nemo-Base-2407', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None))
+
+        ## Invalid cases ##
+
+        # Start with a dash and has dashes in rows
+        self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/-Mistral--Nemo-Base-2407-"),
+                         ('-Mistral--Nemo-Base-2407-', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None))
+
     def test_apply_metadata_heuristic_from_model_card(self):
         model_card = {
             'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],
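The new assertions can also be reproduced interactively, assuming `gguf-py` is installed or on `PYTHONPATH`; for example, the trailing-version case:

import gguf

# One of the newly added cases: a version at the end of a long basename.
print(gguf.Metadata.get_model_id_components("mistralai/Mistral-Nemo-Base-2407"))
# ('Mistral-Nemo-Base-2407', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None)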
Author: Francis Couture-Harpin