Repository: https://github.com/ggml-org/llama.cpp.git
convert_lora : fix default filename

The default filename was previously hardcoded.

* convert_hf : Model.fname_out can no longer be None

Author: Francis Couture-Harpin

convert_hf_to_gguf.py
@@ -48,7 +48,7 @@ class Model:
 
     dir_model: Path
     ftype: gguf.LlamaFileType
-    fname_out: Path | None
+    fname_out: Path
     is_big_endian: bool
     endianess: gguf.GGUFEndian
     use_temp_file: bool
@@ -67,7 +67,7 @@ class Model:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path | None, is_big_endian: bool = False,
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool = False,
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
                  split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False, small_first_shard: bool = False):
@@ -347,7 +347,7 @@ class Model:
 
         total_params, shared_params, expert_params, expert_count = self.gguf_writer.get_total_parameter_count()
 
-        self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model, self.model_name, self.dir_model_card, total_params)
+        self.metadata = gguf.Metadata.load(self.metadata_override, self.dir_model_card, self.model_name, total_params)
 
         # Fallback to model directory name if metadata name is still missing
         if self.metadata.name is None:
@@ -361,27 +361,22 @@ class Model:
         output_type: str = self.ftype.name.partition("_")[2]
 
         # Filename Output
-        # Note: `not is_dir()` is used because `.is_file()` will not detect
-        #       file template strings as it doesn't actually exist as a file
-        if self.fname_out is not None and not self.fname_out.is_dir():
-            # Output path is a custom defined templated filename
-
-            # Process templated file name with the output ftype, useful with the "auto" ftype
-            self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type)
-        else:
+        if self.fname_out.is_dir():
             # Generate default filename based on model specification and available metadata
             if not vocab_only:
                 fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, self.metadata.size_label, output_type, model_type="LoRA" if total_params < 0 else None)
             else:
                 fname_default: str = gguf.naming_convention(self.metadata.name, self.metadata.basename, self.metadata.finetune, self.metadata.version, size_label=None, output_type=None, model_type="vocab")
 
-            # Check if preferred output directory path was provided
-            if self.fname_out is not None and self.fname_out.is_dir():
-                # output path is a directory
-                self.fname_out = self.fname_out / f"{fname_default}.gguf"
-            else:
-                # output in the same directory as the model by default
-                self.fname_out = self.dir_model / f"{fname_default}.gguf"
+            # Use the default filename
+            self.fname_out = self.fname_out / f"{fname_default}.gguf"
+        else:
+            # Output path is a custom defined templated filename
+            # Note: `not is_dir()` is used because `.is_file()` will not detect
+            #       file template strings as it doesn't actually exist as a file
+
+            # Process templated file name with the output ftype, useful with the "auto" ftype
+            self.fname_out = self.fname_out.parent / gguf.fill_templated_filename(self.fname_out.name, output_type)
 
         self.set_type()
 
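A minimal standalone sketch of the simplified dispatch above. The local fill_templated_filename here is a simplified stand-in for gguf.fill_templated_filename, assuming str.format-style placeholders such as "{ftype}"; the directory check comes first because a template like "Model-{ftype}.gguf" does not exist on disk, so .is_file() cannot be used to recognize it:

    from pathlib import Path

    def fill_templated_filename(filename: str, output_type: str) -> str:
        # Simplified stand-in for gguf.fill_templated_filename (assumption:
        # str.format-style placeholders like "{ftype}" / "{FTYPE}")
        return filename.format(ftype=output_type.lower(), FTYPE=output_type.upper())

    def resolve_output_path(fname_out: Path, fname_default: str, output_type: str) -> Path:
        if fname_out.is_dir():
            # A directory gets the generated default filename appended
            return fname_out / f"{fname_default}.gguf"
        # Anything else is treated as a (possibly templated) filename;
        # .is_file() would return False for "Model-{ftype}.gguf" because
        # no such file exists yet, hence the is_dir() test instead
        return fname_out.parent / fill_templated_filename(fname_out.name, output_type)

    print(resolve_output_path(Path("out/Model-{ftype}.gguf"), "Model-8B-Q8_0", "Q8_0"))
    # out/Model-q8_0.gguf
    print(resolve_output_path(Path("."), "Model-8B-Q8_0", "Q8_0"))
    # Model-8B-Q8_0.gguf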
@@ -3626,10 +3621,10 @@ def main() -> None:
         logger.error("Error: Cannot use temp file when splitting")
         sys.exit(1)
 
-    fname_out = None
-
     if args.outfile is not None:
         fname_out = args.outfile
+    else:
+        fname_out = dir_model
 
     logger.info(f"Loading model: {dir_model.name}")
 
@@ -3660,7 +3655,6 @@ def main() -> None:
         else:
             logger.info("Exporting model...")
             model_instance.write()
-            assert model_instance.fname_out is not None
             out_path = f"{model_instance.fname_out.parent}{os.sep}" if is_split else model_instance.fname_out
             logger.info(f"Model successfully exported to {out_path}")
 
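The net effect in main() is that fname_out is always a Path: either the user-supplied --outfile or the model directory, which prepare_metadata() above later expands into a generated filename. That is why the `assert model_instance.fname_out is not None` in the export path could be dropped. A sketch under those assumptions:

    from pathlib import Path
    from typing import Optional

    def default_outfile(outfile: Optional[Path], dir_model: Path) -> Path:
        # --outfile wins when given; otherwise the model directory is used,
        # and Model.prepare_metadata() later expands it to a default filename
        return outfile if outfile is not None else dir_model

    print(default_outfile(None, Path("models/Mistral-Nemo-Base-2407")))
    # models/Mistral-Nemo-Base-2407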

convert_lora_to_gguf.py
@@ -290,7 +290,7 @@ if __name__ == '__main__':
         fname_out = args.outfile
     else:
         # output in the same directory as the model by default
-        fname_out = dir_lora / 'ggml-lora-{ftype}.gguf'
+        fname_out = dir_lora
 
     if os.path.exists(input_model):
         # lazy import load_file only if lora is in safetensors format.
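With this change the LoRA converter stops hardcoding 'ggml-lora-{ftype}.gguf' and instead passes the adapter directory through, letting the shared filename logic in convert_hf_to_gguf.py generate a metadata-based default carrying the "-LoRA" type suffix. A before/after sketch; the expanded example filename is an assumption based on the GGUF naming convention, not taken from this diff:

    from pathlib import Path

    dir_lora = Path("Llama-3-Instruct-abliteration-LoRA-8B")

    # Before: hardcoded template, expanded later via the output ftype
    old_default = dir_lora / 'ggml-lora-{ftype}.gguf'

    # After: the directory itself; Model.prepare_metadata() detects a
    # directory and generates a default name, e.g. (illustrative):
    # "Llama-3-8B-Instruct-abliteration-F16-LoRA.gguf"
    new_default = dir_lora

    print(old_default)  # Llama-3-Instruct-abliteration-LoRA-8B/ggml-lora-{ftype}.gguf
    print(new_default)  # Llama-3-Instruct-abliteration-LoRA-8B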

gguf-py/gguf/metadata.py
@@ -44,7 +44,7 @@ class Metadata:
     datasets: Optional[list[str]] = None
 
     @staticmethod
-    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, model_card_path: Optional[Path] = None, total_params: int = 0) -> Metadata:
+    def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata:
         # This grabs as many contextual authorship metadata as possible from the model repository
         # making any conversion as required to match the gguf kv store metadata format
         # as well as giving users the ability to override any authorship metadata that may be incorrect
@@ -52,14 +52,12 @@ class Metadata:
         # Create a new Metadata instance
         metadata = Metadata()
 
-        if model_card_path is None:
-            model_card_path = model_path
-
-        model_card = Metadata.load_model_card(model_card_path)
+        model_card = Metadata.load_model_card(model_path)
         hf_params = Metadata.load_hf_parameters(model_path)
         # TODO: load adapter_config.json when possible, it usually contains the base model of the LoRA adapter
 
         # heuristics
-        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_card_path, total_params)
+        metadata = Metadata.apply_metadata_heuristic(metadata, model_card, hf_params, model_path, total_params)
 
         # Metadata Override File Provided
         # This is based on LLM_KV_NAMES mapping in llama.cpp
@@ -232,11 +230,14 @@ class Metadata:
                 name_parts[i] = part
             # Some easy to recognize finetune names
             elif i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE):
-                name_types[i].add("finetune")
-                if part.lower() == "lora":
-                    name_parts[i] = "LoRA"
+                if total_params < 0 and part.lower() == "lora":
+                    # ignore redundant "lora" in the finetune part when the output is a lora adapter
+                    name_types[i].add("type")
+                else:
+                    name_types[i].add("finetune")
 
         # Ignore word-based size labels when there is at least a number-based one present
         # TODO: should word-based size labels always be removed instead?
         if any(c.isdecimal() for n, t in zip(name_parts, name_types) if "size_label" in t for c in n):
             for n, t in zip(name_parts, name_types):
                 if "size_label" in t:
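The key to this hunk is that a negative total_params is used across these converters to mark a LoRA adapter (see the model_type="LoRA" if total_params < 0 branch in convert_hf_to_gguf.py above), so a literal "lora" name part becomes a "type" token instead of part of the finetune name. A condensed sketch of the updated rule, not the full heuristic:

    import re

    def classify_name_part(i: int, part: str, total_params: int) -> str:
        # Condensed from the branch above; not the full heuristic
        if i > 0 and re.fullmatch(r'chat|instruct|vision|lora', part, re.IGNORECASE):
            if total_params < 0 and part.lower() == "lora":
                return "type"      # redundant with the "-LoRA" filename suffix
            return "finetune"
        return "other"

    print(classify_name_part(3, "LoRA", -1234))          # type
    print(classify_name_part(3, "LoRA", 8_000_000_000))  # finetune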

gguf-py/tests/test_metadata.py
@@ -159,6 +159,15 @@ class TestMetadataMethod(unittest.TestCase):
         self.assertEqual(gguf.Metadata.get_model_id_components("mistralai/-Mistral--Nemo-Base-2407-"),
                          ('-Mistral--Nemo-Base-2407-', 'mistralai', 'Mistral-Nemo-Base', None, '2407', None))
 
+        ## LoRA ##
+
+        self.assertEqual(gguf.Metadata.get_model_id_components("Llama-3-Instruct-abliteration-LoRA-8B"),
+                         ('Llama-3-Instruct-abliteration-LoRA-8B', None, 'Llama-3', 'Instruct-abliteration-LoRA', None, '8B'))
+
+        # Negative size --> output is a LoRA adapter --> prune "LoRA" out of the name to avoid redundancy with the suffix
+        self.assertEqual(gguf.Metadata.get_model_id_components("Llama-3-Instruct-abliteration-LoRA-8B", -1234),
+                         ('Llama-3-Instruct-abliteration-LoRA-8B', None, 'Llama-3', 'Instruct-abliteration', None, '8B'))
+
     def test_apply_metadata_heuristic_from_model_card(self):
         model_card = {
             'tags': ['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'],
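To help read the asserted tuples: get_model_id_components returns, in order, (model_full_name_component, org_component, basename, finetune, version, size_label) per gguf-py's metadata.py. A minimal check mirroring the new test case, assuming gguf-py is importable:

    import gguf

    name, org, basename, finetune, version, size_label = \
        gguf.Metadata.get_model_id_components("Llama-3-Instruct-abliteration-LoRA-8B", -1234)
    assert finetune == "Instruct-abliteration"  # "LoRA" pruned for adapter outputs
    assert size_label == "8B"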