Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
	convert.py : add python logging instead of print() (#6511)
* convert.py: add python logging instead of print()
* convert.py: verbose flag takes priority over dump flag log suppression
* convert.py: named instance logging
* convert.py: use explicit logger id string
* convert.py: convert extra print() to named logger
* convert.py: sys.stderr.write --> logger.error
* *.py: Convert all python scripts to use logging module
* requirements.txt: remove extra line
* flake8: update flake8 ignore and exclude to match ci settings
* gh-actions: add flake8-no-print to flake8 lint step
* pre-commit: add flake8-no-print to flake8 and also update pre-commit version
* convert-hf-to-gguf.py: print() to logger conversion
* *.py: logging basiconfig refactor to use conditional expression
* *.py: removed commented out logging
* fixup! *.py: logging basiconfig refactor to use conditional expression
* constant.py: logger.error then exit should be a raise exception instead
* *.py: Convert logger error and sys.exit() into a raise exception (for atypical error)
* gguf-convert-endian.py: refactor convert_byteorder() to use tqdm progressbar
* verify-checksum-model.py: This is the result of the program, it should be printed to stdout.
* compare-llama-bench.py: add blank line for readability during missing repo response
* reader.py: read_gguf_file() use print() over logging
* convert.py: warning goes to stderr and won't hurt the dump output
* gguf-dump.py: dump_metadata() should print to stdout
* convert-hf-to-gguf.py: print --> logger.debug or ValueError()
* verify-checksum-models.py: use print() for printing table
* *.py: refactor logging.basicConfig()
* gguf-py/gguf/*.py: use __name__ as logger name (since they will be imported and not run directly)
* python-lint.yml: use .flake8 file instead
* constants.py: logger no longer required
* convert-hf-to-gguf.py: add additional logging
* convert-hf-to-gguf.py: print() --> logger
* *.py: fix flake8 warnings
* revert changes to convert-hf-to-gguf.py for get_name()
* convert-hf-to-gguf-update.py: use triple quoted f-string instead
* *.py: accidentally corrected the wrong line
* *.py: add compilade warning suggestions and style fixes
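In summary, diagnostic and progress messages now go through a named Logger instead of bare print(), the log level is configured once in main() (with --verbose taking priority over the quieter level used for dumps), and the remaining intentional print() calls, i.e. actual program output such as dumps and tables, are marked with "# noqa: NP100" so the flake8-no-print lint stays clean. Below is a minimal, self-contained sketch of that pattern; the script name and logger id are placeholders for illustration, and the real convert.py changes follow in the diff.

import argparse
import logging

# Named logger instance (placeholder id) rather than the root logger.
logger = logging.getLogger("example-convert")


def main() -> None:
    parser = argparse.ArgumentParser(description="illustrative script, not part of the commit")
    parser.add_argument("--dump", action="store_true", help="print metadata only")
    parser.add_argument("--verbose", action="store_true", help="increase output verbosity")
    args = parser.parse_args()

    # Verbose flag takes priority over the log suppression used for dump mode.
    if args.verbose:
        logging.basicConfig(level=logging.DEBUG)
    elif args.dump:
        # Avoid printing anything besides the dump output
        logging.basicConfig(level=logging.WARNING)
    else:
        logging.basicConfig(level=logging.INFO)

    logger.info("progress message, routed through logging")
    logger.debug("detail only shown with --verbose")

    if args.dump:
        # Program output (not diagnostics) stays on stdout; the
        # flake8-no-print check is suppressed explicitly.
        print("dump output goes to stdout")  # noqa: NP100


if __name__ == "__main__":
    main()

Library modules under gguf-py/gguf use logging.getLogger(__name__) instead of an explicit id string, since they are imported rather than run directly.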
 convert.py | 60 ++++++++++++++++++++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 24 deletions(-)
@@ -1,6 +1,7 @@
 #!/usr/bin/env python3
 from __future__ import annotations
 
+import logging
 import argparse
 import concurrent.futures
 import enum
@@ -35,6 +36,8 @@ import gguf
 if TYPE_CHECKING:
     from typing_extensions import Self, TypeAlias
 
+logger = logging.getLogger("convert")
+
 if hasattr(faulthandler, 'register') and hasattr(signal, 'SIGUSR1'):
     faulthandler.register(signal.SIGUSR1)
 
@@ -643,7 +646,6 @@ class LlamaHfVocab(Vocab):
 
 
 def permute(weights: NDArray, n_head: int, n_head_kv: int) -> NDArray:
-    # print( "permute debug " + str(weights.shape[0]) + " x " + str(weights.shape[1]) + " nhead " + str(n_head) + " nheadkv " + str(n_kv_head) )
     if n_head_kv is not None and n_head != n_head_kv:
         n_head = n_head_kv
     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:])
@@ -1033,12 +1035,12 @@ def check_vocab_size(params: Params, vocab: BaseVocab, pad_vocab: bool = False)
 
     # Check for a vocab size mismatch
     if params.n_vocab == vocab.vocab_size:
-        print("Ignoring added_tokens.json since model matches vocab size without it.")
+        logger.warning("Ignoring added_tokens.json since model matches vocab size without it.")
         return
 
     if pad_vocab and params.n_vocab > vocab.vocab_size:
         pad_count = params.n_vocab - vocab.vocab_size
-        print(
+        logger.debug(
             f"Padding vocab with {pad_count} token(s) - <dummy00001> through <dummy{pad_count:05}>"
         )
         for i in range(1, pad_count + 1):
@@ -1166,7 +1168,7 @@ class OutputFile:
             elapsed = time.time() - start
             size = ' x '.join(f"{dim:6d}" for dim in lazy_tensor.shape)
             padi = len(str(len(model)))
-            print(
+            logger.info(
                 f"[{i + 1:{padi}d}/{len(model)}] Writing tensor {name:38s} | size {size:16} | type {lazy_tensor.data_type.name:4} | T+{int(elapsed):4}"
             )
             self.gguf.write_tensor_data(ndarray)
@@ -1281,12 +1283,12 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
     # HF models permut or pack some of the tensors, so we need to undo that
     for i in itertools.count():
         if f"model.layers.{i}.self_attn.q_proj.weight" in model:
-            print(f"Permuting layer {i}")
+            logger.debug(f"Permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.q_proj.weight"], params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_lazy(model[f"model.layers.{i}.self_attn.k_proj.weight"], params.n_head, params.n_head_kv)
             # tmp[f"model.layers.{i}.self_attn.v_proj.weight"] =              model[f"model.layers.{i}.self_attn.v_proj.weight"]
         elif f"model.layers.{i}.self_attn.W_pack.weight" in model:
-            print(f"Unpacking and permuting layer {i}")
+            logger.debug(f"Unpacking and permuting layer {i}")
             tmp[f"model.layers.{i}.self_attn.q_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 0, params.n_head, params.n_head)
             tmp[f"model.layers.{i}.self_attn.k_proj.weight"] = permute_part_lazy(model[f"model.layers.{i}.self_attn.W_pack.weight"], 1, params.n_head, params.n_head_kv)
             tmp[f"model.layers.{i}.self_attn.v_proj.weight"] = part_lazy        (model[f"model.layers.{i}.self_attn.W_pack.weight"], 2)
@@ -1299,15 +1301,15 @@ def convert_model_names(model: LazyModel, params: Params, skip_unknown: bool) ->
         tensor_type, name_new = tmap.get_type_and_name(name, try_suffixes = (".weight", ".bias")) or (None, None)
         if name_new is None:
             if skip_unknown:
-                print(f"Unexpected tensor name: {name} - skipping")
+                logger.warning(f"Unexpected tensor name: {name} - skipping")
                 continue
             raise ValueError(f"Unexpected tensor name: {name}. Use --skip-unknown to ignore it (e.g. LLaVA)")
 
         if tensor_type in should_skip:
-            print(f"skipping tensor {name_new}")
+            logger.debug(f"skipping tensor {name_new}")
             continue
 
-        print(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
+        logger.debug(f"{name:48s} -> {name_new:40s} | {lazy_tensor.data_type.name:6s} | {lazy_tensor.shape}")
         out[name_new] = lazy_tensor
 
     return out
@@ -1372,7 +1374,7 @@ def load_some_model(path: Path) -> ModelPlus:
     paths = find_multifile_paths(path)
     models_plus: list[ModelPlus] = []
    for path in paths:
-        print(f"Loading model file {path}")
+        logger.info(f"Loading model file {path}")
         models_plus.append(lazy_load_file(path))
 
     model_plus = merge_multifile_models(models_plus)
@@ -1413,7 +1415,7 @@ class VocabFactory:
         else:
             raise FileNotFoundError(f"Could not find a tokenizer matching any of {vocab_types}")
 
-        print(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
+        logger.info(f"Loaded vocab file {vocab.fname_tokenizer!r}, type {vocab.name!r}")
         return vocab
 
     def load_vocab(self, vocab_types: list[str] | None, model_parent_path: Path) -> tuple[BaseVocab, gguf.SpecialVocab]:
@@ -1438,19 +1440,19 @@ def default_outfile(model_paths: list[Path], file_type: GGMLFileType) -> Path:
     }[file_type]
     ret = model_paths[0].parent / f"ggml-model-{namestr}.gguf"
     if ret in model_paths:
-        sys.stderr.write(
+        logger.error(
             f"Error: Default output path ({ret}) would overwrite the input. "
-            "Please explicitly specify a path using --outfile.\n")
+            "Please explicitly specify a path using --outfile.")
         sys.exit(1)
     return ret
 
 
 def do_dump_model(model_plus: ModelPlus) -> None:
-    print(f"model_plus.paths = {model_plus.paths!r}")
-    print(f"model_plus.format = {model_plus.format!r}")
-    print(f"model_plus.vocab = {model_plus.vocab!r}")
+    print(f"model_plus.paths = {model_plus.paths!r}") # noqa: NP100
+    print(f"model_plus.format = {model_plus.format!r}") # noqa: NP100
+    print(f"model_plus.vocab = {model_plus.vocab!r}") # noqa: NP100
     for name, lazy_tensor in model_plus.model.items():
-        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}")
+        print(f"{name}: shape={lazy_tensor.shape} type={lazy_tensor.data_type}; {lazy_tensor.description}") # noqa: NP100
 
 
 def main(args_in: list[str] | None = None) -> None:
@@ -1473,8 +1475,18 @@ def main(args_in: list[str] | None = None) -> None:
     parser.add_argument("--big-endian",   action="store_true",    help="model is executed on big endian machine")
     parser.add_argument("--pad-vocab",    action="store_true",    help="add pad tokens when model vocab expects more than tokenizer metadata provides")
     parser.add_argument("--skip-unknown", action="store_true",    help="skip unknown tensor names instead of failing")
+    parser.add_argument("--verbose",      action="store_true",    help="increase output verbosity")
 
     args = parser.parse_args(args_in)
+
+    if args.verbose:
+        logging.basicConfig(level=logging.DEBUG)
+    elif args.dump_single or args.dump:
+        # Avoid printing anything besides the dump output
+        logging.basicConfig(level=logging.WARNING)
+    else:
+        logging.basicConfig(level=logging.INFO)
+
     if args.no_vocab and args.vocab_only:
         raise ValueError("--vocab-only does not make sense with --no-vocab")
 
@@ -1491,6 +1503,7 @@ def main(args_in: list[str] | None = None) -> None:
     if args.dump:
         do_dump_model(model_plus)
         return
+
     endianess = gguf.GGUFEndian.LITTLE
     if args.big_endian:
         endianess = gguf.GGUFEndian.BIG
@@ -1513,7 +1526,7 @@ def main(args_in: list[str] | None = None) -> None:
             "q8_0": GGMLFileType.MostlyQ8_0,
         }[args.outtype]
 
-    print(f"params = {params}")
+    logger.info(f"params = {params}")
 
     model_parent_path = model_plus.paths[0].parent
     vocab_path = Path(args.vocab_dir or args.model or model_parent_path)
@@ -1528,15 +1541,14 @@ def main(args_in: list[str] | None = None) -> None:
         outfile = args.outfile
         OutputFile.write_vocab_only(outfile, params, vocab, special_vocab,
                                     endianess=endianess, pad_vocab=args.pad_vocab)
-        print(f"Wrote {outfile}")
+        logger.info(f"Wrote {outfile}")
         return
 
     if model_plus.vocab is not None and args.vocab_dir is None and not args.no_vocab:
         vocab = model_plus.vocab
 
-    print(f"Vocab info: {vocab}")
-    print(f"Special vocab info: {special_vocab}")
-
+    logger.info(f"Vocab info: {vocab}")
+    logger.info(f"Special vocab info: {special_vocab}")
     model   = model_plus.model
     model   = convert_model_names(model, params, args.skip_unknown)
     ftype   = pick_output_type(model, args.outtype)
@@ -1544,11 +1556,11 @@ def main(args_in: list[str] | None = None) -> None:
     outfile = args.outfile or default_outfile(model_plus.paths, ftype)
 
     params.ftype = ftype
-    print(f"Writing {outfile}, format {ftype}")
+    logger.info(f"Writing {outfile}, format {ftype}")
 
     OutputFile.write_all(outfile, ftype, params, model, vocab, special_vocab,
                          concurrency=args.concurrency, endianess=endianess, pad_vocab=args.pad_vocab)
-    print(f"Wrote {outfile}")
+    logger.info(f"Wrote {outfile}")
 
 
 if __name__ == '__main__':
Author: Brian