	Merge branch 'master' into compilade/mamba2
All changes below are to convert_hf_to_gguf.py:
@@ -2145,7 +2145,6 @@ class Llama4Model(LlamaModel):
 
     def set_vocab(self):
         self._set_vocab_gpt2()
-        self.gguf_writer.add_add_bos_token(True)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
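
This is the first of several hunks that delete explicit add_add_bos_token / add_add_eos_token calls from individual converter classes (Llama4, BERT, RoBERTa, JinaBertV2, T5). A plausible reading, not stated in the diff itself, is that gguf.SpecialVocab already picks these flags up from the tokenizer configuration and writes them during add_to_gguf(), making the per-class calls redundant. A minimal sketch of that flow, with both paths as hypothetical placeholders:

    # a minimal sketch, assuming SpecialVocab reads add_bos_token /
    # add_eos_token from the model's tokenizer_config.json
    from pathlib import Path
    import gguf

    writer = gguf.GGUFWriter(Path("out.gguf"), arch="llama")  # hypothetical output
    special_vocab = gguf.SpecialVocab(Path("model_dir"))      # hypothetical model dir
    # add_to_gguf() writes the special token IDs and, when the tokenizer
    # config defines them, the add-BOS/add-EOS flags, so an extra
    # writer.add_add_bos_token(...) would write the same key twice
    special_vocab.add_to_gguf(writer)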
@@ -2194,7 +2193,7 @@ class Llama4VisionModel(MmprojModel):
                 name += ".weight"
             if "multi_modal_projector.linear_1" in name:
                 # despite the name with number postfix, this is a single fully connected layer
-                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
+                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC] + '.weight', data_torch)]
             return [(self.map_tensor_name(name), data_torch)]
         return []
 
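The Llama4VisionModel change appends ".weight" because entries in gguf.TENSOR_NAMES are base names without a ".weight" / ".bias" suffix; returning the bare base name would emit a tensor the GGUF consumer cannot match. A small sketch of the distinction (the exact mapped string is illustrative, not guaranteed):

    # a minimal sketch, assuming the gguf-py convention that
    # TENSOR_NAMES entries carry no ".weight"/".bias" suffix
    import gguf

    base = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC]
    print(base)              # base name only
    print(base + ".weight")  # the fully-qualified name the loader expects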
@@ -3918,9 +3917,6 @@ class BertModel(TextModel):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
-
 
 @ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
 class DistilBertModel(BertModel):
@@ -3962,8 +3958,6 @@ class RobertaModel(BertModel):
         bpe_tok_path = self.dir_model / "tokenizer.json"
         if bpe_tok_path.exists():
             self._set_vocab_gpt2()
-            self.gguf_writer.add_add_bos_token(True)
-            self.gguf_writer.add_add_eos_token(True)
 
             # we need this to validate the size of the token_type embeddings
             # though currently we are passing all zeros to the token_type embeddings
@@ -4950,8 +4944,6 @@ class JinaBertV2Model(BertModel):
             self.gguf_writer.add_token_type_count(2)
         else:
             raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
 
 
 @ModelBase.register("OpenELMForCausalLM")
@@ -5553,9 +5545,6 @@ class T5Model(TextModel):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
@@ -5693,9 +5682,6 @@ class T5EncoderModel(TextModel):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
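
Note that the two T5 converters used (add_bos=False, add_eos=True) rather than (True, True), matching the stock HF T5 tokenizer, which appends </s> and prepends nothing. A quick check of that behavior, assuming transformers is available (it is already a dependency of the converter):

    # a minimal sketch; "t5-small" stands in for any T5 checkpoint
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("t5-small")
    print(tok("hello world").input_ids)  # ends with the EOS id; no BOS is prepended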
@@ -6491,8 +6477,8 @@ def parse_args() -> argparse.Namespace:
         help="model is executed on big endian machine",
     )
     parser.add_argument(
-        "model", type=Path,
-        help="directory containing model file",
+        "model", type=str,
+        help="directory containing model file or huggingface repository ID (if --remote)",
         nargs="?",
     )
     parser.add_argument(
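
Switching the positional argument from type=Path to type=str reflects that the value is no longer necessarily a filesystem path: with --remote it is a Hugging Face repository ID, and main() now decides which interpretation applies. A minimal sketch of the two accepted forms, assuming --remote is a plain store_true flag as in the script:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("model", type=str, nargs="?",
                        help="directory containing model file or huggingface repository ID (if --remote)")
    parser.add_argument("--remote", action="store_true")

    args = parser.parse_args(["org/model", "--remote"])  # hypothetical repo ID
    print(args.model)  # stays a plain string; main() wraps it in Path only for local runs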
@@ -6603,18 +6589,20 @@ def main() -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
-    dir_model = args.model
-
     if args.remote:
+        hf_repo_id = args.model
         from huggingface_hub import snapshot_download
         local_dir = snapshot_download(
-            repo_id=str(dir_model),
+            repo_id=hf_repo_id,
             allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"])
         dir_model = Path(local_dir)
         logger.info(f"Downloaded config and tokenizer to {local_dir}")
+    else:
+        hf_repo_id = None
+        dir_model = Path(args.model)
 
     if not dir_model.is_dir():
-        logger.error(f'Error: {args.model} is not a directory')
+        logger.error(f'Error: {dir_model} is not a directory')
         sys.exit(1)
 
     ftype_map: dict[str, gguf.LlamaFileType] = {
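
The rewritten block in main() separates the two roles the old code packed into dir_model: hf_repo_id holds the repository ID (None for local runs) and dir_model is always a real Path. snapshot_download is limited by allow_patterns to config and tokenizer files; the tensor data is fetched lazily later through remote_hf_model_id. The logger.error message also switches from args.model to dir_model so it names the directory that actually failed the check. A condensed sketch of the resulting control flow, with a hypothetical repo ID and error handling elided:

    from pathlib import Path

    args_remote, args_model = True, "org/model"  # stand-ins for parsed args

    if args_remote:
        hf_repo_id = args_model
        from huggingface_hub import snapshot_download
        local_dir = snapshot_download(
            repo_id=hf_repo_id,
            allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"])
        dir_model = Path(local_dir)
    else:
        hf_repo_id = None
        dir_model = Path(args_model)

    # dir_model is a Path in both branches, so dir_model.is_dir() is safe
    print(hf_repo_id, dir_model)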
@@ -6634,9 +6622,9 @@ def main() -> None:
 
     if args.outfile is not None:
         fname_out = args.outfile
-    elif args.remote:
+    elif hf_repo_id:
         # if remote, use the model ID as the output file name
-        fname_out = Path("./" + str(args.model).replace("/", "-") + "-{ftype}.gguf")
+        fname_out = Path("./" + hf_repo_id.replace("/", "-") + "-{ftype}.gguf")
     else:
         fname_out = dir_model
 
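With hf_repo_id in scope, the output-name branch keys off the variable instead of re-testing args.remote. The name is the repo ID with "/" flattened to "-", plus a "{ftype}" placeholder that the converter substitutes later; str.format below only illustrates the placeholder:

    hf_repo_id = "org/model"  # hypothetical
    fname_out = "./" + hf_repo_id.replace("/", "-") + "-{ftype}.gguf"
    print(fname_out)                      # ./org-model-{ftype}.gguf
    print(fname_out.format(ftype="F16")) # ./org-model-F16.gguf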
@@ -6665,7 +6653,7 @@ def main() -> None:
                                      split_max_tensors=args.split_max_tensors,
                                      split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
                                      small_first_shard=args.no_tensor_first_split,
-                                     remote_hf_model_id=str(args.model) if args.remote else None)
+                                     remote_hf_model_id=hf_repo_id)
 
         if args.vocab_only:
             logger.info("Exporting model vocab...")
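
The final hunk is a pure simplification: because hf_repo_id is already None for local runs, the conditional expression str(args.model) if args.remote else None collapses to the variable itself. A sketch of the equivalence under that invariant:

    # a minimal sketch; the assert encodes the invariant set up in main()
    def pick_remote_id(args_model: str, args_remote: bool) -> str | None:
        hf_repo_id = args_model if args_remote else None
        # before: str(args_model) if args_remote else None
        # after:  hf_repo_id
        assert (str(args_model) if args_remote else None) == hf_repo_id
        return hf_repo_id

    print(pick_remote_id("org/model", True))      # org/model
    print(pick_remote_id("./models/foo", False))  # None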