	Merge branch 'master' into compilade/mamba2
All changes below are to convert_hf_to_gguf.py:
@@ -2145,7 +2145,6 @@ class Llama4Model(LlamaModel):
 
     def set_vocab(self):
         self._set_vocab_gpt2()
-        self.gguf_writer.add_add_bos_token(True)
 
     def set_gguf_parameters(self):
         super().set_gguf_parameters()
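
This is the first of several hunks that delete explicit add_add_bos_token / add_add_eos_token calls from individual converter classes (Llama4, BERT, RoBERTa, JinaBertV2, T5). A plausible reading, not stated in the diff itself, is that gguf.SpecialVocab already picks these flags up from the tokenizer configuration and writes them during add_to_gguf(), making the per-class calls redundant. A minimal sketch of that flow, with both paths as hypothetical placeholders:

    # a minimal sketch, assuming SpecialVocab reads add_bos_token /
    # add_eos_token from the model's tokenizer_config.json
    from pathlib import Path
    import gguf

    writer = gguf.GGUFWriter(Path("out.gguf"), arch="llama")  # hypothetical output
    special_vocab = gguf.SpecialVocab(Path("model_dir"))      # hypothetical model dir
    # add_to_gguf() writes the special token IDs and, when the tokenizer
    # config defines them, the add-BOS/add-EOS flags, so an extra
    # writer.add_add_bos_token(...) would write the same key twice
    special_vocab.add_to_gguf(writer)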
@@ -2194,7 +2193,7 @@ class Llama4VisionModel(MmprojModel):
                 name += ".weight"
             if "multi_modal_projector.linear_1" in name:
                 # despite the name with number postfix, this is a single fully connected layer
-                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC], data_torch)]
+                return [(gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC] + '.weight', data_torch)]
             return [(self.map_tensor_name(name), data_torch)]
         return []
 
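The Llama4VisionModel change appends ".weight" because entries in gguf.TENSOR_NAMES are base names without a ".weight" / ".bias" suffix; returning the bare base name would emit a tensor the GGUF consumer cannot match. A small sketch of the distinction (the exact mapped string is illustrative, not guaranteed):

    # a minimal sketch, assuming the gguf-py convention that
    # TENSOR_NAMES entries carry no ".weight"/".bias" suffix
    import gguf

    base = gguf.TENSOR_NAMES[gguf.MODEL_TENSOR.V_MMPROJ_FC]
    print(base)              # base name only
    print(base + ".weight")  # the fully-qualified name the loader expects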
@@ -3918,9 +3917,6 @@ class BertModel(TextModel):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
-
 
 @ModelBase.register("DistilBertModel", "DistilBertForMaskedLM", "DistilBertForSequenceClassification")
 class DistilBertModel(BertModel):
@@ -3962,8 +3958,6 @@ class RobertaModel(BertModel):
         bpe_tok_path = self.dir_model / "tokenizer.json"
         if bpe_tok_path.exists():
             self._set_vocab_gpt2()
-            self.gguf_writer.add_add_bos_token(True)
-            self.gguf_writer.add_add_eos_token(True)
 
             # we need this to validate the size of the token_type embeddings
             # though currently we are passing all zeros to the token_type embeddings
@@ -4950,8 +4944,6 @@ class JinaBertV2Model(BertModel):
             self.gguf_writer.add_token_type_count(2)
         else:
             raise NotImplementedError(f'Tokenizer {tokenizer_class} is not supported for JinaBertModel')
-        self.gguf_writer.add_add_bos_token(True)
-        self.gguf_writer.add_add_eos_token(True)
 
 
 @ModelBase.register("OpenELMForCausalLM")
@@ -5553,9 +5545,6 @@ class T5Model(TextModel):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
@@ -5693,9 +5682,6 @@ class T5EncoderModel(TextModel):
         special_vocab = gguf.SpecialVocab(self.dir_model, n_vocab=len(tokens))
         special_vocab.add_to_gguf(self.gguf_writer)
 
-        self.gguf_writer.add_add_bos_token(False)
-        self.gguf_writer.add_add_eos_token(True)
-
     def set_gguf_parameters(self):
         if (n_ctx := self.find_hparam(["n_positions"], optional=True)) is None:
             logger.warning("Couldn't find context length in config.json, assuming default value of 512")
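
Note that the two T5 converters used (add_bos=False, add_eos=True) rather than (True, True), matching the stock HF T5 tokenizer, which appends </s> and prepends nothing. A quick check of that behavior, assuming transformers is available (it is already a dependency of the converter):

    # a minimal sketch; "t5-small" stands in for any T5 checkpoint
    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("t5-small")
    print(tok("hello world").input_ids)  # ends with the EOS id; no BOS is prepended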
@@ -6491,8 +6477,8 @@ def parse_args() -> argparse.Namespace:
         help="model is executed on big endian machine",
     )
     parser.add_argument(
-        "model", type=Path,
-        help="directory containing model file",
+        "model", type=str,
+        help="directory containing model file or huggingface repository ID (if --remote)",
         nargs="?",
     )
     parser.add_argument(
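
Switching the positional argument from type=Path to type=str reflects that the value is no longer necessarily a filesystem path: with --remote it is a Hugging Face repository ID, and main() now decides which interpretation applies. A minimal sketch of the two accepted forms, assuming --remote is a plain store_true flag as in the script:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("model", type=str, nargs="?",
                        help="directory containing model file or huggingface repository ID (if --remote)")
    parser.add_argument("--remote", action="store_true")

    args = parser.parse_args(["org/model", "--remote"])  # hypothetical repo ID
    print(args.model)  # stays a plain string; main() wraps it in Path only for local runs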
@@ -6603,18 +6589,20 @@ def main() -> None:
     else:
         logging.basicConfig(level=logging.INFO)
 
-    dir_model = args.model
-
     if args.remote:
+        hf_repo_id = args.model
         from huggingface_hub import snapshot_download
         local_dir = snapshot_download(
-            repo_id=str(dir_model),
+            repo_id=hf_repo_id,
             allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"])
         dir_model = Path(local_dir)
         logger.info(f"Downloaded config and tokenizer to {local_dir}")
+    else:
+        hf_repo_id = None
+        dir_model = Path(args.model)
 
     if not dir_model.is_dir():
-        logger.error(f'Error: {args.model} is not a directory')
+        logger.error(f'Error: {dir_model} is not a directory')
         sys.exit(1)
 
     ftype_map: dict[str, gguf.LlamaFileType] = {
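
The rewritten block in main() separates the two roles the old code packed into dir_model: hf_repo_id holds the repository ID (None for local runs) and dir_model is always a real Path. snapshot_download is limited by allow_patterns to config and tokenizer files; the tensor data is fetched lazily later through remote_hf_model_id. The logger.error message also switches from args.model to dir_model so it names the directory that actually failed the check. A condensed sketch of the resulting control flow, with a hypothetical repo ID and error handling elided:

    from pathlib import Path

    args_remote, args_model = True, "org/model"  # stand-ins for parsed args

    if args_remote:
        hf_repo_id = args_model
        from huggingface_hub import snapshot_download
        local_dir = snapshot_download(
            repo_id=hf_repo_id,
            allow_patterns=["LICENSE", "*.json", "*.md", "*.txt", "tokenizer.model"])
        dir_model = Path(local_dir)
    else:
        hf_repo_id = None
        dir_model = Path(args_model)

    # dir_model is a Path in both branches, so dir_model.is_dir() is safe
    print(hf_repo_id, dir_model)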
@@ -6634,9 +6622,9 @@ def main() -> None:
 
     if args.outfile is not None:
         fname_out = args.outfile
-    elif args.remote:
+    elif hf_repo_id:
         # if remote, use the model ID as the output file name
-        fname_out = Path("./" + str(args.model).replace("/", "-") + "-{ftype}.gguf")
+        fname_out = Path("./" + hf_repo_id.replace("/", "-") + "-{ftype}.gguf")
     else:
         fname_out = dir_model
 
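With hf_repo_id in scope, the output-name branch keys off the variable instead of re-testing args.remote. The name is the repo ID with "/" flattened to "-", plus a "{ftype}" placeholder that the converter substitutes later; str.format below only illustrates the placeholder:

    hf_repo_id = "org/model"  # hypothetical
    fname_out = "./" + hf_repo_id.replace("/", "-") + "-{ftype}.gguf"
    print(fname_out)                      # ./org-model-{ftype}.gguf
    print(fname_out.format(ftype="F16")) # ./org-model-F16.gguf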
@@ -6665,7 +6653,7 @@ def main() -> None:
                                      split_max_tensors=args.split_max_tensors,
                                      split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
                                      small_first_shard=args.no_tensor_first_split,
-                                     remote_hf_model_id=str(args.model) if args.remote else None)
+                                     remote_hf_model_id=hf_repo_id)
 
         if args.vocab_only:
             logger.info("Exporting model vocab...")
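
The final hunk is a pure simplification: because hf_repo_id is already None for local runs, the conditional expression str(args.model) if args.remote else None collapses to the variable itself. A sketch of the equivalence under that invariant:

    # a minimal sketch; the assert encodes the invariant set up in main()
    def pick_remote_id(args_model: str, args_remote: bool) -> str | None:
        hf_repo_id = args_model if args_remote else None
        # before: str(args_model) if args_remote else None
        # after:  hf_repo_id
        assert (str(args_model) if args_remote else None) == hf_repo_id
        return hf_repo_id

    print(pick_remote_id("org/model", True))      # org/model
    print(pick_remote_id("./models/foo", False))  # None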