mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-28 08:31:25 +00:00
convert : make Mistral community chat templates optional via parameter (#15420)
* Make Mistral community chat templates optional
* Change the flag arg to disable instead of enable community chat templates
* Improve error message
* Improve help message
* Tone down the logger messages
This commit is contained in:
@@ -89,13 +89,16 @@ class ModelBase:
|
|||||||
block_count: int
|
block_count: int
|
||||||
tensor_map: gguf.TensorNameMap
|
tensor_map: gguf.TensorNameMap
|
||||||
|
|
||||||
|
# Mistral format specifics
|
||||||
is_mistral_format: bool = False
|
is_mistral_format: bool = False
|
||||||
|
disable_mistral_community_chat_template: bool = False
|
||||||
|
|
||||||
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, *, is_big_endian: bool = False,
|
def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, *, is_big_endian: bool = False,
|
||||||
use_temp_file: bool = False, eager: bool = False,
|
use_temp_file: bool = False, eager: bool = False,
|
||||||
metadata_override: Path | None = None, model_name: str | None = None,
|
metadata_override: Path | None = None, model_name: str | None = None,
|
||||||
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
|
split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
|
||||||
small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None):
|
small_first_shard: bool = False, hparams: dict[str, Any] | None = None, remote_hf_model_id: str | None = None,
|
||||||
|
disable_mistral_community_chat_template: bool = False):
|
||||||
if type(self) is ModelBase or \
|
if type(self) is ModelBase or \
|
||||||
type(self) is TextModel or \
|
type(self) is TextModel or \
|
||||||
type(self) is MmprojModel:
|
type(self) is MmprojModel:
|
||||||
@@ -147,6 +150,9 @@ class ModelBase:
|
|||||||
self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
|
self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
|
||||||
split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
|
split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
|
||||||
|
|
||||||
|
# Mistral specific
|
||||||
|
self.disable_mistral_community_chat_template = disable_mistral_community_chat_template
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def add_prefix_to_filename(cls, path: Path, prefix: str) -> Path:
|
def add_prefix_to_filename(cls, path: Path, prefix: str) -> Path:
|
||||||
stem, suffix = path.stem, path.suffix
|
stem, suffix = path.stem, path.suffix
|
||||||
@@ -2011,8 +2017,17 @@ class LlamaModel(TextModel):
|
|||||||
|
|
||||||
template_dir = Path(__file__).parent / "models/templates/"
|
template_dir = Path(__file__).parent / "models/templates/"
|
||||||
|
|
||||||
template = MistralModel.get_community_chat_template(vocab, template_dir)
|
if not self.is_mistral_format or not self.disable_mistral_community_chat_template:
|
||||||
self.gguf_writer.add_chat_template(template)
|
# Log only for Mistral format that the official tokenization and detokenization is via `mistral-common`.
|
||||||
|
if self.is_mistral_format:
|
||||||
|
logger.info(
|
||||||
|
"Using a Mistral community chat template. These templates can be subject to errors in early days or weeks after a release. "
|
||||||
|
"Mistral recommends to use `mistral-common` to perform tokenization and detokenization."
|
||||||
|
)
|
||||||
|
template = MistralModel.get_community_chat_template(vocab, template_dir, self.is_mistral_format)
|
||||||
|
self.gguf_writer.add_chat_template(template)
|
||||||
|
else:
|
||||||
|
logger.info("Not using a Mistral community chat template. Ensure to perform the tokenization and detokenization via `mistral-common`.")
|
||||||
|
|
||||||
def set_vocab(self):
|
def set_vocab(self):
|
||||||
if self.is_mistral_format:
|
if self.is_mistral_format:
|
||||||
@@ -8422,7 +8437,7 @@ class MistralModel(LlamaModel):
|
|||||||
undo_permute = False
|
undo_permute = False
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_community_chat_template(vocab: MistralVocab, templates_dir: Path):
|
def get_community_chat_template(vocab: MistralVocab, templates_dir: Path, is_mistral_format: bool):
|
||||||
assert TokenizerVersion is not None, "mistral_common is not installed"
|
assert TokenizerVersion is not None, "mistral_common is not installed"
|
||||||
assert isinstance(vocab.tokenizer, (Tekkenizer, SentencePieceTokenizer)), (
|
assert isinstance(vocab.tokenizer, (Tekkenizer, SentencePieceTokenizer)), (
|
||||||
f"Expected Tekkenizer or SentencePieceTokenizer, got {type(vocab.tokenizer)}"
|
f"Expected Tekkenizer or SentencePieceTokenizer, got {type(vocab.tokenizer)}"
|
||||||
@@ -8443,7 +8458,13 @@ class MistralModel(LlamaModel):
|
|||||||
elif vocab.tokenizer.version == TokenizerVersion.v13:
|
elif vocab.tokenizer.version == TokenizerVersion.v13:
|
||||||
template_file = "unsloth-mistral-Devstral-Small-2507.jinja"
|
template_file = "unsloth-mistral-Devstral-Small-2507.jinja"
|
||||||
else:
|
else:
|
||||||
raise ValueError(f"Unknown tokenizer type: {vocab.tokenizer_type} and version {vocab.tokenizer.version}")
|
err_message = f"Unknown tokenizer type: {vocab.tokenizer_type} and version {vocab.tokenizer.version}"
|
||||||
|
if is_mistral_format:
|
||||||
|
err_message += (
|
||||||
|
" . Please pass --disable-mistral-community-chat-template argument to the CLI "
|
||||||
|
"if you want to skip this error and use the Mistral official `mistral-common` pre-processing library."
|
||||||
|
)
|
||||||
|
raise ValueError(err_message)
|
||||||
|
|
||||||
template_path = templates_dir / template_file
|
template_path = templates_dir / template_file
|
||||||
if not template_path.exists():
|
if not template_path.exists():
|
||||||
@@ -8638,6 +8659,13 @@ def parse_args() -> argparse.Namespace:
|
|||||||
"--mistral-format", action="store_true",
|
"--mistral-format", action="store_true",
|
||||||
help="Whether the model is stored following the Mistral format.",
|
help="Whether the model is stored following the Mistral format.",
|
||||||
)
|
)
|
||||||
|
parser.add_argument(
|
||||||
|
"--disable-mistral-community-chat-template", action="store_true",
|
||||||
|
help=(
|
||||||
|
"Whether to disable usage of Mistral community chat templates. If set, use the Mistral official `mistral-common` library for tokenization and detokenization of Mistral models. "
|
||||||
|
"Using `mistral-common` ensure correctness and zero-day support of tokenization for models converted from the Mistral format but requires to manually setup the tokenization server."
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
if not args.print_supported_models and args.model is None:
|
if not args.print_supported_models and args.model is None:
|
||||||
@@ -8744,6 +8772,7 @@ def main() -> None:
|
|||||||
fname_out = ModelBase.add_prefix_to_filename(fname_out, "mmproj-")
|
fname_out = ModelBase.add_prefix_to_filename(fname_out, "mmproj-")
|
||||||
|
|
||||||
is_mistral_format = args.mistral_format
|
is_mistral_format = args.mistral_format
|
||||||
|
disable_mistral_community_chat_template = args.disable_mistral_community_chat_template
|
||||||
|
|
||||||
with torch.inference_mode():
|
with torch.inference_mode():
|
||||||
output_type = ftype_map[args.outtype]
|
output_type = ftype_map[args.outtype]
|
||||||
@@ -8770,7 +8799,7 @@ def main() -> None:
|
|||||||
split_max_tensors=args.split_max_tensors,
|
split_max_tensors=args.split_max_tensors,
|
||||||
split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
|
split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
|
||||||
small_first_shard=args.no_tensor_first_split,
|
small_first_shard=args.no_tensor_first_split,
|
||||||
remote_hf_model_id=hf_repo_id,
|
remote_hf_model_id=hf_repo_id, disable_mistral_community_chat_template=disable_mistral_community_chat_template
|
||||||
)
|
)
|
||||||
|
|
||||||
if args.vocab_only:
|
if args.vocab_only:
|
||||||
|
|||||||
Reference in New Issue
Block a user