vocab : JetBrains Mellum pre-tokenizer (#15045)

2025-10-27 08:21:30 +00:00 · 2025-08-03 12:38:18 -07:00
parent 83bc2f288c
commit 97366dc6ab
3 changed files with 6 additions and 1 deletions
--- a/convert_hf_to_gguf.py
+++ b/convert_hf_to_gguf.py
@@ -852,6 +852,9 @@ class TextModel(ModelBase):
        if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
            # ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
            res = "exaone4"
+        if chkhsh == "a1e163ecab2e718a4c829d1148b6e86824ec36163bb71941c3dca9cd5ac25756":
+            # ref: https://huggingface.co/JetBrains/Mellum-4b-base
+            res = "mellum"

        if res is None:
            logger.warning("\n")