vocab : JetBrains Mellum pre-tokenizer (#15045)

This commit is contained in:
Csaba Kecskemeti
2025-08-03 12:38:18 -07:00
committed by GitHub
parent 83bc2f288c
commit 97366dc6ab
3 changed files with 6 additions and 1 deletion

View File

@@ -852,6 +852,9 @@ class TextModel(ModelBase):
if chkhsh == "2085e1638f6c377a0aa4ead21b27bb4cb941bf800df86ed391011769c1758dfb":
# ref: https://huggingface.co/LGAI-EXAONE/EXAONE-4.0-32B
res = "exaone4"
if chkhsh == "a1e163ecab2e718a4c829d1148b6e86824ec36163bb71941c3dca9cd5ac25756":
# ref: https://huggingface.co/JetBrains/Mellum-4b-base
res = "mellum"
if res is None:
logger.warning("\n")