llama : add API for token type

ggml-ci
This commit is contained in:
Georgi Gerganov
2023-08-21 19:35:31 +03:00
parent 8d177eddeb
commit 0b53b8b08d
6 changed files with 115 additions and 116 deletions

10
gguf.py
View File

@@ -61,6 +61,7 @@ KEY_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"
# Metadata key strings in the "tokenizer." namespace.
# NOTE(review): presumably these name the entries holding a Hugging Face
# tokenizer JSON blob and the RWKV "world" vocabulary - confirm against the
# code that writes them.
KEY_TOKENIZER_HF_JSON = "tokenizer.huggingface.json"
KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world"
#
# recommended mapping of model tensor names for storage in gguf
#
@@ -319,6 +320,15 @@ def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> dict:
return tensor_map
class TokenType(IntEnum):
    """Integer codes for the token categories distinguished by this module.

    Being an ``IntEnum``, every member is also a plain ``int`` and compares
    equal to its numeric value, so members can be used wherever an integer
    code is expected. Numbering starts at 1; no member is assigned 0.
    """

    # NOTE(review): these numbers presumably have to stay in sync with the
    # token-type codes used by whatever consumes them - confirm before
    # renumbering or inserting members.
    NORMAL = 1
    UNKNOWN = 2
    CONTROL = 3
    USER_DEFINED = 4
    UNUSED = 5
    BYTE = 6
#
# implementation
#