mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-01 09:01:57 +00:00
llama : add API for token type
ggml-ci
This commit is contained in:
10
gguf.py
10
gguf.py
@@ -61,6 +61,7 @@ KEY_TOKENIZER_PAD_ID = "tokenizer.ggml.padding_token_id"
|
||||
KEY_TOKENIZER_HF_JSON = "tokenizer.huggingface.json"
|
||||
KEY_TOKENIZER_RWKV = "tokenizer.rwkv.world"
|
||||
|
||||
|
||||
#
|
||||
# recommended mapping of model tensor names for storage in gguf
|
||||
#
|
||||
@@ -319,6 +320,15 @@ def get_tensor_name_map(arch: MODEL_ARCH, n_blocks: int) -> dict:
|
||||
|
||||
return tensor_map
|
||||
|
||||
|
||||
class TokenType(IntEnum):
|
||||
NORMAL = 1
|
||||
UNKNOWN = 2
|
||||
CONTROL = 3
|
||||
USER_DEFINED = 4
|
||||
UNUSED = 5
|
||||
BYTE = 6
|
||||
|
||||
#
|
||||
# implementation
|
||||
#
|
||||
|
||||
Reference in New Issue
Block a user