llama : sync gguf-llama with llama (#2613)

* llama : sync gguf-llama with llama
* tests : fix build + warnings (test-tokenizer-1 still fails)
* tests : fix wstring_convert
* convert : fix layer names
* llama : sync gguf-llama.cpp
* convert : update HF converter to new tokenizer voodoo magics
2025-11-08 10:07:01 +00:00 · 2023-08-14 21:33:33 +03:00
parent afc4ca2889
commit 7494c78428
8 changed files with 590 additions and 293 deletions
--- a/convert-llama-h5-to-gguf.py
+++ b/convert-llama-h5-to-gguf.py
@@ -95,7 +95,7 @@ else:

 gguf_writer.add_architecture(llm_arch)
 gguf_writer.add_name(last_dir)
-gguf_writer.add_file_type( "All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
+gguf_writer.add_file_type("All tensors F32" if ftype == 0 else "Most tensors F16, some F32")
 gguf_writer.add_source_hf_repo(hf_repo)
 gguf_writer.add_context_length(llm_arch, hparams["max_position_embeddings"])
 gguf_writer.add_embedding_length(llm_arch, hparams["hidden_size"])
@@ -122,19 +122,11 @@ if Path(dir_model + "/tokenizer.model").is_file():

    for i in range(tokenizer.vocab_size()):
        text: bytes
-        if tokenizer.is_unknown(i):
-            text = " \u2047 ".encode("utf-8")
-        elif tokenizer.is_control(i):
-            text = b""
-        if tokenizer.is_byte(i):
-            piece = tokenizer.id_to_piece(i)
-            if len(piece) != 6:
-                raise Exception(f"Invalid token: {piece}")
-            byte_value = int(piece[3:-1], 16)
-            text = struct.pack("B", byte_value)
-        else:
-            text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8")
-        score: float = tokenizer.get_score(i)
+        score: float
+
+        piece = tokenizer.id_to_piece(i)
+        text  = piece.encode("utf-8")
+        score = tokenizer.get_score(i)

        tokens.append(text)
        scores.append(score)