mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	gguf : upd gguf conversion script
This commit is contained in:
		| @@ -12,11 +12,14 @@ from sentencepiece import SentencePieceProcessor | |||||||
|  |  | ||||||
|  |  | ||||||
| NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]' | NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]' | ||||||
|  |  | ||||||
|  |  | ||||||
| def permute(weights: NDArray, n_head: int) -> NDArray: | def permute(weights: NDArray, n_head: int) -> NDArray: | ||||||
|     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) |     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) | ||||||
|                    .swapaxes(1, 2) |                    .swapaxes(1, 2) | ||||||
|                    .reshape(weights.shape)) |                    .reshape(weights.shape)) | ||||||
|  |  | ||||||
|  |  | ||||||
| if len(sys.argv) < 3: | if len(sys.argv) < 3: | ||||||
|     print("Usage: convert-h5-to-ggml.py dir-model ftype\n") |     print("Usage: convert-h5-to-ggml.py dir-model ftype\n") | ||||||
|     print("  ftype == 0 -> float32") |     print("  ftype == 0 -> float32") | ||||||
| @@ -45,7 +48,7 @@ if len(sys.argv) > 2: | |||||||
|     fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf" |     fname_out = sys.argv[1] + "/ggml-model-" + ftype_str[ftype] + ".gguf" | ||||||
|  |  | ||||||
|  |  | ||||||
| model = AutoModelForCausalLM.from_pretrained( dir_model, low_cpu_mem_usage=True, trust_remote_code=True ) | model = AutoModelForCausalLM.from_pretrained(dir_model, low_cpu_mem_usage=True, trust_remote_code=True) | ||||||
| list_vars = model.state_dict() | list_vars = model.state_dict() | ||||||
|  |  | ||||||
| # count tensors to be converted | # count tensors to be converted | ||||||
| @@ -56,7 +59,6 @@ for name in list_vars.keys(): | |||||||
|         continue |         continue | ||||||
|     tensor_count += 1 |     tensor_count += 1 | ||||||
|  |  | ||||||
| #fout = open(fname_out, "wb") |  | ||||||
| gguf_writer = gguf.GGUFWriter.open(fname_out) | gguf_writer = gguf.GGUFWriter.open(fname_out) | ||||||
|  |  | ||||||
| with open(dir_model + "/config.json", "r", encoding="utf-8") as f: | with open(dir_model + "/config.json", "r", encoding="utf-8") as f: | ||||||
| @@ -65,7 +67,7 @@ with open(dir_model + "/config.json", "r", encoding="utf-8") as f: | |||||||
| # This must be changed when adding/deleting kv | # This must be changed when adding/deleting kv | ||||||
| kv_count = 13 | kv_count = 13 | ||||||
|  |  | ||||||
| print("tensors " + str(tensor_count) + " kv " + str(kv_count) ) | print("tensors " + str(tensor_count) + " kv " + str(kv_count)) | ||||||
|  |  | ||||||
| print("write gguf header") | print("write gguf header") | ||||||
|  |  | ||||||
| @@ -92,10 +94,10 @@ gguf_writer.write_float32(llm_arch + ".attention.layer_norm_rms_epsilon", hparam | |||||||
| tokens: List[str] = [] | tokens: List[str] = [] | ||||||
| scores: List[float] = [] | scores: List[float] = [] | ||||||
|  |  | ||||||
| if Path( dir_model + "/tokenizer.model").is_file(): | if Path(dir_model + "/tokenizer.model").is_file(): | ||||||
|     # vocab type SPIECE |     # vocab type SPIECE | ||||||
|     print( "Adding sentencepiece tokenizer vocab." ) |     print("Adding sentencepiece tokenizer vocab.") | ||||||
|     tokenizer = SentencePieceProcessor( dir_model + "/tokenizer.model" ) |     tokenizer = SentencePieceProcessor(dir_model + "/tokenizer.model") | ||||||
|  |  | ||||||
|     # output vocab_size followed by all piece/score pairs |     # output vocab_size followed by all piece/score pairs | ||||||
|     outbytes: bytes |     outbytes: bytes | ||||||
| @@ -118,14 +120,14 @@ if Path( dir_model + "/tokenizer.model").is_file(): | |||||||
|             text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") |             text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") | ||||||
|         score: float = tokenizer.get_score(i) |         score: float = tokenizer.get_score(i) | ||||||
|  |  | ||||||
|         tokens.append( str(text) ); |         tokens.append(str(text)) | ||||||
|         scores.append( score ); |         scores.append(score) | ||||||
|  |  | ||||||
| print("write gguf tokens") | print("write gguf tokens") | ||||||
|  |  | ||||||
| gguf_writer.write_string("tokenizer.ggml.model", "llama") | gguf_writer.write_tokenizer_model("llama") | ||||||
| gguf_writer.write_array("tokenizer.ggml.tokens",tokens) | gguf_writer.write_token_list(tokens) | ||||||
| gguf_writer.write_array("tokenizer.ggml.scores",scores) | gguf_writer.write_token_scores(scores) | ||||||
|  |  | ||||||
| # TENSORS | # TENSORS | ||||||
|  |  | ||||||
| @@ -142,7 +144,7 @@ for name in list_vars.keys(): | |||||||
|  |  | ||||||
|     # permute these |     # permute these | ||||||
|     if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"): |     if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"): | ||||||
|         data = permute( data, hparams["num_attention_heads"] ) |         data = permute(data, hparams["num_attention_heads"]) | ||||||
|  |  | ||||||
|     # change tensor name |     # change tensor name | ||||||
|  |  | ||||||
| @@ -197,10 +199,10 @@ for name in list_vars.keys(): | |||||||
|         print("  Skip tensor: " + name) |         print("  Skip tensor: " + name) | ||||||
|         continue |         continue | ||||||
|  |  | ||||||
|     ## permute these |     # permute these | ||||||
|     if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"): |     if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"): | ||||||
|         print("  Permute tensor: " + name) |         print("  Permute tensor: " + name) | ||||||
|         data = permute( data, hparams["num_attention_heads"] ) |         data = permute(data, hparams["num_attention_heads"]) | ||||||
|  |  | ||||||
|     n_dims = len(data.shape) |     n_dims = len(data.shape) | ||||||
|  |  | ||||||
| @@ -221,7 +223,6 @@ for name in list_vars.keys(): | |||||||
|             data = data.astype(np.float32) |             data = data.astype(np.float32) | ||||||
|             ftype_cur = 0 |             ftype_cur = 0 | ||||||
|  |  | ||||||
|     gguf_writer.write_tensor_padding() |  | ||||||
|     gguf_writer.write_tensor(data) |     gguf_writer.write_tensor(data) | ||||||
|  |  | ||||||
| gguf_writer.close() | gguf_writer.close() | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 M. Yusuf Sarıgöz
					M. Yusuf Sarıgöz