mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	gguf : upd gguf conversion script
This commit is contained in:
		| @@ -12,11 +12,14 @@ from sentencepiece import SentencePieceProcessor | |||||||
|  |  | ||||||
|  |  | ||||||
| NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]' | NDArray: 'TypeAlias' = 'np.ndarray[Any, Any]' | ||||||
|  |  | ||||||
|  |  | ||||||
| def permute(weights: NDArray, n_head: int) -> NDArray: | def permute(weights: NDArray, n_head: int) -> NDArray: | ||||||
|     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) |     return (weights.reshape(n_head, 2, weights.shape[0] // n_head // 2, *weights.shape[1:]) | ||||||
|                    .swapaxes(1, 2) |                    .swapaxes(1, 2) | ||||||
|                    .reshape(weights.shape)) |                    .reshape(weights.shape)) | ||||||
|  |  | ||||||
|  |  | ||||||
| if len(sys.argv) < 3: | if len(sys.argv) < 3: | ||||||
|     print("Usage: convert-h5-to-ggml.py dir-model ftype\n") |     print("Usage: convert-h5-to-ggml.py dir-model ftype\n") | ||||||
|     print("  ftype == 0 -> float32") |     print("  ftype == 0 -> float32") | ||||||
| @@ -56,7 +59,6 @@ for name in list_vars.keys(): | |||||||
|         continue |         continue | ||||||
|     tensor_count += 1 |     tensor_count += 1 | ||||||
|  |  | ||||||
| #fout = open(fname_out, "wb") |  | ||||||
| gguf_writer = gguf.GGUFWriter.open(fname_out) | gguf_writer = gguf.GGUFWriter.open(fname_out) | ||||||
|  |  | ||||||
| with open(dir_model + "/config.json", "r", encoding="utf-8") as f: | with open(dir_model + "/config.json", "r", encoding="utf-8") as f: | ||||||
| @@ -118,14 +120,14 @@ if Path( dir_model + "/tokenizer.model").is_file(): | |||||||
|             text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") |             text = tokenizer.id_to_piece(i).replace("\u2581", " ").encode("utf-8") | ||||||
|         score: float = tokenizer.get_score(i) |         score: float = tokenizer.get_score(i) | ||||||
|  |  | ||||||
|         tokens.append( str(text) ); |         tokens.append(str(text)) | ||||||
|         scores.append( score ); |         scores.append(score) | ||||||
|  |  | ||||||
| print("write gguf tokens") | print("write gguf tokens") | ||||||
|  |  | ||||||
| gguf_writer.write_string("tokenizer.ggml.model", "llama") | gguf_writer.write_tokenizer_model("llama") | ||||||
| gguf_writer.write_array("tokenizer.ggml.tokens",tokens) | gguf_writer.write_token_list(tokens) | ||||||
| gguf_writer.write_array("tokenizer.ggml.scores",scores) | gguf_writer.write_token_scores(scores) | ||||||
|  |  | ||||||
| # TENSORS | # TENSORS | ||||||
|  |  | ||||||
| @@ -197,7 +199,7 @@ for name in list_vars.keys(): | |||||||
|         print("  Skip tensor: " + name) |         print("  Skip tensor: " + name) | ||||||
|         continue |         continue | ||||||
|  |  | ||||||
|     ## permute these |     # permute these | ||||||
|     if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"): |     if name.endswith(".q_proj.weight") or name.endswith(".k_proj.weight"): | ||||||
|         print("  Permute tensor: " + name) |         print("  Permute tensor: " + name) | ||||||
|         data = permute(data, hparams["num_attention_heads"]) |         data = permute(data, hparams["num_attention_heads"]) | ||||||
| @@ -221,7 +223,6 @@ for name in list_vars.keys(): | |||||||
|             data = data.astype(np.float32) |             data = data.astype(np.float32) | ||||||
|             ftype_cur = 0 |             ftype_cur = 0 | ||||||
|  |  | ||||||
|     gguf_writer.write_tensor_padding() |  | ||||||
|     gguf_writer.write_tensor(data) |     gguf_writer.write_tensor(data) | ||||||
|  |  | ||||||
| gguf_writer.close() | gguf_writer.close() | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 M. Yusuf Sarıgöz
					M. Yusuf Sarıgöz