convert : make hf token optional (#14717)

Author: Sigbjørn Skjæret

* make hf token optional
* fail if we can't get necessary tokenizer config
convert_hf_to_gguf_update.py
@@ -7,7 +7,6 @@ import pathlib
 import re
 
 import requests
-import sys
 import json
 import shutil
 import argparse
@@ -69,8 +68,7 @@ args = parser.parse_args()
 hf_token = args.hf_token if args.hf_token is not None else hf_token
 
 if hf_token is None:
-    logger.error("HF token is required. Please provide it as an argument or set it in ~/.cache/huggingface/token")
-    sys.exit(1)
+    logger.warning("HF token not found. You can provide it as an argument or set it in ~/.cache/huggingface/token")
 
 # TODO: this string has to exercise as much pre-tokenizer functionality as possible
 #       will be updated with time - contributions welcome
@@ -151,7 +149,7 @@ pre_computed_hashes = [
 
 
 def download_file_with_auth(url, token, save_path):
-    headers = {"Authorization": f"Bearer {token}"}
+    headers = {"Authorization": f"Bearer {token}"} if token else None
     response = sess.get(url, headers=headers)
     response.raise_for_status()
     os.makedirs(os.path.dirname(save_path), exist_ok=True)
@@ -250,10 +248,9 @@ for model in [*pre_computed_hashes, *all_models]:
     else:
         # otherwise, compute the hash of the tokenizer
 
-        # Skip if the tokenizer folder does not exist or there are other download issues previously
-        if not os.path.exists(f"models/tokenizers/{name}"):
-            logger.warning(f"Directory for tokenizer {name} not found. Skipping...")
-            continue
+        # Fail if the tokenizer folder with config does not exist or there are other download issues previously
+        if not os.path.isfile(f"models/tokenizers/{name}/tokenizer_config.json"):
+            raise OSError(f"Config for tokenizer {name} not found. The model may not exist or is not accessible with the provided token.")
 
         try:
             logger.info(f"Loading tokenizer from {f'models/tokenizers/{name}'}...")
@@ -261,9 +258,8 @@ for model in [*pre_computed_hashes, *all_models]:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}", use_fast=False)
             else:
                 tokenizer = AutoTokenizer.from_pretrained(f"models/tokenizers/{name}")
-        except OSError as e:
-            logger.error(f"Error loading tokenizer for model {name}. The model may not exist or is not accessible with the provided token. Error: {e}")
-            continue  # Skip to the next model if the tokenizer can't be loaded
+        except Exception as e:
+            raise OSError(f"Error loading tokenizer for model {name}.") from e
 
         chktok = tokenizer.encode(CHK_TXT)
         chkhsh = sha256(str(chktok).encode()).hexdigest()
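For context on the download change: the Authorization header is now built only when a token is actually available, which lets unauthenticated requests through for public repositories while still failing loudly (via raise_for_status) on gated ones. A minimal, self-contained sketch of the same pattern; the function name and example URL here are illustrative, not part of the commit:

import os

import requests


def download_file_with_optional_auth(url, token, save_path):
    # Hypothetical helper mirroring the pattern above: attach a Bearer
    # token only when one is available; public files on the Hugging Face
    # Hub can be fetched without authentication.
    headers = {"Authorization": f"Bearer {token}"} if token else None
    response = requests.get(url, headers=headers)
    response.raise_for_status()  # a 401/403 on a gated repo still aborts here
    os.makedirs(os.path.dirname(save_path), exist_ok=True)
    with open(save_path, "wb") as f:
        f.write(response.content)


# example: a public tokenizer config downloads fine with token=None
download_file_with_optional_auth(
    "https://huggingface.co/gpt2/resolve/main/tokenizer_config.json",
    token=None,
    save_path="models/tokenizers/gpt2/tokenizer_config.json",
)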
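The error-handling half of the commit trades the old log-and-continue behavior for a chained exception, so a missing or inaccessible tokenizer now aborts the run instead of silently yielding an incomplete set of hashes. A small sketch of the raise ... from idiom, with a hypothetical helper name:

from transformers import AutoTokenizer


def load_tokenizer_or_fail(path):
    # Illustrative only: wrap any loading failure in an OSError while
    # chaining the original exception, so the root cause (e.g. a 401
    # from the Hub or a corrupt file) remains visible in the traceback.
    try:
        return AutoTokenizer.from_pretrained(path)
    except Exception as e:
        raise OSError(f"Error loading tokenizer from {path}.") from e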