mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-09 10:17:06 +00:00
support HF_TOKEN
This commit is contained in:
@@ -5498,7 +5498,7 @@ def parse_args() -> argparse.Namespace:
|
|||||||
)
|
)
|
||||||
parser.add_argument(
|
parser.add_argument(
|
||||||
"--remote", action="store_true",
|
"--remote", action="store_true",
|
||||||
help="(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'",
|
help="(Experimental) Read safetensors file remotely without downloading to disk. Config and tokenizer files will still be downloaded. To use this feature, you need to specify Hugging Face model repo name instead of a local directory. For example: 'HuggingFaceTB/SmolLM2-1.7B-Instruct'. Note: To access gated repo, set HF_TOKEN environment variable to your Hugging Face token.",
|
||||||
)
|
)
|
||||||
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
|||||||
@@ -3,6 +3,7 @@ from __future__ import annotations
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Literal
|
from typing import Literal
|
||||||
|
|
||||||
|
import os
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
||||||
@@ -94,7 +95,7 @@ class SafetensorRemote:
|
|||||||
Example (one model has single safetensor file, the other has multiple):
|
Example (one model has single safetensor file, the other has multiple):
|
||||||
for model_id in ["ngxson/TEST-Tiny-Llama4", "Qwen/Qwen2.5-7B-Instruct"]:
|
for model_id in ["ngxson/TEST-Tiny-Llama4", "Qwen/Qwen2.5-7B-Instruct"]:
|
||||||
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
||||||
print(json.dumps(tensors, indent=2))
|
print(tensors)
|
||||||
|
|
||||||
Example reading tensor data:
|
Example reading tensor data:
|
||||||
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
||||||
@@ -223,8 +224,10 @@ class SafetensorRemote:
|
|||||||
raise ValueError(f"Invalid URL: {url}")
|
raise ValueError(f"Invalid URL: {url}")
|
||||||
|
|
||||||
headers = {}
|
headers = {}
|
||||||
|
if os.environ.get("HF_TOKEN"):
|
||||||
|
headers["Authorization"] = f"Bearer {os.environ['HF_TOKEN']}"
|
||||||
if size > -1:
|
if size > -1:
|
||||||
headers = {"Range": f"bytes={start}-{start + size}"}
|
headers["Range"] = f"bytes={start}-{start + size}"
|
||||||
response = requests.get(url, allow_redirects=True, headers=headers)
|
response = requests.get(url, allow_redirects=True, headers=headers)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
|
|
||||||
@@ -246,6 +249,8 @@ class SafetensorRemote:
|
|||||||
|
|
||||||
try:
|
try:
|
||||||
headers = {"Range": "bytes=0-0"}
|
headers = {"Range": "bytes=0-0"}
|
||||||
|
if os.environ.get("HF_TOKEN"):
|
||||||
|
headers["Authorization"] = f"Bearer {os.environ['HF_TOKEN']}"
|
||||||
response = requests.head(url, allow_redirects=True, headers=headers)
|
response = requests.head(url, allow_redirects=True, headers=headers)
|
||||||
# Success (2xx) or redirect (3xx)
|
# Success (2xx) or redirect (3xx)
|
||||||
return 200 <= response.status_code < 400
|
return 200 <= response.status_code < 400
|
||||||
|
|||||||
Reference in New Issue
Block a user