mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-11 10:36:54 +00:00
Merge branch 'master' into compilade/parallel-convert
This commit is contained in:
@@ -248,6 +248,8 @@ class MODEL_ARCH(IntEnum):
|
||||
QWEN2 = auto()
|
||||
QWEN2MOE = auto()
|
||||
QWEN2VL = auto()
|
||||
QWEN3 = auto()
|
||||
QWEN3MOE = auto()
|
||||
PHI2 = auto()
|
||||
PHI3 = auto()
|
||||
PHIMOE = auto()
|
||||
@@ -278,6 +280,7 @@ class MODEL_ARCH(IntEnum):
|
||||
DEEPSEEK = auto()
|
||||
DEEPSEEK2 = auto()
|
||||
CHATGLM = auto()
|
||||
GLM4 = auto()
|
||||
BITNET = auto()
|
||||
T5 = auto()
|
||||
T5ENCODER = auto()
|
||||
@@ -453,6 +456,8 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
MODEL_ARCH.QWEN2: "qwen2",
|
||||
MODEL_ARCH.QWEN2MOE: "qwen2moe",
|
||||
MODEL_ARCH.QWEN2VL: "qwen2vl",
|
||||
MODEL_ARCH.QWEN3: "qwen3",
|
||||
MODEL_ARCH.QWEN3MOE: "qwen3moe",
|
||||
MODEL_ARCH.PHI2: "phi2",
|
||||
MODEL_ARCH.PHI3: "phi3",
|
||||
MODEL_ARCH.PHIMOE: "phimoe",
|
||||
@@ -483,6 +488,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = {
|
||||
MODEL_ARCH.DEEPSEEK: "deepseek",
|
||||
MODEL_ARCH.DEEPSEEK2: "deepseek2",
|
||||
MODEL_ARCH.CHATGLM: "chatglm",
|
||||
MODEL_ARCH.GLM4: "glm4",
|
||||
MODEL_ARCH.BITNET: "bitnet",
|
||||
MODEL_ARCH.T5: "t5",
|
||||
MODEL_ARCH.T5ENCODER: "t5encoder",
|
||||
@@ -953,6 +959,40 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.FFN_DOWN_SHEXP,
|
||||
MODEL_TENSOR.FFN_UP_SHEXP,
|
||||
],
|
||||
MODEL_ARCH.QWEN3: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_Q_NORM,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
MODEL_TENSOR.ATTN_K_NORM,
|
||||
MODEL_TENSOR.ATTN_V,
|
||||
MODEL_TENSOR.ATTN_OUT,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
MODEL_TENSOR.FFN_GATE,
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
],
|
||||
MODEL_ARCH.QWEN3MOE: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_Q_NORM,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
MODEL_TENSOR.ATTN_K_NORM,
|
||||
MODEL_TENSOR.ATTN_V,
|
||||
MODEL_TENSOR.ATTN_OUT,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
MODEL_TENSOR.FFN_GATE_INP,
|
||||
MODEL_TENSOR.FFN_GATE_EXP,
|
||||
MODEL_TENSOR.FFN_DOWN_EXP,
|
||||
MODEL_TENSOR.FFN_UP_EXP,
|
||||
],
|
||||
MODEL_ARCH.PLAMO: [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
@@ -1523,6 +1563,23 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = {
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
],
|
||||
MODEL_ARCH.GLM4 : [
|
||||
MODEL_TENSOR.TOKEN_EMBD,
|
||||
MODEL_TENSOR.ROPE_FREQS,
|
||||
MODEL_TENSOR.OUTPUT_NORM,
|
||||
MODEL_TENSOR.OUTPUT,
|
||||
MODEL_TENSOR.ATTN_NORM,
|
||||
MODEL_TENSOR.ATTN_QKV,
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
MODEL_TENSOR.ATTN_V,
|
||||
MODEL_TENSOR.ATTN_OUT,
|
||||
MODEL_TENSOR.FFN_NORM,
|
||||
MODEL_TENSOR.FFN_DOWN,
|
||||
MODEL_TENSOR.FFN_UP,
|
||||
MODEL_TENSOR.ATTN_POST_NORM,
|
||||
MODEL_TENSOR.FFN_POST_NORM,
|
||||
],
|
||||
MODEL_ARCH.BITNET: [
|
||||
MODEL_TENSOR.ATTN_Q,
|
||||
MODEL_TENSOR.ATTN_K,
|
||||
|
||||
@@ -13,7 +13,7 @@ class TensorNameMap:
|
||||
"transformer.wte", # gpt2 gpt-j mpt refact qwen dbrx jais exaone
|
||||
"transformer.word_embeddings", # falcon
|
||||
"word_embeddings", # bloom
|
||||
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2
|
||||
"model.embed_tokens", # llama-hf nemotron olmoe olmo2 rwkv6qwen2 glm4-0414
|
||||
"tok_embeddings", # llama-pth
|
||||
"embeddings.word_embeddings", # bert nomic-bert
|
||||
"language_model.embedding.word_embeddings", # persimmon
|
||||
@@ -30,6 +30,7 @@ class TensorNameMap:
|
||||
"rwkv.embeddings", # rwkv6
|
||||
"model.embeddings", # rwkv7
|
||||
"model.word_embeddings", # bailingmoe
|
||||
"language_model.model.embed_tokens", # llama4
|
||||
),
|
||||
|
||||
# Token type embeddings
|
||||
@@ -67,6 +68,7 @@ class TensorNameMap:
|
||||
"output_layer", # chatglm
|
||||
"head", # rwkv
|
||||
"head.out", # wavtokenizer
|
||||
"language_model.lm_head", # llama4
|
||||
),
|
||||
|
||||
# Output norm
|
||||
@@ -89,6 +91,7 @@ class TensorNameMap:
|
||||
"rwkv.ln_out", # rwkv6
|
||||
"model.ln_out", # rwkv7
|
||||
"backbone.final_layer_norm", # wavtokenizer
|
||||
"language_model.model.norm", # llama4
|
||||
),
|
||||
|
||||
# Rope frequencies
|
||||
@@ -130,6 +133,7 @@ class TensorNameMap:
|
||||
"transformer.layers.{bid}.attn_norm", # openelm
|
||||
"rwkv.blocks.{bid}.ln1", # rwkv6
|
||||
"model.layers.{bid}.ln1", # rwkv7
|
||||
"language_model.model.layers.{bid}.input_layernorm", # llama4
|
||||
),
|
||||
|
||||
# Attention norm 2
|
||||
@@ -169,6 +173,7 @@ class TensorNameMap:
|
||||
"model.layers.{bid}.attention.wq", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.query",# Grok
|
||||
"transformer.h.{bid}.attn.attention.q_proj", # exaone
|
||||
"language_model.model.layers.{bid}.self_attn.q_proj", # llama4
|
||||
),
|
||||
|
||||
# Attention key
|
||||
@@ -183,6 +188,7 @@ class TensorNameMap:
|
||||
"model.layers.{bid}.attention.wk", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.key",# Grok
|
||||
"transformer.h.{bid}.attn.attention.k_proj", # exaone
|
||||
"language_model.model.layers.{bid}.self_attn.k_proj", # llama4
|
||||
),
|
||||
|
||||
# Attention value
|
||||
@@ -196,6 +202,7 @@ class TensorNameMap:
|
||||
"model.layers.{bid}.attention.wv", # internlm2
|
||||
"transformer.decoder_layer.{bid}.multi_head_attention.value",# Grok
|
||||
"transformer.h.{bid}.attn.attention.v_proj", # exaone
|
||||
"language_model.model.layers.{bid}.self_attn.v_proj", # llama4
|
||||
),
|
||||
|
||||
# Attention output
|
||||
@@ -222,6 +229,7 @@ class TensorNameMap:
|
||||
"encoder.layers.{bid}.self_attention.dense", # chatglm
|
||||
"transformer.layers.{bid}.attn.out_proj", # openelm
|
||||
"transformer.h.{bid}.attn.attention.out_proj", # exaone
|
||||
"language_model.model.layers.{bid}.self_attn.o_proj", # llama4
|
||||
),
|
||||
|
||||
# Attention output norm
|
||||
@@ -233,7 +241,8 @@ class TensorNameMap:
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_POST_NORM: (
|
||||
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2
|
||||
"model.layers.{bid}.post_attention_layernorm", # gemma2 olmo2 # ge
|
||||
"model.layers.{bid}.post_self_attn_layernorm", # glm-4-0414
|
||||
),
|
||||
|
||||
# Rotary embeddings
|
||||
@@ -259,6 +268,7 @@ class TensorNameMap:
|
||||
"transformer.decoder_layer.{bid}.rms_norm_2", # Grok
|
||||
"encoder.layers.{bid}.post_attention_layernorm", # chatglm
|
||||
"transformer.layers.{bid}.ffn_norm", # openelm
|
||||
"language_model.model.layers.{bid}.post_attention_layernorm", # llama4
|
||||
),
|
||||
|
||||
# Post feed-forward norm
|
||||
@@ -269,6 +279,7 @@ class TensorNameMap:
|
||||
# Post feed-forward norm
|
||||
MODEL_TENSOR.FFN_POST_NORM: (
|
||||
"model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo2
|
||||
"model.layers.{bid}.post_mlp_layernorm", # glm-4-0414
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_INP: (
|
||||
@@ -278,6 +289,7 @@ class TensorNameMap:
|
||||
"transformer.decoder_layer.{bid}.router", # Grok
|
||||
"transformer.blocks.{bid}.ffn.router.layer", # dbrx
|
||||
"model.layers.{bid}.block_sparse_moe.router.layer", # granitemoe
|
||||
"language_model.model.layers.{bid}.feed_forward.router", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_INP_SHEXP: (
|
||||
@@ -306,7 +318,7 @@ class TensorNameMap:
|
||||
"h.{bid}.mlp.c_fc", # gpt2
|
||||
"transformer.h.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.fc1", # phi2
|
||||
"model.layers.{bid}.mlp.gate_up_proj", # phi3
|
||||
"model.layers.{bid}.mlp.gate_up_proj", # phi3 glm-4-0414
|
||||
"model.layers.layers.{bid}.mlp.up_proj", # plamo
|
||||
"model.layers.{bid}.feed_forward.w3", # internlm2
|
||||
"encoder.layers.{bid}.mlp.fc11", # nomic-bert
|
||||
@@ -315,6 +327,7 @@ class TensorNameMap:
|
||||
"model.layers.{bid}.residual_mlp.w3", # arctic
|
||||
"encoder.layers.{bid}.mlp.dense_h_to_4h", # chatglm
|
||||
"transformer.h.{bid}.mlp.c_fc_1", # exaone
|
||||
"language_model.model.layers.{bid}.feed_forward.up_proj", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_UP_EXP: (
|
||||
@@ -323,11 +336,13 @@ class TensorNameMap:
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.v1", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.up_proj", # qwen2moe olmoe (merged)
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w3", # phimoe (merged)
|
||||
"language_model.model.layers.{bid}.feed_forward.experts.up_proj", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_UP_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.up_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.up_proj", # deepseek deepseek2
|
||||
"language_model.model.layers.{bid}.feed_forward.shared_expert.up_proj", # llama4
|
||||
),
|
||||
|
||||
# AWQ-activation gate
|
||||
@@ -348,6 +363,7 @@ class TensorNameMap:
|
||||
"transformer.h.{bid}.mlp.linear_1", # refact
|
||||
"model.layers.{bid}.residual_mlp.w1", # arctic
|
||||
"transformer.h.{bid}.mlp.c_fc_0", # exaone
|
||||
"language_model.model.layers.{bid}.feed_forward.gate_proj", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_EXP: (
|
||||
@@ -356,11 +372,13 @@ class TensorNameMap:
|
||||
"transformer.blocks.{bid}.ffn.experts.mlp.w1", # dbrx
|
||||
"model.layers.{bid}.mlp.experts.gate_proj", # qwen2moe olmoe (merged)
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w1", # phimoe (merged)
|
||||
"language_model.model.layers.{bid}.feed_forward.experts.gate_proj", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_GATE_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.gate_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.gate_proj", # deepseek deepseek2
|
||||
"language_model.model.layers.{bid}.feed_forward.shared_expert.gate_proj", # llama4
|
||||
),
|
||||
|
||||
# Feed-forward down
|
||||
@@ -389,6 +407,7 @@ class TensorNameMap:
|
||||
"encoder.layer.{bid}.mlp.down_layer", # jina-bert-v2
|
||||
"encoder.layers.{bid}.mlp.dense_4h_to_h", # chatglm
|
||||
"model.layers.h.{bid}.mlp.c_proj", # exaone
|
||||
"language_model.model.layers.{bid}.feed_forward.down_proj", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_DOWN_EXP: (
|
||||
@@ -398,11 +417,13 @@ class TensorNameMap:
|
||||
"model.layers.{bid}.mlp.experts.down_proj", # qwen2moe olmoe (merged)
|
||||
"model.layers.{bid}.block_sparse_moe.output_linear", # granitemoe
|
||||
"model.layers.{bid}.block_sparse_moe.experts.w2", # phimoe (merged)
|
||||
"language_model.model.layers.{bid}.feed_forward.experts.down_proj", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.FFN_DOWN_SHEXP: (
|
||||
"model.layers.{bid}.mlp.shared_expert.down_proj", # qwen2moe
|
||||
"model.layers.{bid}.mlp.shared_experts.down_proj", # deepseek deepseek2
|
||||
"language_model.model.layers.{bid}.feed_forward.shared_expert.down_proj", # llama4
|
||||
),
|
||||
|
||||
MODEL_TENSOR.ATTN_Q_NORM: (
|
||||
|
||||
@@ -1,7 +1,11 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from dataclasses import dataclass
|
||||
from typing import Literal
|
||||
|
||||
import os
|
||||
import json
|
||||
|
||||
|
||||
def fill_templated_filename(filename: str, output_type: str | None) -> str:
|
||||
# Given a file name fill in any type templates e.g. 'some-model-name.{ftype}.gguf'
|
||||
@@ -67,3 +71,194 @@ def naming_convention(model_name: str | None, base_name: str | None, finetune_st
|
||||
kind = f"-{model_type.strip().replace(' ', '-')}" if model_type is not None else ""
|
||||
|
||||
return f"{name}{parameters}{finetune}{version}{encoding}{kind}"
|
||||
|
||||
|
||||
@dataclass
|
||||
class RemoteTensor:
|
||||
dtype: str
|
||||
shape: tuple[int, ...]
|
||||
offset_start: int
|
||||
size: int
|
||||
url: str
|
||||
|
||||
def data(self) -> bytearray:
|
||||
# TODO: handle request errors (maybe with limited retries?)
|
||||
# NOTE: using a bytearray, otherwise PyTorch complains the buffer is not writeable
|
||||
data = bytearray(SafetensorRemote.get_data_by_range(url=self.url, start=self.offset_start, size=self.size))
|
||||
return data
|
||||
|
||||
|
||||
class SafetensorRemote:
|
||||
"""
|
||||
Uility class to handle remote safetensor files.
|
||||
This class is designed to work with Hugging Face model repositories.
|
||||
|
||||
Example (one model has single safetensor file, the other has multiple):
|
||||
for model_id in ["ngxson/TEST-Tiny-Llama4", "Qwen/Qwen2.5-7B-Instruct"]:
|
||||
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
||||
print(tensors)
|
||||
|
||||
Example reading tensor data:
|
||||
tensors = SafetensorRemote.get_list_tensors_hf_model(model_id)
|
||||
for name, meta in tensors.items():
|
||||
dtype, shape, offset_start, size, remote_safetensor_url = meta
|
||||
# read the tensor data
|
||||
data = SafetensorRemote.get_data_by_range(remote_safetensor_url, offset_start, size)
|
||||
print(data)
|
||||
"""
|
||||
|
||||
BASE_DOMAIN = "https://huggingface.co"
|
||||
ALIGNMENT = 8 # bytes
|
||||
|
||||
@classmethod
|
||||
def get_list_tensors_hf_model(cls, model_id: str) -> dict[str, RemoteTensor]:
|
||||
"""
|
||||
Get list of tensors from a Hugging Face model repository.
|
||||
|
||||
Returns a dictionary of tensor names and their metadata.
|
||||
Each tensor is represented as a tuple of (dtype, shape, offset_start, size, remote_safetensor_url)
|
||||
"""
|
||||
# case 1: model has only one single model.safetensor file
|
||||
is_single_file = cls.check_file_exist(f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors")
|
||||
if is_single_file:
|
||||
url = f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors"
|
||||
return cls.get_list_tensors(url)
|
||||
|
||||
# case 2: model has multiple files
|
||||
index_url = f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/model.safetensors.index.json"
|
||||
is_multiple_files = cls.check_file_exist(index_url)
|
||||
if is_multiple_files:
|
||||
# read the index file
|
||||
index_data = cls.get_data_by_range(index_url, 0)
|
||||
index_str = index_data.decode('utf-8')
|
||||
index_json = json.loads(index_str)
|
||||
assert index_json.get("weight_map") is not None, "weight_map not found in index file"
|
||||
weight_map = index_json["weight_map"]
|
||||
# get the list of files
|
||||
all_files = list(set(weight_map.values()))
|
||||
all_files.sort() # make sure we load shard files in order
|
||||
# get the list of tensors
|
||||
tensors: dict[str, RemoteTensor] = {}
|
||||
for file in all_files:
|
||||
url = f"{cls.BASE_DOMAIN}/{model_id}/resolve/main/{file}"
|
||||
for key, val in cls.get_list_tensors(url).items():
|
||||
tensors[key] = val
|
||||
return tensors
|
||||
|
||||
raise ValueError(f"Model {model_id} does not have any safetensor files")
|
||||
|
||||
@classmethod
|
||||
def get_list_tensors(cls, url: str) -> dict[str, RemoteTensor]:
|
||||
"""
|
||||
Get list of tensors from a remote safetensor file.
|
||||
|
||||
Returns a dictionary of tensor names and their metadata.
|
||||
Each tensor is represented as a tuple of (dtype, shape, offset_start, size)
|
||||
"""
|
||||
metadata, data_start_offset = cls.get_metadata(url)
|
||||
res: dict[str, RemoteTensor] = {}
|
||||
|
||||
for name, meta in metadata.items():
|
||||
if name == "__metadata__":
|
||||
continue
|
||||
if not isinstance(meta, dict):
|
||||
raise ValueError(f"Invalid metadata for tensor '{name}': {meta}")
|
||||
try:
|
||||
dtype = meta["dtype"]
|
||||
shape = meta["shape"]
|
||||
offset_start_relative, offset_end_relative = meta["data_offsets"]
|
||||
size = offset_end_relative - offset_start_relative
|
||||
offset_start = data_start_offset + offset_start_relative
|
||||
res[name] = RemoteTensor(dtype=dtype, shape=tuple(shape), offset_start=offset_start, size=size, url=url)
|
||||
except KeyError as e:
|
||||
raise ValueError(f"Missing key in metadata for tensor '{name}': {e}, meta = {meta}")
|
||||
|
||||
return res
|
||||
|
||||
@classmethod
|
||||
def get_metadata(cls, url: str) -> tuple[dict, int]:
|
||||
"""
|
||||
Get JSON metadata from a remote safetensor file.
|
||||
|
||||
Returns tuple of (metadata, data_start_offset)
|
||||
"""
|
||||
# Request first 5MB of the file (hopefully enough for metadata)
|
||||
read_size = 5 * 1024 * 1024
|
||||
raw_data = cls.get_data_by_range(url, 0, read_size)
|
||||
|
||||
# Parse header
|
||||
# First 8 bytes contain the metadata length as u64 little-endian
|
||||
if len(raw_data) < 8:
|
||||
raise ValueError("Not enough data to read metadata size")
|
||||
metadata_length = int.from_bytes(raw_data[:8], byteorder='little')
|
||||
|
||||
# Calculate the data start offset
|
||||
data_start_offset = 8 + metadata_length
|
||||
alignment = SafetensorRemote.ALIGNMENT
|
||||
if data_start_offset % alignment != 0:
|
||||
data_start_offset += alignment - (data_start_offset % alignment)
|
||||
|
||||
# Check if we have enough data to read the metadata
|
||||
if len(raw_data) < 8 + metadata_length:
|
||||
raise ValueError(f"Could not read complete metadata. Need {8 + metadata_length} bytes, got {len(raw_data)}")
|
||||
|
||||
# Extract metadata bytes and parse as JSON
|
||||
metadata_bytes = raw_data[8:8 + metadata_length]
|
||||
metadata_str = metadata_bytes.decode('utf-8')
|
||||
try:
|
||||
metadata = json.loads(metadata_str)
|
||||
return metadata, data_start_offset
|
||||
except json.JSONDecodeError as e:
|
||||
raise ValueError(f"Failed to parse safetensor metadata as JSON: {e}")
|
||||
|
||||
@classmethod
|
||||
def get_data_by_range(cls, url: str, start: int, size: int = -1) -> bytes:
|
||||
"""
|
||||
Get raw byte data from a remote file by range.
|
||||
If size is not specified, it will read the entire file.
|
||||
"""
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
if not parsed_url.scheme or not parsed_url.netloc:
|
||||
raise ValueError(f"Invalid URL: {url}")
|
||||
|
||||
headers = cls._get_request_headers()
|
||||
if size > -1:
|
||||
headers["Range"] = f"bytes={start}-{start + size}"
|
||||
response = requests.get(url, allow_redirects=True, headers=headers)
|
||||
response.raise_for_status()
|
||||
|
||||
# Get raw byte data
|
||||
return response.content[:size]
|
||||
|
||||
@classmethod
|
||||
def check_file_exist(cls, url: str) -> bool:
|
||||
"""
|
||||
Check if a file exists at the given URL.
|
||||
Returns True if the file exists, False otherwise.
|
||||
"""
|
||||
import requests
|
||||
from urllib.parse import urlparse
|
||||
|
||||
parsed_url = urlparse(url)
|
||||
if not parsed_url.scheme or not parsed_url.netloc:
|
||||
raise ValueError(f"Invalid URL: {url}")
|
||||
|
||||
try:
|
||||
headers = cls._get_request_headers()
|
||||
headers["Range"] = "bytes=0-0"
|
||||
response = requests.head(url, allow_redirects=True, headers=headers)
|
||||
# Success (2xx) or redirect (3xx)
|
||||
return 200 <= response.status_code < 400
|
||||
except requests.RequestException:
|
||||
return False
|
||||
|
||||
@classmethod
|
||||
def _get_request_headers(cls) -> dict[str, str]:
|
||||
"""Prepare common headers for requests."""
|
||||
headers = {"User-Agent": "convert_hf_to_gguf"}
|
||||
if os.environ.get("HF_TOKEN"):
|
||||
headers["Authorization"] = f"Bearer {os.environ['HF_TOKEN']}"
|
||||
return headers
|
||||
|
||||
Reference in New Issue
Block a user