convert : detect filesystem block size for reflinks

* convert : use direct copies when possible

Use os.copy_file_range where available,
and fall back to shutil.copyfileobj otherwise.

* gguf : handle misaligned offset more cleanly
This commit is contained in:
Francis Couture-Harpin
2025-09-04 17:40:11 -04:00
parent 34bd024267
commit 6792f66a93
5 changed files with 132 additions and 104 deletions

View File

@@ -80,6 +80,7 @@ class ModelBase:
is_big_endian: bool
endianess: gguf.GGUFEndian
use_temp_file: bool
use_reflinks: bool
lazy: bool
dry_run: bool
hparams: dict[str, Any]
@@ -119,6 +120,7 @@ class ModelBase:
self.is_big_endian = is_big_endian
self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
self.use_temp_file = use_temp_file
self.use_reflinks = use_reflinks
self.lazy = not eager or (remote_hf_model_id is not None)
self.dry_run = dry_run
self.remote_hf_model_id = remote_hf_model_id
@@ -133,7 +135,7 @@ class ModelBase:
# Configure GGUF Writer
self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard,
use_reflinks=use_reflinks)
use_reflinks=self.use_reflinks)
# Mistral specific
self.disable_mistral_community_chat_template = disable_mistral_community_chat_template
@@ -202,7 +204,7 @@ class ModelBase:
logger.info(f"gguf: indexing model part '{part_name}'")
ctx: ContextManager[Any]
if is_safetensors:
ctx = cast(ContextManager[Any], gguf.utility.SafetensorsLocal(self.dir_model / part_name))
ctx = cast(ContextManager[Any], gguf.utility.SafetensorsLocal(self.dir_model / part_name, reflink=self.use_reflinks))
else:
ctx = contextlib.nullcontext(torch.load(str(self.dir_model / part_name), map_location="cpu", mmap=True, weights_only=True))