mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-29 08:41:22 +00:00
convert : fix reflinks for stacked MoE tensors
This commit is contained in:
@@ -462,7 +462,9 @@ class ModelBase:
|
|||||||
|
|
||||||
# workaround BF16 not being supported by Numpy
|
# workaround BF16 not being supported by Numpy
|
||||||
if data_torch.dtype == torch.bfloat16:
|
if data_torch.dtype == torch.bfloat16:
|
||||||
data_torch = data_torch.view(torch.uint8)
|
# Need a contiguous last dimension otherwise byte view doesn't work
|
||||||
|
# (problem can be reproduced with DeepSeek-V2-Lite-Chat)
|
||||||
|
data_torch = data_torch.contiguous().view(torch.uint8)
|
||||||
|
|
||||||
# if data ends up empty, it means data_torch was a scalar tensor -> restore
|
# if data ends up empty, it means data_torch was a scalar tensor -> restore
|
||||||
if len(data_torch.shape) == 0:
|
if len(data_torch.shape) == 0:
|
||||||
|
|||||||
@@ -30,7 +30,7 @@ from .constants import (
|
|||||||
)
|
)
|
||||||
|
|
||||||
from .quants import quant_shape_from_byte_shape
|
from .quants import quant_shape_from_byte_shape
|
||||||
from .utility import LocalTensorRange, best_alignment_offset, copy_tensor_ranges
|
from .utility import LocalTensorRange, best_alignment_offset, reflink_tensor_ranges
|
||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
|
|
||||||
@@ -470,7 +470,7 @@ class GGUFWriter:
|
|||||||
if self.use_reflinks and len(ranges := getattr(ti.tensor, "_ranges", ())) > 0:
|
if self.use_reflinks and len(ranges := getattr(ti.tensor, "_ranges", ())) > 0:
|
||||||
logger.debug(f"using reflinks for {name}")
|
logger.debug(f"using reflinks for {name}")
|
||||||
start_offset = fout.tell()
|
start_offset = fout.tell()
|
||||||
copy_tensor_ranges(fout, ranges, self.data_alignment)
|
reflink_tensor_ranges(fout, ranges, self.data_alignment)
|
||||||
self.write_padding(fout, fout.tell() - start_offset)
|
self.write_padding(fout, fout.tell() - start_offset)
|
||||||
else:
|
else:
|
||||||
ti.tensor.tofile(fout)
|
ti.tensor.tofile(fout)
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class LazyMeta(ABCMeta):
|
|||||||
return type(self)._wrap_fn(
|
return type(self)._wrap_fn(
|
||||||
(lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
|
(lambda s, *args, **kwargs: getattr(s, name)(*args, **kwargs)),
|
||||||
use_self=self,
|
use_self=self,
|
||||||
data_noop=name in ("view", "reshape", "squeeze", "unsqueeze"),
|
data_noop=name in ("view", "reshape", "squeeze", "unsqueeze", "contiguous"),
|
||||||
)
|
)
|
||||||
elif isinstance(meta_attr, self._tensor_type):
|
elif isinstance(meta_attr, self._tensor_type):
|
||||||
# e.g. self.T with torch.Tensor should still be wrapped
|
# e.g. self.T with torch.Tensor should still be wrapped
|
||||||
|
|||||||
@@ -297,7 +297,8 @@ def best_alignment_offset(ranges: tuple[LocalTensorRange, ...], alignment: int):
|
|||||||
best_offset = 0
|
best_offset = 0
|
||||||
best_size = 0
|
best_size = 0
|
||||||
for offset, size in hist.items():
|
for offset, size in hist.items():
|
||||||
if size > best_size:
|
# Ensure the minimal alignment is 8 bytes (common with safetensors)
|
||||||
|
if size > best_size and offset % 8 == 0:
|
||||||
best_size = size
|
best_size = size
|
||||||
best_offset = offset
|
best_offset = offset
|
||||||
return best_offset
|
return best_offset
|
||||||
@@ -307,7 +308,7 @@ def best_alignment_offset(ranges: tuple[LocalTensorRange, ...], alignment: int):
|
|||||||
# Copy tensor ranges using os.copy_file_range with aligned offsets and sizes
|
# Copy tensor ranges using os.copy_file_range with aligned offsets and sizes
|
||||||
# to make it more likely that copy-on-write is used where possible.
|
# to make it more likely that copy-on-write is used where possible.
|
||||||
# Block alignment is necessary for BTRFS and XFS (and likely for ZFS too).
|
# Block alignment is necessary for BTRFS and XFS (and likely for ZFS too).
|
||||||
def copy_tensor_ranges(fout: BufferedWriter, ranges: tuple[LocalTensorRange, ...], alignment: int = 4096):
|
def reflink_tensor_ranges(fout: BufferedWriter, ranges: tuple[LocalTensorRange, ...], alignment: int = 4096):
|
||||||
assert len(ranges) > 0
|
assert len(ranges) > 0
|
||||||
dst_offset = fout.tell()
|
dst_offset = fout.tell()
|
||||||
assert dst_offset % alignment == 0, dst_offset % alignment
|
assert dst_offset % alignment == 0, dst_offset % alignment
|
||||||
@@ -335,26 +336,40 @@ def copy_tensor_ranges(fout: BufferedWriter, ranges: tuple[LocalTensorRange, ...
|
|||||||
src = src_files[r.filename]
|
src = src_files[r.filename]
|
||||||
if this_align_offset != align_offset:
|
if this_align_offset != align_offset:
|
||||||
logger.debug(f"copy-on-write can't be used ({i}/{len(ranges)})")
|
logger.debug(f"copy-on-write can't be used ({i}/{len(ranges)})")
|
||||||
if i > 0 and dst_offset % alignment != 0:
|
# relying on os.copy_file_range to fall back to a non-aligned copy
|
||||||
# Write the correct data between blocks even when they are non-consecutive
|
|
||||||
|
# Block 0, 1, 2, 3, 4,
|
||||||
|
# |___0000|0000000|0001111|1111111|111____|
|
||||||
|
#
|
||||||
|
# 1. blocks 0, 1 and 2 are copied from range[0] using os.copy_file_range
|
||||||
|
# 2. block 2 is partially overwritten with contents from range[1]
|
||||||
|
# 3. blocks 3 and 4 are copied from range[1] using os.copy_file_range
|
||||||
|
#
|
||||||
|
# (2 and 3 are repeated with further blocks if there are more ranges)
|
||||||
|
if i == 0:
|
||||||
|
extra_size = -align_offset
|
||||||
|
elif dst_offset % alignment == 0:
|
||||||
|
extra_size = 0
|
||||||
|
else:
|
||||||
extra_size = alignment - (dst_offset % alignment)
|
extra_size = alignment - (dst_offset % alignment)
|
||||||
|
extra_size = min(extra_size, r.size)
|
||||||
src.seek(r.offset)
|
src.seek(r.offset)
|
||||||
buf = src.read(extra_size)
|
buf = src.read(extra_size)
|
||||||
fout.seek(dst_offset)
|
fout.seek(dst_offset)
|
||||||
fout.write(buf)
|
fout.write(buf)
|
||||||
dst_offset += extra_size
|
dst_offset += extra_size
|
||||||
assert dst_offset % alignment == 0, dst_offset % alignment
|
if extra_size == r.size:
|
||||||
offset_src = r.offset + extra_size
|
continue
|
||||||
else:
|
|
||||||
# TODO: is this always correct?
|
|
||||||
offset_src = r.offset - align_offset
|
|
||||||
|
|
||||||
|
assert dst_offset % alignment == 0, dst_offset % alignment
|
||||||
|
|
||||||
|
offset_src = r.offset + extra_size
|
||||||
offset_src_end = r.offset + r.size
|
offset_src_end = r.offset + r.size
|
||||||
if offset_src_end % alignment != 0:
|
if offset_src_end % alignment != 0:
|
||||||
offset_src_end += alignment - (offset_src_end % alignment)
|
offset_src_end += alignment - (offset_src_end % alignment)
|
||||||
size = offset_src_end - offset_src
|
size = offset_src_end - offset_src
|
||||||
os.copy_file_range(src.fileno(), fout.fileno(), size, offset_src, dst_offset)
|
os.copy_file_range(src.fileno(), fout.fileno(), size, offset_src, dst_offset)
|
||||||
dst_offset += r.size
|
dst_offset += r.size - extra_size
|
||||||
|
|
||||||
for f in src_files.values():
|
for f in src_files.values():
|
||||||
f.close()
|
f.close()
|
||||||
|
|||||||
Reference in New Issue
Block a user