mirror of https://github.com/ggml-org/llama.cpp.git
convert : use writeable buffer for remote lazy tensors
@@ -5416,7 +5416,8 @@ class LazyTorchTensor(gguf.LazyBase):
         dtype = cls._dtype_str_map[remote_tensor.dtype]
         shape = remote_tensor.shape
         meta = cls.meta_with_dtype_and_shape(dtype, shape)
-        lazy = cls(meta=meta, args=(remote_tensor,), func=lambda r: torch.frombuffer(r.data(), dtype=dtype).reshape(shape))
+        func = lambda r: torch.frombuffer(r.data(), dtype=dtype).reshape(shape)
+        lazy = cls(meta=meta, args=(remote_tensor,), func=func)
         return cast(torch.Tensor, lazy)

     @classmethod
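This hunk only splits the lazy callable out of the constructor call; the behaviour is unchanged: the tensor is still materialized on demand by wrapping the bytes from the remote tensor with torch.frombuffer and reshaping them. A minimal sketch of that materialization step (not the converter's own code), using a locally built bytearray as a stand-in for RemoteTensor.data() and a made-up dtype and shape:

import torch

dtype, shape = torch.float32, (2, 3)   # hypothetical; the converter derives these from the remote metadata
buf = bytearray(2 * 3 * 4)             # writeable stand-in for the bytes returned by RemoteTensor.data()
func = lambda b: torch.frombuffer(b, dtype=dtype).reshape(shape)
print(func(buf).shape)                 # torch.Size([2, 3])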
@@ -81,9 +81,10 @@ class RemoteTensor:
     size: int
     url: str

-    def data(self) -> bytes:
+    def data(self) -> bytearray:
         # TODO: handle request errors (maybe with limited retries?)
-        data = SafetensorRemote.get_data_by_range(url=self.url, start=self.offset_start, size=self.size)
+        # NOTE: using a bytearray, otherwise PyTorch complains the buffer is not writeable
+        data = bytearray(SafetensorRemote.get_data_by_range(url=self.url, start=self.offset_start, size=self.size))
         return data
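The functional change is in data(): torch.frombuffer shares memory with the buffer it is given, and PyTorch complains when that buffer is read-only, as a plain bytes object is. Copying the downloaded range into a bytearray hands it a writeable buffer instead. A small illustration of the difference, with a zero-filled bytes object standing in for the payload returned by SafetensorRemote.get_data_by_range:

import torch

raw = b"\x00" * 16                                          # stand-in for the downloaded byte range
torch.frombuffer(raw, dtype=torch.float32)                  # read-only buffer: PyTorch warns about it
t = torch.frombuffer(bytearray(raw), dtype=torch.float32)   # writeable copy: no complaint
print(t)                                                    # tensor([0., 0., 0., 0.])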