mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-30 08:42:00 +00:00 
			
		
		
		
	gguf-py : support lazy tensor splitting (#12809)
* gguf-py : support lazy tensor splitting

  Splitting usually involves returning tuples of tensors, which need to be handled properly to avoid early eager evaluation.

* gguf-py : fix flake8 lint
This commit is contained in:
		| @@ -139,6 +139,16 @@ class LazyBase(ABC, metaclass=LazyMeta): | |||||||
|  |  | ||||||
|             if isinstance(res, cls._tensor_type): |             if isinstance(res, cls._tensor_type): | ||||||
|                 return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn) |                 return cls(meta=cls.eager_to_meta(res), args=args, kwargs=kwargs, func=fn) | ||||||
|  |             elif isinstance(res, tuple) and all(isinstance(t, cls._tensor_type) for t in res): | ||||||
|  |                 # share the evaluation between lazy tuple elements | ||||||
|  |                 shared_args: list = [args, None] | ||||||
|  |  | ||||||
|  |                 def eager_tuple_element(a: list[Any], i: int = 0, /, **kw) -> LazyBase: | ||||||
|  |                     assert len(a) == 2 | ||||||
|  |                     if a[1] is None: | ||||||
|  |                         a[1] = fn(*a[0], **kw) | ||||||
|  |                     return a[1][i] | ||||||
|  |                 return tuple(cls(meta=cls.eager_to_meta(res[i]), args=(shared_args, i), kwargs=kwargs, func=eager_tuple_element) for i in range(len(res))) | ||||||
|             else: |             else: | ||||||
|                 del res  # not needed |                 del res  # not needed | ||||||
|                 # non-tensor return likely relies on the contents of the args |                 # non-tensor return likely relies on the contents of the args | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 compilade
					compilade