mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-11-02 09:12:03 +00:00)
convert : write tensors in parallel
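In short: Model.__init__ gains a thread_count parameter (default 2) that is forwarded to gguf.GGUFWriter, and the script exposes it as a new -t/--threads command-line flag, so tensor data can be written by multiple worker threads instead of one.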
@@ -73,7 +73,7 @@ class Model:
                  use_temp_file: bool = False, eager: bool = False,
                  metadata_override: Path | None = None, model_name: str | None = None,
                  split_max_tensors: int = 0, split_max_size: int = 0, dry_run: bool = False,
-                 small_first_shard: bool = False, hparams: dict[str, Any] | None = None):
+                 small_first_shard: bool = False, hparams: dict[str, Any] | None = None, thread_count: int = 2):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
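For illustration, a call site would pass the new keyword alongside the existing ones, as in the sketch below; the LlamaModel subclass name and the leading positional arguments are assumptions for the example, not part of this diff:

    from pathlib import Path

    # Hypothetical call site; only thread_count is new in this commit.
    model = LlamaModel(
        Path("models/my-model"),           # dir_model (assumed)
        gguf.LlamaFileType.MOSTLY_F16,     # ftype (assumed)
        Path("models/my-model-f16.gguf"),  # fname_out (assumed)
        thread_count=4,                    # new: tensor-writing threads, default 2
    )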
@@ -109,7 +109,8 @@ class Model:
 
         # Configure GGUF Writer
         self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file,
-                                           split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard)
+                                           split_max_tensors=split_max_tensors, split_max_size=split_max_size, dry_run=dry_run, small_first_shard=small_first_shard,
+                                           thread_count=thread_count)
 
     @classmethod
     def __init_subclass__(cls):
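The GGUFWriter side of the change is not shown in this excerpt. A minimal sketch of the technique named in the commit title, serializing tensors on worker threads while writing them in order, might look like the following; write_tensors_parallel and the serialize callback are illustrative stand-ins, not the writer's actual API:

    from collections import deque
    from concurrent.futures import ThreadPoolExecutor
    from typing import BinaryIO, Callable, Iterable

    def write_tensors_parallel(tensors: Iterable, serialize: Callable[[object], bytes],
                               out_file: BinaryIO, thread_count: int = 2) -> None:
        # Serialize tensors on worker threads, but write the results in the
        # original order so tensor offsets match the sequential writer.
        with ThreadPoolExecutor(max_workers=thread_count) as pool:
            pending = deque()
            for tensor in tensors:
                pending.append(pool.submit(serialize, tensor))
                if len(pending) >= thread_count:
                    # Draining the oldest job caps the number of fully
                    # materialized tensors at roughly thread_count.
                    out_file.write(pending.popleft().result())
            while pending:
                out_file.write(pending.popleft().result())

Bounding the in-flight work at thread_count serialized tensors is why the RAM guidance in the new --threads help text scales with the thread count.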
@@ -5470,6 +5471,10 @@ def parse_args() -> argparse.Namespace:
         "--print-supported-models", action="store_true",
         help="Print the supported models"
     )
+    parser.add_argument(
+        "-t", "--threads", type=int, default=2,
+        help="Number of threads to use when writing the tensors. Make sure you have enough RAM for at least THREADS of the biggest tensors in the model when setting this.",
+    )
 
     args = parser.parse_args()
     if not args.print_supported_models and args.model is None:
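Assuming the script is convert_hf_to_gguf.py (the surrounding parse_args and split_str_to_n_bytes match it), a conversion using four writer threads would be invoked as follows (model path illustrative):

    python convert_hf_to_gguf.py models/my-model -t 4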
@@ -5554,7 +5559,7 @@ def main() -> None:
             metadata_override=args.metadata, model_name=args.model_name,
             split_max_tensors=args.split_max_tensors,
             split_max_size=split_str_to_n_bytes(args.split_max_size), dry_run=args.dry_run,
-            small_first_shard=args.no_tensor_first_split)
+            small_first_shard=args.no_tensor_first_split, thread_count=args.threads)
 
     if args.vocab_only:
         logger.info("Exporting model vocab...")
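The memory caveat in the --threads help text is easy to quantify. As an illustrative worked example: if the largest tensor in the model is a 32000 x 4096 token-embedding matrix stored as f16, it occupies 32000 * 4096 * 2 bytes, about 250 MiB, so -t 4 needs roughly 1 GiB of RAM for in-flight tensor data on top of the converter's baseline usage.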