Mirror of https://github.com/ggml-org/llama.cpp.git

	Merge branch 'master' into compilade/refactor-kv-cache
The hunks below are from the convert-hf-to-gguf.py converter script.
@@ -47,11 +47,12 @@ class Model:
     _model_classes: dict[str, type[Model]] = {}
 
     dir_model: Path
-    ftype: int
+    ftype: gguf.LlamaFileType
     is_big_endian: bool
     endianess: gguf.GGUFEndian
     use_temp_file: bool
     lazy: bool
+    model_name: str | None
     part_names: list[str]
     is_safetensors: bool
     hparams: dict[str, Any]
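The first change above retypes ftype from a bare int to the gguf.LlamaFileType enum. LlamaFileType is an IntEnum in the gguf-py package mirroring llama.cpp's LLAMA_FTYPE_* constants, so existing integer comparisons keep working while callers get named values. A minimal sketch of the idea (illustrative values only; the real definition lives in gguf-py):

    from enum import IntEnum

    class LlamaFileType(IntEnum):
        ALL_F32    = 0   # every tensor kept as F32
        MOSTLY_F16 = 1   # most tensors stored as F16

    ftype = LlamaFileType.MOSTLY_F16
    assert ftype == 1                # still interchangeable with the raw int
    print(ftype.name.lower())        # "mostly_f16" -- usable for filename templating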
@@ -64,7 +65,7 @@ class Model:
     # subclasses should define this!
     model_arch: gguf.MODEL_ARCH
 
-    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool):
+    def __init__(self, dir_model: Path, ftype: gguf.LlamaFileType, fname_out: Path, is_big_endian: bool, use_temp_file: bool, eager: bool, model_name: str | None):
         if type(self) is Model:
             raise TypeError(f"{type(self).__name__!r} should not be directly instantiated")
         self.dir_model = dir_model
@@ -73,10 +74,11 @@ class Model:
         self.endianess = gguf.GGUFEndian.BIG if is_big_endian else gguf.GGUFEndian.LITTLE
         self.use_temp_file = use_temp_file
         self.lazy = not eager
-        self.part_names = Model.get_model_part_names(self.dir_model, ".safetensors")
+        self.model_name = model_name
+        self.part_names = Model.get_model_part_names(self.dir_model, "model", ".safetensors")
         self.is_safetensors = len(self.part_names) > 0
         if not self.is_safetensors:
-            self.part_names = Model.get_model_part_names(self.dir_model, ".bin")
+            self.part_names = Model.get_model_part_names(self.dir_model, "pytorch_model", ".bin")
         self.hparams = Model.load_hparams(self.dir_model)
         self.block_count = self.find_hparam(["n_layers", "num_hidden_layers", "n_layer"])
         self.tensor_map = gguf.get_tensor_name_map(self.model_arch, self.block_count)
@@ -94,7 +96,7 @@ class Model:
         ftype_lw: str = ftype_up.lower()
         # allow templating the file name with the output ftype, useful with the "auto" ftype
         self.fname_out = fname_out.parent / fname_out.name.format(ftype_lw, outtype=ftype_lw, ftype=ftype_lw, OUTTYPE=ftype_up, FTYPE=ftype_up)
-        self.gguf_writer = gguf.GGUFWriter(self.fname_out, gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
+        self.gguf_writer = gguf.GGUFWriter(path=None, arch=gguf.MODEL_ARCH_NAMES[self.model_arch], endianess=self.endianess, use_temp_file=self.use_temp_file)
 
     @classmethod
     def __init_subclass__(cls):
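Note the GGUFWriter change above: the writer is now constructed with path=None, so no output file is opened at construction time; the path is bound later when write() passes self.fname_out to write_header_to_file (see the write() hunk below). A rough sketch of this deferred-open pattern, assuming nothing about the real GGUFWriter internals:

    from pathlib import Path

    class DeferredWriter:
        """Illustrative only: collect metadata now, open the output file later."""

        def __init__(self, path: Path | None = None):
            self.path = path          # may remain None until write time
            self.fields: dict = {}

        def add_field(self, key: str, value) -> None:
            self.fields[key] = value  # buffered in memory, nothing touches disk

        def write_header_to_file(self, path: Path | None = None) -> None:
            if path is not None:
                self.path = path      # bind the destination at the last moment
            if self.path is None:
                raise ValueError("no output path was ever provided")
            self.path.write_bytes(b"GGUF")  # stand-in for the real header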
@@ -182,7 +184,7 @@ class Model:
         return new_name
 
     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.block_count)
 
         if (n_ctx := self.find_hparam(["max_position_embeddings", "n_ctx"], optional=True)) is not None:
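The one-line pattern introduced here lets an explicitly supplied name take precedence over the model directory's name, falling back to the directory name when none is given; the same substitution is repeated in every set_gguf_parameters override below. Its behavior, extracted into a hypothetical helper (not part of the commit):

    from pathlib import Path

    def resolve_name(dir_model: Path, model_name: str | None) -> str:
        # Prefer the user-supplied name; fall back to the directory name.
        return dir_model.name if model_name is None else model_name

    assert resolve_name(Path("/models/Mistral-7B"), None) == "Mistral-7B"
    assert resolve_name(Path("/models/Mistral-7B"), "My-Finetune") == "My-Finetune"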
@@ -324,21 +326,21 @@ class Model:
 
     def write(self):
         self.write_tensors()
-        self.gguf_writer.write_header_to_file()
+        self.gguf_writer.write_header_to_file(self.fname_out)
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.write_tensors_to_file(progress=True)
         self.gguf_writer.close()
 
     def write_vocab(self):
-        self.gguf_writer.write_header_to_file()
+        self.gguf_writer.write_header_to_file(self.fname_out)
         self.gguf_writer.write_kv_data_to_file()
         self.gguf_writer.close()
 
     @staticmethod
-    def get_model_part_names(dir_model: Path, suffix: str) -> list[str]:
+    def get_model_part_names(dir_model: Path, prefix: str, suffix: str) -> list[str]:
         part_names: list[str] = []
         for filename in os.listdir(dir_model):
-            if filename.endswith(suffix):
+            if filename.startswith(prefix) and filename.endswith(suffix):
                 part_names.append(filename)
 
         part_names.sort()
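Adding the prefix check to get_model_part_names keeps stray checkpoint-directory files out of the part list: with suffix-only matching, any *.bin file (for example a Hugging Face training_args.bin) would be picked up as a model shard. A quick illustration with a made-up file listing:

    files = [
        "pytorch_model-00001-of-00002.bin",
        "pytorch_model-00002-of-00002.bin",
        "training_args.bin",
    ]

    parts = sorted(f for f in files
                   if f.startswith("pytorch_model") and f.endswith(".bin"))
    # -> only the two real shards; "training_args.bin" no longer slips in,
    #    which the old suffix-only check would have allowed.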
@@ -665,7 +667,7 @@ class GPTNeoXModel(Model):
     def set_gguf_parameters(self):
         block_count = self.hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(self.hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(self.hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -798,7 +800,7 @@ class MPTModel(Model):
 
     def set_gguf_parameters(self):
         block_count = self.hparams["n_layers"]
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(self.hparams["max_seq_len"])
         self.gguf_writer.add_embedding_length(self.hparams["d_model"])
         self.gguf_writer.add_block_count(block_count)
@@ -850,7 +852,7 @@ class OrionModel(Model):
             raise ValueError("gguf: can not find ctx length parameter.")
 
         self.gguf_writer.add_file_type(self.ftype)
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -887,7 +889,7 @@ class BaichuanModel(Model):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1010,7 +1012,7 @@ class XverseModel(Model):
         else:
             raise ValueError("gguf: can not find ctx length parameter.")
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_source_hf_repo(hf_repo)
         self.gguf_writer.add_tensor_data_layout("Meta AI original pth")
         self.gguf_writer.add_context_length(ctx_length)
@@ -1206,7 +1208,7 @@ class StableLMModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -1681,7 +1683,7 @@ class GPT2Model(Model):
     model_arch = gguf.MODEL_ARCH.GPT2
 
     def set_gguf_parameters(self):
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_block_count(self.hparams["n_layer"])
         self.gguf_writer.add_context_length(self.hparams["n_ctx"])
         self.gguf_writer.add_embedding_length(self.hparams["n_embd"])
@@ -2248,7 +2250,7 @@ class GemmaModel(Model):
         hparams = self.hparams
         block_count = hparams["num_hidden_layers"]
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(hparams["max_position_embeddings"])
         self.gguf_writer.add_embedding_length(hparams["hidden_size"])
         self.gguf_writer.add_block_count(block_count)
@@ -2348,7 +2350,7 @@ class MambaModel(Model):
         # Fail early for models which don't have a block expansion factor of 2
         assert d_inner == 2 * d_model
 
-        self.gguf_writer.add_name(self.dir_model.name)
+        self.gguf_writer.add_name(self.dir_model.name if self.model_name is None else self.model_name)
         self.gguf_writer.add_context_length(2**20) # arbitrary value; for those who use the default
         self.gguf_writer.add_embedding_length(d_model)
         self.gguf_writer.add_feed_forward_length(0) # unused, but seemingly required when loading
@@ -2981,7 +2983,7 @@ def main() -> None:
             logger.error(f"Model {hparams['architectures'][0]} is not supported")
             sys.exit(1)
 
-        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy)
+        model_instance = model_class(dir_model, ftype_map[args.outtype], fname_out, args.bigendian, args.use_temp_file, args.no_lazy, args.model_name)
 
         logger.info("Set model parameters")
         model_instance.set_gguf_parameters()
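Finally, main() forwards the new args.model_name into the model constructor; presumably the script's argument parser gains a matching --model-name option in a part of the diff not shown here. A small sketch of how that wiring would look with argparse:

    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--model-name", type=str, default=None,
                        help="override the model name recorded in the GGUF metadata")

    args = parser.parse_args(["--model-name", "My-Finetune"])
    assert args.model_name == "My-Finetune"  # argparse maps --model-name to args.model_name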