mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-02 09:12:03 +00:00
convert-*.py: GGUF Naming Convention Refactor and Metadata Override Refactor (#7499)
The main change is that the default output filename will now take this form:
{name}{parameters}{finetune}{version}{encoding}{kind}
In addition, this adds and removes some entries in the KV store, and adds a metadata class that can use heuristics to automatically derive some values from the model card content.
* No Change:
- Internal GGUF Spec
- `general.architecture`
- `general.quantization_version`
- `general.alignment`
- `general.file_type`
- General Model Details
- `general.name`
- `general.author`
- `general.version`
- `general.description`
- Licensing details
- `general.license`
- Typically represents the converted GGUF repo (Unless made from scratch)
- `general.url`
- Model Source during conversion
- `general.source.url`
* Removed:
- Model Source during conversion
- `general.source.huggingface.repository`
* Added:
- General Model Details
- `general.organization`
- `general.finetune`
- `general.basename`
- `general.quantized_by`
- `general.size_label`
- Licensing details
- `general.license.name`
- `general.license.link`
- Typically represents the converted GGUF repo (Unless made from scratch)
- `general.doi`
- `general.uuid`
- `general.repo_url`
- Model Source during conversion
- `general.source.doi`
- `general.source.uuid`
- `general.source.repo_url`
- Base Model Source
- `general.base_model.count`
- `general.base_model.{id}.name`
- `general.base_model.{id}.author`
- `general.base_model.{id}.version`
- `general.base_model.{id}.organization`
- `general.base_model.{id}.url` (Model Website/Paper)
- `general.base_model.{id}.doi`
- `general.base_model.{id}.uuid`
- `general.base_model.{id}.repo_url` (Model Source Repository (git/svn/etc...))
- Array based KV stores
- `general.tags`
- `general.languages`
- `general.datasets`
---------
Co-authored-by: compilade <git@compilade.net>
Co-authored-by: Xuan Son Nguyen <thichthat@gmail.com>
This commit is contained in:
@@ -251,6 +251,10 @@ def parse_args() -> argparse.Namespace:
|
||||
"--verbose", action="store_true",
|
||||
help="increase output verbosity",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--dry-run", action="store_true",
|
||||
help="only print out what will be done, without writing any new files",
|
||||
)
|
||||
parser.add_argument(
|
||||
"--base", type=Path, required=True,
|
||||
help="directory containing base model file",
|
||||
@@ -300,6 +304,12 @@ if __name__ == '__main__':
|
||||
# load base model
|
||||
logger.info(f"Loading base model: {dir_base_model.name}")
|
||||
hparams = Model.load_hparams(dir_base_model)
|
||||
|
||||
with open(lora_config, "r") as f:
|
||||
lparams: dict[str, Any] = json.load(f)
|
||||
|
||||
alpha: float = lparams["lora_alpha"]
|
||||
|
||||
with torch.inference_mode():
|
||||
try:
|
||||
model_class = Model.from_model_architecture(hparams["architectures"][0])
|
||||
@@ -310,6 +320,14 @@ if __name__ == '__main__':
|
||||
class LoraModel(model_class):
|
||||
model_arch = model_class.model_arch
|
||||
|
||||
def set_type(self):
|
||||
self.gguf_writer.add_type(gguf.GGUFType.ADAPTER)
|
||||
self.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
|
||||
|
||||
def set_gguf_parameters(self):
|
||||
self.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
|
||||
super().set_gguf_parameters()
|
||||
|
||||
def get_tensors(self) -> Iterator[tuple[str, Tensor]]:
|
||||
tensor_map: dict[str, PartialLoraTensor] = {}
|
||||
|
||||
@@ -357,18 +375,9 @@ if __name__ == '__main__':
|
||||
is_big_endian=args.bigendian,
|
||||
use_temp_file=False,
|
||||
eager=args.no_lazy,
|
||||
model_name=None,
|
||||
dry_run=args.dry_run,
|
||||
)
|
||||
|
||||
with open(lora_config, "r") as f:
|
||||
lparams: dict[str, Any] = json.load(f)
|
||||
|
||||
alpha = lparams["lora_alpha"]
|
||||
|
||||
model_instance.gguf_writer.add_string(gguf.Keys.General.TYPE, gguf.GGUFType.ADAPTER)
|
||||
model_instance.gguf_writer.add_string(gguf.Keys.Adapter.TYPE, "lora")
|
||||
model_instance.gguf_writer.add_float32(gguf.Keys.Adapter.LORA_ALPHA, float(alpha))
|
||||
model_instance.gguf_writer.add_quantization_version(gguf.GGML_QUANT_VERSION)
|
||||
logger.info("Exporting model...")
|
||||
model_instance.write()
|
||||
logger.info(f"Model successfully exported to {model_instance.fname_out}")
|
||||
|
||||
Reference in New Issue
Block a user