mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Merge branch 'master' into compilade/refactor-kv-cache
This commit is contained in:
		| @@ -64,15 +64,27 @@ class Keys: | ||||
|         BASE_MODEL_AUTHOR          = "general.base_model.{id}.author" | ||||
|         BASE_MODEL_VERSION         = "general.base_model.{id}.version" | ||||
|         BASE_MODEL_ORGANIZATION    = "general.base_model.{id}.organization" | ||||
|         BASE_MODEL_DESCRIPTION     = "general.base_model.{id}.description" | ||||
|         BASE_MODEL_URL             = "general.base_model.{id}.url" # Model Website/Paper | ||||
|         BASE_MODEL_DOI             = "general.base_model.{id}.doi" | ||||
|         BASE_MODEL_UUID            = "general.base_model.{id}.uuid" | ||||
|         BASE_MODEL_REPO_URL        = "general.base_model.{id}.repo_url" # Model Source Repository (git/svn/etc...) | ||||
|  | ||||
|         # Dataset Source | ||||
|         DATASET_COUNT           = "general.dataset.count" | ||||
|         DATASET_NAME            = "general.dataset.{id}.name" | ||||
|         DATASET_AUTHOR          = "general.dataset.{id}.author" | ||||
|         DATASET_VERSION         = "general.dataset.{id}.version" | ||||
|         DATASET_ORGANIZATION    = "general.dataset.{id}.organization" | ||||
|         DATASET_DESCRIPTION     = "general.dataset.{id}.description" | ||||
|         DATASET_URL             = "general.dataset.{id}.url" # Dataset Website/Paper | ||||
|         DATASET_DOI             = "general.dataset.{id}.doi" | ||||
|         DATASET_UUID            = "general.dataset.{id}.uuid" | ||||
|         DATASET_REPO_URL        = "general.dataset.{id}.repo_url" # Dataset Source Repository (git/svn/etc...) | ||||
|  | ||||
|         # Array based KV stores | ||||
|         TAGS                       = "general.tags" | ||||
|         LANGUAGES                  = "general.languages" | ||||
|         DATASETS                   = "general.datasets" | ||||
|  | ||||
|     class LLM: | ||||
|         VOCAB_SIZE                        = "{arch}.vocab_size" | ||||
| @@ -232,6 +244,7 @@ class MODEL_ARCH(IntEnum): | ||||
|     COMMAND_R    = auto() | ||||
|     DBRX         = auto() | ||||
|     OLMO         = auto() | ||||
|     OLMO_1124    = auto() | ||||
|     OLMOE        = auto() | ||||
|     OPENELM      = auto() | ||||
|     ARCTIC       = auto() | ||||
| @@ -397,6 +410,7 @@ MODEL_ARCH_NAMES: dict[MODEL_ARCH, str] = { | ||||
|     MODEL_ARCH.COMMAND_R:      "command-r", | ||||
|     MODEL_ARCH.DBRX:           "dbrx", | ||||
|     MODEL_ARCH.OLMO:           "olmo", | ||||
|     MODEL_ARCH.OLMO_1124:      "olmo_1124", | ||||
|     MODEL_ARCH.OLMOE:          "olmoe", | ||||
|     MODEL_ARCH.OPENELM:        "openelm", | ||||
|     MODEL_ARCH.ARCTIC:         "arctic", | ||||
| @@ -1093,6 +1107,22 @@ MODEL_TENSORS: dict[MODEL_ARCH, list[MODEL_TENSOR]] = { | ||||
|         MODEL_TENSOR.FFN_DOWN, | ||||
|         MODEL_TENSOR.FFN_UP, | ||||
|     ], | ||||
|     MODEL_ARCH.OLMO_1124: [ | ||||
|         MODEL_TENSOR.TOKEN_EMBD, | ||||
|         MODEL_TENSOR.OUTPUT_NORM, | ||||
|         MODEL_TENSOR.OUTPUT, | ||||
|         MODEL_TENSOR.ATTN_Q, | ||||
|         MODEL_TENSOR.ATTN_K, | ||||
|         MODEL_TENSOR.ATTN_V, | ||||
|         MODEL_TENSOR.ATTN_OUT, | ||||
|         MODEL_TENSOR.ATTN_POST_NORM, | ||||
|         MODEL_TENSOR.ATTN_Q_NORM, | ||||
|         MODEL_TENSOR.ATTN_K_NORM, | ||||
|         MODEL_TENSOR.FFN_POST_NORM, | ||||
|         MODEL_TENSOR.FFN_GATE, | ||||
|         MODEL_TENSOR.FFN_DOWN, | ||||
|         MODEL_TENSOR.FFN_UP, | ||||
|     ], | ||||
|     MODEL_ARCH.OLMOE: [ | ||||
|         MODEL_TENSOR.TOKEN_EMBD, | ||||
|         MODEL_TENSOR.OUTPUT_NORM, | ||||
|   | ||||
| @@ -568,6 +568,9 @@ class GGUFWriter: | ||||
|     def add_base_model_organization(self, source_id: int, organization: str) -> None: | ||||
|         self.add_string(Keys.General.BASE_MODEL_ORGANIZATION.format(id=source_id), organization) | ||||
|  | ||||
|     def add_base_model_description(self, source_id: int, description: str) -> None: | ||||
|         self.add_string(Keys.General.BASE_MODEL_DESCRIPTION.format(id=source_id), description) | ||||
|  | ||||
|     def add_base_model_url(self, source_id: int, url: str) -> None: | ||||
|         self.add_string(Keys.General.BASE_MODEL_URL.format(id=source_id), url) | ||||
|  | ||||
| @@ -580,15 +583,42 @@ class GGUFWriter: | ||||
|     def add_base_model_repo_url(self, source_id: int, repo_url: str) -> None: | ||||
|         self.add_string(Keys.General.BASE_MODEL_REPO_URL.format(id=source_id), repo_url) | ||||
|  | ||||
|     def add_dataset_count(self, source_count: int) -> None: | ||||
|         self.add_uint32(Keys.General.DATASET_COUNT, source_count) | ||||
|  | ||||
|     def add_dataset_name(self, source_id: int, name: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_NAME.format(id=source_id), name) | ||||
|  | ||||
|     def add_dataset_author(self, source_id: int, author: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_AUTHOR.format(id=source_id), author) | ||||
|  | ||||
|     def add_dataset_version(self, source_id: int, version: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_VERSION.format(id=source_id), version) | ||||
|  | ||||
|     def add_dataset_organization(self, source_id: int, organization: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_ORGANIZATION.format(id=source_id), organization) | ||||
|  | ||||
|     def add_dataset_description(self, source_id: int, description: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_DESCRIPTION.format(id=source_id), description) | ||||
|  | ||||
|     def add_dataset_url(self, source_id: int, url: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_URL.format(id=source_id), url) | ||||
|  | ||||
|     def add_dataset_doi(self, source_id: int, doi: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_DOI.format(id=source_id), doi) | ||||
|  | ||||
|     def add_dataset_uuid(self, source_id: int, uuid: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_UUID.format(id=source_id), uuid) | ||||
|  | ||||
|     def add_dataset_repo_url(self, source_id: int, repo_url: str) -> None: | ||||
|         self.add_string(Keys.General.DATASET_REPO_URL.format(id=source_id), repo_url) | ||||
|  | ||||
|     def add_tags(self, tags: Sequence[str]) -> None: | ||||
|         self.add_array(Keys.General.TAGS, tags) | ||||
|  | ||||
|     def add_languages(self, languages: Sequence[str]) -> None: | ||||
|         self.add_array(Keys.General.LANGUAGES, languages) | ||||
|  | ||||
|     def add_datasets(self, datasets: Sequence[str]) -> None: | ||||
|         self.add_array(Keys.General.DATASETS, datasets) | ||||
|  | ||||
|     def add_tensor_data_layout(self, layout: str) -> None: | ||||
|         self.add_string(Keys.LLM.TENSOR_DATA_LAYOUT.format(arch=self.arch), layout) | ||||
|  | ||||
|   | ||||
| @@ -41,7 +41,7 @@ class Metadata: | ||||
|     base_models: Optional[list[dict]] = None | ||||
|     tags: Optional[list[str]] = None | ||||
|     languages: Optional[list[str]] = None | ||||
|     datasets: Optional[list[str]] = None | ||||
|     datasets: Optional[list[dict]] = None | ||||
|  | ||||
|     @staticmethod | ||||
|     def load(metadata_override_path: Optional[Path] = None, model_path: Optional[Path] = None, model_name: Optional[str] = None, total_params: int = 0) -> Metadata: | ||||
| @@ -91,9 +91,11 @@ class Metadata: | ||||
|         # Base Models is received here as an array of models | ||||
|         metadata.base_models     = metadata_override.get("general.base_models",        metadata.base_models) | ||||
|  | ||||
|         # Datasets is received here as an array of datasets | ||||
|         metadata.datasets        = metadata_override.get("general.datasets",           metadata.datasets) | ||||
|  | ||||
|         metadata.tags            = metadata_override.get(Keys.General.TAGS,            metadata.tags) | ||||
|         metadata.languages       = metadata_override.get(Keys.General.LANGUAGES,       metadata.languages) | ||||
|         metadata.datasets        = metadata_override.get(Keys.General.DATASETS,        metadata.datasets) | ||||
|  | ||||
|         # Direct Metadata Override (via direct cli argument) | ||||
|         if model_name is not None: | ||||
| @@ -346,12 +348,12 @@ class Metadata: | ||||
|             use_model_card_metadata("author", "model_creator") | ||||
|             use_model_card_metadata("basename", "model_type") | ||||
|  | ||||
|             if "base_model" in model_card: | ||||
|             if "base_model" in model_card or "base_models" in model_card or "base_model_sources" in model_card: | ||||
|                 # This represents the parent models that this is based on | ||||
|                 # Example: stabilityai/stable-diffusion-xl-base-1.0. Can also be a list (for merges) | ||||
|                 # Example of merges: https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0.1/blob/main/README.md | ||||
|                 metadata_base_models = [] | ||||
|                 base_model_value = model_card.get("base_model", None) | ||||
|                 base_model_value = model_card.get("base_model", model_card.get("base_models", model_card.get("base_model_sources", None))) | ||||
|  | ||||
|                 if base_model_value is not None: | ||||
|                     if isinstance(base_model_value, str): | ||||
| @@ -364,18 +366,106 @@ class Metadata: | ||||
|  | ||||
|                 for model_id in metadata_base_models: | ||||
|                     # NOTE: model size of base model is assumed to be similar to the size of the current model | ||||
|                     model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) | ||||
|                     base_model = {} | ||||
|                     if model_full_name_component is not None: | ||||
|                         base_model["name"] = Metadata.id_to_title(model_full_name_component) | ||||
|                     if org_component is not None: | ||||
|                         base_model["organization"] = Metadata.id_to_title(org_component) | ||||
|                     if version is not None: | ||||
|                         base_model["version"] = version | ||||
|                     if org_component is not None and model_full_name_component is not None: | ||||
|                         base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}" | ||||
|                     if isinstance(model_id, str): | ||||
|                         if model_id.startswith("http://") or model_id.startswith("https://") or model_id.startswith("ssh://"): | ||||
|                             base_model["repo_url"] = model_id | ||||
|  | ||||
|                             # Check if Hugging Face ID is present in URL | ||||
|                             if "huggingface.co" in model_id: | ||||
|                                 match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", model_id) | ||||
|                                 if match: | ||||
|                                     model_id_component = match.group(1) | ||||
|                                     model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id_component, total_params) | ||||
|  | ||||
|                                     # Populate model dictionary with extracted components | ||||
|                                     if model_full_name_component is not None: | ||||
|                                         base_model["name"] = Metadata.id_to_title(model_full_name_component) | ||||
|                                     if org_component is not None: | ||||
|                                         base_model["organization"] = Metadata.id_to_title(org_component) | ||||
|                                     if version is not None: | ||||
|                                         base_model["version"] = version | ||||
|  | ||||
|                         else: | ||||
|                             # Likely a Hugging Face ID | ||||
|                             model_full_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(model_id, total_params) | ||||
|  | ||||
|                             # Populate model dictionary with extracted components | ||||
|                             if model_full_name_component is not None: | ||||
|                                 base_model["name"] = Metadata.id_to_title(model_full_name_component) | ||||
|                             if org_component is not None: | ||||
|                                 base_model["organization"] = Metadata.id_to_title(org_component) | ||||
|                             if version is not None: | ||||
|                                 base_model["version"] = version | ||||
|                             if org_component is not None and model_full_name_component is not None: | ||||
|                                 base_model["repo_url"] = f"https://huggingface.co/{org_component}/{model_full_name_component}" | ||||
|  | ||||
|                     elif isinstance(model_id, dict): | ||||
|                         base_model = model_id | ||||
|  | ||||
|                     else: | ||||
|                         logger.error(f"base model entry '{str(model_id)}' not in a known format") | ||||
|  | ||||
|                     metadata.base_models.append(base_model) | ||||
|  | ||||
|             if "datasets" in model_card or "dataset" in model_card or "dataset_sources" in model_card: | ||||
|                 # This represents the datasets that this model was trained on | ||||
|                 metadata_datasets = [] | ||||
|                 dataset_value = model_card.get("datasets", model_card.get("dataset", model_card.get("dataset_sources", None))) | ||||
|  | ||||
|                 if dataset_value is not None: | ||||
|                     if isinstance(dataset_value, str): | ||||
|                         metadata_datasets.append(dataset_value) | ||||
|                     elif isinstance(dataset_value, list): | ||||
|                         metadata_datasets.extend(dataset_value) | ||||
|  | ||||
|                 if metadata.datasets is None: | ||||
|                     metadata.datasets = [] | ||||
|  | ||||
|                 for dataset_id in metadata_datasets: | ||||
|                     # NOTE: dataset ids are parsed with the model id parser (get_model_id_components); only the name, organization and version it returns are used below | ||||
|                     dataset = {} | ||||
|                     if isinstance(dataset_id, str): | ||||
|                         if dataset_id.startswith(("http://", "https://", "ssh://")): | ||||
|                             dataset["repo_url"] = dataset_id | ||||
|  | ||||
|                             # Check if Hugging Face ID is present in URL | ||||
|                             if "huggingface.co" in dataset_id: | ||||
|                                 match = re.match(r"https?://huggingface.co/([^/]+/[^/]+)$", dataset_id) | ||||
|                                 if match: | ||||
|                                     dataset_id_component = match.group(1) | ||||
|                                     dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id_component, total_params) | ||||
|  | ||||
|                                     # Populate dataset dictionary with extracted components | ||||
|                                     if dataset_name_component is not None: | ||||
|                                         dataset["name"] = Metadata.id_to_title(dataset_name_component) | ||||
|                                     if org_component is not None: | ||||
|                                         dataset["organization"] = Metadata.id_to_title(org_component) | ||||
|                                     if version is not None: | ||||
|                                         dataset["version"] = version | ||||
|  | ||||
|                         else: | ||||
|                             # Likely a Hugging Face ID | ||||
|                             dataset_name_component, org_component, basename, finetune, version, size_label = Metadata.get_model_id_components(dataset_id, total_params) | ||||
|  | ||||
|                             # Populate dataset dictionary with extracted components | ||||
|                             if dataset_name_component is not None: | ||||
|                                 dataset["name"] = Metadata.id_to_title(dataset_name_component) | ||||
|                             if org_component is not None: | ||||
|                                 dataset["organization"] = Metadata.id_to_title(org_component) | ||||
|                             if version is not None: | ||||
|                                 dataset["version"] = version | ||||
|                             if org_component is not None and dataset_name_component is not None: | ||||
|                                 dataset["repo_url"] = f"https://huggingface.co/{org_component}/{dataset_name_component}" | ||||
|  | ||||
|                     elif isinstance(dataset_id, dict): | ||||
|                         dataset = dataset_id | ||||
|  | ||||
|                     else: | ||||
|                         logger.error(f"dataset entry '{str(dataset_id)}' not in a known format") | ||||
|  | ||||
|                     metadata.datasets.append(dataset) | ||||
|  | ||||
|             use_model_card_metadata("license", "license") | ||||
|             use_model_card_metadata("license_name", "license_name") | ||||
|             use_model_card_metadata("license_link", "license_link") | ||||
| @@ -386,9 +476,6 @@ class Metadata: | ||||
|             use_array_model_card_metadata("languages", "languages") | ||||
|             use_array_model_card_metadata("languages", "language") | ||||
|  | ||||
|             use_array_model_card_metadata("datasets", "datasets") | ||||
|             use_array_model_card_metadata("datasets", "dataset") | ||||
|  | ||||
|         # Hugging Face Parameter Heuristics | ||||
|         #################################### | ||||
|  | ||||
| @@ -458,7 +545,10 @@ class Metadata: | ||||
|             gguf_writer.add_size_label(self.size_label) | ||||
|  | ||||
|         if self.license is not None: | ||||
|             gguf_writer.add_license(self.license) | ||||
|             if isinstance(self.license, list): | ||||
|                 gguf_writer.add_license(",".join(self.license)) | ||||
|             else: | ||||
|                 gguf_writer.add_license(self.license) | ||||
|         if self.license_name is not None: | ||||
|             gguf_writer.add_license_name(self.license_name) | ||||
|         if self.license_link is not None: | ||||
| @@ -493,6 +583,8 @@ class Metadata: | ||||
|                     gguf_writer.add_base_model_version(key, base_model_entry["version"]) | ||||
|                 if "organization" in base_model_entry: | ||||
|                     gguf_writer.add_base_model_organization(key, base_model_entry["organization"]) | ||||
|                 if "description" in base_model_entry: | ||||
|                     gguf_writer.add_base_model_description(key, base_model_entry["description"]) | ||||
|                 if "url" in base_model_entry: | ||||
|                     gguf_writer.add_base_model_url(key, base_model_entry["url"]) | ||||
|                 if "doi" in base_model_entry: | ||||
| @@ -502,9 +594,29 @@ class Metadata: | ||||
|                 if "repo_url" in base_model_entry: | ||||
|                     gguf_writer.add_base_model_repo_url(key, base_model_entry["repo_url"]) | ||||
|  | ||||
|         if self.datasets is not None: | ||||
|             gguf_writer.add_dataset_count(len(self.datasets)) | ||||
|             for key, dataset_entry in enumerate(self.datasets): | ||||
|                 if "name" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_name(key, dataset_entry["name"]) | ||||
|                 if "author" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_author(key, dataset_entry["author"]) | ||||
|                 if "version" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_version(key, dataset_entry["version"]) | ||||
|                 if "organization" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_organization(key, dataset_entry["organization"]) | ||||
|                 if "description" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_description(key, dataset_entry["description"]) | ||||
|                 if "url" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_url(key, dataset_entry["url"]) | ||||
|                 if "doi" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_doi(key, dataset_entry["doi"]) | ||||
|                 if "uuid" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_uuid(key, dataset_entry["uuid"]) | ||||
|                 if "repo_url" in dataset_entry: | ||||
|                     gguf_writer.add_dataset_repo_url(key, dataset_entry["repo_url"]) | ||||
|  | ||||
|         if self.tags is not None: | ||||
|             gguf_writer.add_tags(self.tags) | ||||
|         if self.languages is not None: | ||||
|             gguf_writer.add_languages(self.languages) | ||||
|         if self.datasets is not None: | ||||
|             gguf_writer.add_datasets(self.datasets) | ||||
|   | ||||
| @@ -13,7 +13,7 @@ class TensorNameMap: | ||||
|             "transformer.wte",                           # gpt2 gpt-j mpt refact qwen dbrx jais exaone | ||||
|             "transformer.word_embeddings",               # falcon | ||||
|             "word_embeddings",                           # bloom | ||||
|             "model.embed_tokens",                        # llama-hf nemotron olmoe | ||||
|             "model.embed_tokens",                        # llama-hf nemotron olmoe olmo_1124 | ||||
|             "tok_embeddings",                            # llama-pth | ||||
|             "embeddings.word_embeddings",                # bert nomic-bert | ||||
|             "language_model.embedding.word_embeddings",  # persimmon | ||||
| @@ -54,7 +54,7 @@ class TensorNameMap: | ||||
|         # Output | ||||
|         MODEL_TENSOR.OUTPUT: ( | ||||
|             "embed_out",                 # gptneox | ||||
|             "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe | ||||
|             "lm_head",                   # gpt2 mpt falcon llama-hf baichuan qwen mamba dbrx jais nemotron exaone olmoe olmo_1124 | ||||
|             "output",                    # llama-pth bloom internlm2 | ||||
|             "word_embeddings_for_head",  # persimmon | ||||
|             "lm_head.linear",            # phi2 | ||||
| @@ -66,7 +66,7 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.OUTPUT_NORM: ( | ||||
|             "gpt_neox.final_layer_norm",               # gptneox | ||||
|             "transformer.ln_f",                        # gpt2 gpt-j falcon jais exaone | ||||
|             "model.norm",                              # llama-hf baichuan internlm2 olmoe | ||||
|             "model.norm",                              # llama-hf baichuan internlm2 olmoe olmo_1124 | ||||
|             "norm",                                    # llama-pth | ||||
|             "transformer.norm_f",                      # mpt dbrx | ||||
|             "ln_f",                                    # refact bloom qwen gpt2 | ||||
| @@ -145,7 +145,7 @@ class TensorNameMap: | ||||
|  | ||||
|         # Attention query | ||||
|         MODEL_TENSOR.ATTN_Q: ( | ||||
|             "model.layers.{bid}.self_attn.q_proj",                       # llama-hf nemotron olmoe | ||||
|             "model.layers.{bid}.self_attn.q_proj",                       # llama-hf nemotron olmoe olmo_1124 | ||||
|             "layers.{bid}.attention.wq",                                 # llama-pth | ||||
|             "encoder.layer.{bid}.attention.self.query",                  # bert | ||||
|             "transformer.h.{bid}.attn.q_proj",                           # gpt-j | ||||
| @@ -157,7 +157,7 @@ class TensorNameMap: | ||||
|  | ||||
|         # Attention key | ||||
|         MODEL_TENSOR.ATTN_K: ( | ||||
|             "model.layers.{bid}.self_attn.k_proj",                     # llama-hf nemotron olmoe | ||||
|             "model.layers.{bid}.self_attn.k_proj",                     # llama-hf nemotron olmoe olmo_1124 | ||||
|             "layers.{bid}.attention.wk",                               # llama-pth | ||||
|             "encoder.layer.{bid}.attention.self.key",                  # bert | ||||
|             "transformer.h.{bid}.attn.k_proj",                         # gpt-j | ||||
| @@ -170,7 +170,7 @@ class TensorNameMap: | ||||
|  | ||||
|         # Attention value | ||||
|         MODEL_TENSOR.ATTN_V: ( | ||||
|             "model.layers.{bid}.self_attn.v_proj",                       # llama-hf nemotron olmoe | ||||
|             "model.layers.{bid}.self_attn.v_proj",                       # llama-hf nemotron olmoe olmo_1124 | ||||
|             "layers.{bid}.attention.wv",                                 # llama-pth | ||||
|             "encoder.layer.{bid}.attention.self.value",                  # bert | ||||
|             "transformer.h.{bid}.attn.v_proj",                           # gpt-j | ||||
| @@ -188,7 +188,7 @@ class TensorNameMap: | ||||
|             "transformer.blocks.{bid}.attn.out_proj",                       # mpt | ||||
|             "transformer.h.{bid}.self_attention.dense",                     # falcon | ||||
|             "h.{bid}.self_attention.dense",                                 # bloom | ||||
|             "model.layers.{bid}.self_attn.o_proj",                          # llama-hf nemotron olmoe | ||||
|             "model.layers.{bid}.self_attn.o_proj",                          # llama-hf nemotron olmoe olmo_1124 | ||||
|             "layers.{bid}.attention.wo",                                    # llama-pth | ||||
|             "encoder.layer.{bid}.attention.output.dense",                   # bert | ||||
|             "transformer.h.{bid}.attn.out_proj",                            # gpt-j | ||||
| @@ -215,7 +215,7 @@ class TensorNameMap: | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.ATTN_POST_NORM: ( | ||||
|             "model.layers.{bid}.post_attention_layernorm",     # gemma2 | ||||
|             "model.layers.{bid}.post_attention_layernorm",     # gemma2 olmo_1124 | ||||
|         ), | ||||
|  | ||||
|         # Rotary embeddings | ||||
| @@ -252,7 +252,7 @@ class TensorNameMap: | ||||
|  | ||||
|         # Post feed-forward norm | ||||
|         MODEL_TENSOR.FFN_POST_NORM: ( | ||||
|             "model.layers.{bid}.post_feedforward_layernorm", # gemma2 | ||||
|             "model.layers.{bid}.post_feedforward_layernorm", # gemma2 olmo_1124 | ||||
|         ), | ||||
|  | ||||
|         MODEL_TENSOR.FFN_GATE_INP: ( | ||||
| @@ -276,7 +276,7 @@ class TensorNameMap: | ||||
|             "transformer.blocks.{bid}.ffn.up_proj",                   # mpt | ||||
|             "transformer.h.{bid}.mlp.dense_h_to_4h",                  # falcon | ||||
|             "h.{bid}.mlp.dense_h_to_4h",                              # bloom | ||||
|             "model.layers.{bid}.mlp.up_proj",                         # llama-hf refact nemotron | ||||
|             "model.layers.{bid}.mlp.up_proj",                         # llama-hf refact nemotron olmo_1124 | ||||
|             "layers.{bid}.feed_forward.w3",                           # llama-pth | ||||
|             "encoder.layer.{bid}.intermediate.dense",                 # bert | ||||
|             "transformer.h.{bid}.mlp.fc_in",                          # gpt-j | ||||
| @@ -318,7 +318,7 @@ class TensorNameMap: | ||||
|  | ||||
|         # Feed-forward gate | ||||
|         MODEL_TENSOR.FFN_GATE: ( | ||||
|             "model.layers.{bid}.mlp.gate_proj",           # llama-hf refact | ||||
|             "model.layers.{bid}.mlp.gate_proj",           # llama-hf refact olmo_1124 | ||||
|             "layers.{bid}.feed_forward.w1",               # llama-pth | ||||
|             "transformer.h.{bid}.mlp.w2",                 # qwen | ||||
|             "transformer.h.{bid}.mlp.c_fc2",              # jais | ||||
| @@ -351,7 +351,7 @@ class TensorNameMap: | ||||
|             "transformer.blocks.{bid}.ffn.down_proj",                 # mpt | ||||
|             "transformer.h.{bid}.mlp.dense_4h_to_h",                  # falcon | ||||
|             "h.{bid}.mlp.dense_4h_to_h",                              # bloom | ||||
|             "model.layers.{bid}.mlp.down_proj",                       # llama-hf nemotron | ||||
|             "model.layers.{bid}.mlp.down_proj",                       # llama-hf nemotron olmo_1124 | ||||
|             "layers.{bid}.feed_forward.w2",                           # llama-pth | ||||
|             "encoder.layer.{bid}.output.dense",                       # bert | ||||
|             "transformer.h.{bid}.mlp.fc_out",                         # gpt-j | ||||
| @@ -389,7 +389,7 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.ATTN_Q_NORM: ( | ||||
|             "language_model.encoder.layers.{bid}.self_attention.q_layernorm", | ||||
|             "model.layers.{bid}.self_attn.q_layernorm",                       # persimmon | ||||
|             "model.layers.{bid}.self_attn.q_norm",                            # cohere olmoe chameleon | ||||
|             "model.layers.{bid}.self_attn.q_norm",                            # cohere olmoe chameleon olmo_1124 | ||||
|             "transformer.blocks.{bid}.attn.q_ln",                             # sea-lion | ||||
|             "encoder.layer.{bid}.attention.self.layer_norm_q",                # jina-bert-v2 | ||||
|             "transformer.layers.{bid}.attn.q_norm",                           # openelm | ||||
| @@ -398,7 +398,7 @@ class TensorNameMap: | ||||
|         MODEL_TENSOR.ATTN_K_NORM: ( | ||||
|             "language_model.encoder.layers.{bid}.self_attention.k_layernorm", | ||||
|             "model.layers.{bid}.self_attn.k_layernorm",                       # persimmon | ||||
|             "model.layers.{bid}.self_attn.k_norm",                            # cohere olmoe chameleon | ||||
|             "model.layers.{bid}.self_attn.k_norm",                            # cohere olmoe chameleon olmo_1124 | ||||
|             "transformer.blocks.{bid}.attn.k_ln",                             # sea-lion | ||||
|             "encoder.layer.{bid}.attention.self.layer_norm_k",                # jina-bert-v2 | ||||
|             "transformer.layers.{bid}.attn.k_norm",                           # openelm | ||||
|   | ||||
| @@ -182,8 +182,43 @@ class TestMetadataMethod(unittest.TestCase): | ||||
|         expect.base_models=[{'name': 'Mistral 7B Merge 14 v0', 'organization': 'EmbeddedLLM', 'version': '14-v0', 'repo_url': 'https://huggingface.co/EmbeddedLLM/Mistral-7B-Merge-14-v0'}, {'name': 'Trinity v1', 'organization': 'Janai Hq', 'version': 'v1', 'repo_url': 'https://huggingface.co/janai-hq/trinity-v1'}] | ||||
|         expect.tags=['Llama-3', 'instruct', 'finetune', 'chatml', 'DPO', 'RLHF', 'gpt4', 'synthetic data', 'distillation', 'function calling', 'json mode', 'axolotl'] | ||||
|         expect.languages=['en'] | ||||
|         expect.datasets=['teknium/OpenHermes-2.5'] | ||||
|         expect.datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}] | ||||
|         self.assertEqual(got, expect) | ||||
|  | ||||
|         # Base Model spec is inferred from model id | ||||
|         model_card = {'base_models': 'teknium/OpenHermes-2.5'} | ||||
|         expect = gguf.Metadata(base_models=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]) | ||||
|         got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) | ||||
|         self.assertEqual(got, expect) | ||||
|  | ||||
|         # Base Model spec is only url | ||||
|         model_card = {'base_models': ['https://huggingface.co/teknium/OpenHermes-2.5']} | ||||
|         expect = gguf.Metadata(base_models=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]) | ||||
|         got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) | ||||
|         self.assertEqual(got, expect) | ||||
|  | ||||
|         # Base Model spec is given directly | ||||
|         model_card = {'base_models': [{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]} | ||||
|         expect = gguf.Metadata(base_models=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]) | ||||
|         got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) | ||||
|         self.assertEqual(got, expect) | ||||
|  | ||||
|         # Dataset spec is inferred from model id | ||||
|         model_card = {'datasets': 'teknium/OpenHermes-2.5'} | ||||
|         expect = gguf.Metadata(datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]) | ||||
|         got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) | ||||
|         self.assertEqual(got, expect) | ||||
|  | ||||
|         # Dataset spec is only url | ||||
|         model_card = {'datasets': ['https://huggingface.co/teknium/OpenHermes-2.5']} | ||||
|         expect = gguf.Metadata(datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]) | ||||
|         got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) | ||||
|         self.assertEqual(got, expect) | ||||
|  | ||||
|         # Dataset spec is given directly | ||||
|         model_card = {'datasets': [{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]} | ||||
|         expect = gguf.Metadata(datasets=[{'name': 'OpenHermes 2.5', 'organization': 'Teknium', 'version': '2.5', 'repo_url': 'https://huggingface.co/teknium/OpenHermes-2.5'}]) | ||||
|         got = gguf.Metadata.apply_metadata_heuristic(gguf.Metadata(), model_card, None, None) | ||||
|         self.assertEqual(got, expect) | ||||
|  | ||||
|     def test_apply_metadata_heuristic_from_hf_parameters(self): | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Francis Couture-Harpin
					Francis Couture-Harpin