mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
model-conversion : add model card template for embeddings [no ci] (#15557)
* model-conversion: add model card template for embeddings [no ci] This commit adds a separate model card template (model repository README.md template) for embedding models. The motivation for this is that there server command for the embedding model is a little different and some addition information can be useful in the model card for embedding models which might not be directly relevant for causal models. * squash! model-conversion: add model card template for embeddings [no ci] Fix pyright lint error. * remove --pooling override and clarify embd_normalize usage
This commit is contained in:
@@ -144,6 +144,15 @@ perplexity-run:
|
||||
hf-create-model:
|
||||
@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}"
|
||||
|
||||
hf-create-model-dry-run:
|
||||
@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -d
|
||||
|
||||
hf-create-model-embedding:
|
||||
@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e
|
||||
|
||||
hf-create-model-embedding-dry-run:
|
||||
@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e -d
|
||||
|
||||
hf-create-model-private:
|
||||
@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -p
|
||||
|
||||
|
||||
@@ -285,13 +285,21 @@ For the following targets a `HF_TOKEN` environment variable is required.
|
||||
This will create a new model repsository on Hugging Face with the specified
|
||||
model name.
|
||||
```console
|
||||
(venv) $ make hf-create-model MODEL_NAME='TestModel' NAMESPACE="danbev"
|
||||
(venv) $ make hf-create-model MODEL_NAME='TestModel' NAMESPACE="danbev" ORIGINAL_BASE_MODEL="some-base-model"
|
||||
Repository ID: danbev/TestModel-GGUF
|
||||
Repository created: https://huggingface.co/danbev/TestModel-GGUF
|
||||
```
|
||||
Note that we append a `-GGUF` suffix to the model name to ensure a consistent
|
||||
naming convention for GGUF models.
|
||||
|
||||
An embedding model can be created using the following command:
|
||||
```console
|
||||
(venv) $ make hf-create-model-embedding MODEL_NAME='TestEmbeddingModel' NAMESPACE="danbev" ORIGINAL_BASE_MODEL="some-base-model"
|
||||
```
|
||||
The only difference is that the model card for an embedding model will be different
|
||||
with regards to the llama-server command and also how to access/call the embedding
|
||||
endpoint.
|
||||
|
||||
### Upload a GGUF model to model repository
|
||||
The following target uploads a model to an existing Hugging Face model repository.
|
||||
```console
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
---
|
||||
base_model:
|
||||
- {base_model}
|
||||
---
|
||||
# {model_name} GGUF
|
||||
|
||||
Recommended way to run this model:
|
||||
|
||||
```sh
|
||||
llama-server -hf {namespace}/{model_name}-GGUF
|
||||
```
|
||||
|
||||
Then the endpoint can be accessed at http://localhost:8080/embedding, for
|
||||
example using `curl`:
|
||||
```console
|
||||
curl --request POST \
|
||||
--url http://localhost:8080/embedding \
|
||||
--header "Content-Type: application/json" \
|
||||
--data '{{"input": "Hello embeddings"}}' \
|
||||
--silent
|
||||
```
|
||||
|
||||
Alternatively, the `llama-embedding` command line tool can be used:
|
||||
```sh
|
||||
llama-embedding -hf {namespace}/{model_name}-GGUF --verbose-prompt -p "Hello embeddings"
|
||||
```
|
||||
|
||||
#### embd_normalize
|
||||
When a model uses pooling, or the pooling method is specified using `--pooling`,
|
||||
the normalization can be controlled by the `embd_normalize` parameter.
|
||||
|
||||
The default value is `2` which means that the embeddings are normalized using
|
||||
the Euclidean norm (L2). Other options are:
|
||||
* -1 No normalization
|
||||
* 0 Max absolute
|
||||
* 1 Taxicab
|
||||
* 2 Euclidean/L2
|
||||
* \>2 P-Norm
|
||||
|
||||
This can be passed in the request body to `llama-server`, for example:
|
||||
```sh
|
||||
--data '{{"input": "Hello embeddings", "embd_normalize": -1}}' \
|
||||
```
|
||||
|
||||
And for `llama-embedding`, by passing `--embd-normalize <value>`, for example:
|
||||
```sh
|
||||
llama-embedding -hf {namespace}/{model_name}-GGUF --embd-normalize -1 -p "Hello embeddings"
|
||||
```
|
||||
@@ -26,21 +26,31 @@ parser.add_argument('--namespace', '-ns', help='Namespace to add the model to',
|
||||
parser.add_argument('--org-base-model', '-b', help='Original Base model name', default="")
|
||||
parser.add_argument('--no-card', action='store_true', help='Skip creating model card')
|
||||
parser.add_argument('--private', '-p', action='store_true', help='Create private model')
|
||||
parser.add_argument('--embedding', '-e', action='store_true', help='Use embedding model card template')
|
||||
parser.add_argument('--dry-run', '-d', action='store_true', help='Print repository info and template without creating repository')
|
||||
|
||||
args = parser.parse_args()
|
||||
|
||||
repo_id = f"{args.namespace}/{args.model_name}-GGUF"
|
||||
print("Repository ID: ", repo_id)
|
||||
|
||||
repo_url = api.create_repo(
|
||||
repo_id=repo_id,
|
||||
repo_type="model",
|
||||
private=args.private,
|
||||
exist_ok=False
|
||||
)
|
||||
repo_url = None
|
||||
if not args.dry_run:
|
||||
repo_url = api.create_repo(
|
||||
repo_id=repo_id,
|
||||
repo_type="model",
|
||||
private=args.private,
|
||||
exist_ok=False
|
||||
)
|
||||
|
||||
if not args.no_card:
|
||||
template_path = "scripts/readme.md.template"
|
||||
if args.embedding:
|
||||
template_path = "scripts/embedding/modelcard.template"
|
||||
else:
|
||||
template_path = "scripts/causal/modelcard.template"
|
||||
|
||||
print("Template path: ", template_path)
|
||||
|
||||
model_card_content = load_template_and_substitute(
|
||||
template_path,
|
||||
model_name=args.model_name,
|
||||
@@ -48,16 +58,21 @@ if not args.no_card:
|
||||
base_model=args.org_base_model,
|
||||
)
|
||||
|
||||
if model_card_content:
|
||||
api.upload_file(
|
||||
path_or_fileobj=model_card_content.encode('utf-8'),
|
||||
path_in_repo="README.md",
|
||||
repo_id=repo_id
|
||||
)
|
||||
print("Model card created successfully.")
|
||||
if args.dry_run:
|
||||
print("\nTemplate Content:\n")
|
||||
print(model_card_content)
|
||||
else:
|
||||
print("Failed to create model card.")
|
||||
if model_card_content:
|
||||
api.upload_file(
|
||||
path_or_fileobj=model_card_content.encode('utf-8'),
|
||||
path_in_repo="README.md",
|
||||
repo_id=repo_id
|
||||
)
|
||||
print("Model card created successfully.")
|
||||
else:
|
||||
print("Failed to create model card.")
|
||||
|
||||
print(f"Repository created: {repo_url}")
|
||||
if not args.dry_run and repo_url:
|
||||
print(f"Repository created: {repo_url}")
|
||||
|
||||
|
||||
|
||||
Reference in New Issue
Block a user