mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
* model-conversion: add model card template for embeddings [no ci] This commit adds a separate model card template (model repository README.md template) for embedding models. The motivation for this is that the server command for the embedding model is a little different and some additional information can be useful in the model card for embedding models which might not be directly relevant for causal models. * squash! model-conversion: add model card template for embeddings [no ci] Fix pyright lint error. * remove --pooling override and clarify embd_normalize usage
173 lines
6.2 KiB
Makefile
173 lines
6.2 KiB
Makefile
# Validation functions
|
|
# validate_model_path
#
# Recipe fragment that stops the calling target when MODEL_PATH is empty.
# Usage: $(call validate_model_path,<target-name>)
#   $(1)  name of the invoking target; it only appears in the example
#         command printed in the hint.
# When MODEL_PATH is empty, three hint lines are printed and the recipe
# shell exits with status 1. Otherwise nothing is printed.
define validate_model_path
	@test -n "$(MODEL_PATH)" || { \
		echo "Error: MODEL_PATH must be provided either as:"; \
		echo " 1. Environment variable: export MODEL_PATH=/path/to/model"; \
		echo " 2. Command line argument: make $(1) MODEL_PATH=/path/to/model"; \
		exit 1; \
	}
endef
|
|
|
|
# validate_embedding_model_path
#
# Recipe fragment that stops the calling target when EMBEDDING_MODEL_PATH
# is empty.
# Usage: $(call validate_embedding_model_path,<target-name>)
#   $(1)  name of the invoking target; it only appears in the example
#         command printed in the hint.
# When EMBEDDING_MODEL_PATH is empty, three hint lines are printed and the
# recipe shell exits with status 1. Otherwise nothing is printed.
define validate_embedding_model_path
	@test -n "$(EMBEDDING_MODEL_PATH)" || { \
		echo "Error: EMBEDDING_MODEL_PATH must be provided either as:"; \
		echo " 1. Environment variable: export EMBEDDING_MODEL_PATH=/path/to/model"; \
		echo " 2. Command line argument: make $(1) EMBEDDING_MODEL_PATH=/path/to/model"; \
		exit 1; \
	}
endef
|
|
|
|
###
|
|
### Causal Model targets/recipes
|
|
###
|
|
# Alias for causal-convert-model with OUTTYPE set to bf16.
# OUTTYPE is a target-specific variable, so it is also in effect while the
# causal-convert-model prerequisite is being built.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-convert-model-bf16
causal-convert-model-bf16: OUTTYPE = bf16
causal-convert-model-bf16: causal-convert-model
|
|
|
|
# Runs ./scripts/causal/convert-model.sh.
# Stops with a usage hint when MODEL_PATH is empty. MODEL_NAME, OUTTYPE,
# MODEL_PATH and METADATA_OVERRIDE are handed to the script through its
# environment.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-convert-model
causal-convert-model:
	$(call validate_model_path,causal-convert-model)
	@MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(OUTTYPE)" MODEL_PATH="$(MODEL_PATH)" \
	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
	./scripts/causal/convert-model.sh
|
|
|
|
# Runs ./scripts/causal/run-org-model.py with MODEL_PATH in its environment.
# Stops with a usage hint when MODEL_PATH is empty.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-run-original-model
causal-run-original-model:
	$(call validate_model_path,causal-run-original-model)
	@MODEL_PATH="$(MODEL_PATH)" ./scripts/causal/run-org-model.py
|
|
|
|
# Runs ./scripts/causal/run-converted-model.sh with CONVERTED_MODEL in its
# environment. No positional argument is passed.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-run-converted-model
causal-run-converted-model:
	@CONVERTED_MODEL="$(CONVERTED_MODEL)" ./scripts/causal/run-converted-model.sh
|
|
|
|
# Runs causal-run-original-model and causal-run-converted-model first, then
# ./scripts/causal/compare-logits.py followed by ./scripts/utils/check-nmse.py.
# MODEL_PATH has already been checked as non-empty by the
# causal-run-original-model prerequisite, so it is quoted here to keep a
# path containing spaces as a single -m argument.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-verify-logits
causal-verify-logits: causal-run-original-model causal-run-converted-model
	@./scripts/causal/compare-logits.py
	@MODEL_PATH="$(MODEL_PATH)" ./scripts/utils/check-nmse.py -m "$(MODEL_PATH)"
|
|
|
|
# Runs ./scripts/causal/run-casual-gen-embeddings-org.sh with no arguments.
# NOTE(review): the script name spells "casual"; it is left untouched
# because it has to match the file on disk — confirm before renaming.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-run-original-embeddings
causal-run-original-embeddings:
	@./scripts/causal/run-casual-gen-embeddings-org.sh
|
|
|
|
# Runs ./scripts/causal/run-converted-model-embeddings-logits.sh with no
# arguments.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-run-converted-embeddings
causal-run-converted-embeddings:
	@./scripts/causal/run-converted-model-embeddings-logits.sh
|
|
|
|
# Runs causal-run-original-embeddings and causal-run-converted-embeddings
# first, then ./scripts/causal/compare-embeddings-logits.sh.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-verify-embeddings
causal-verify-embeddings: causal-run-original-embeddings causal-run-converted-embeddings
	@./scripts/causal/compare-embeddings-logits.sh
|
|
|
|
# Runs ./scripts/utils/inspect-org-model.py with no arguments.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-inspect-original-model
causal-inspect-original-model:
	@./scripts/utils/inspect-org-model.py
|
|
|
|
# Runs ./scripts/utils/inspect-converted-model.sh with no arguments.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-inspect-converted-model
causal-inspect-converted-model:
	@./scripts/utils/inspect-converted-model.sh
|
|
|
|
# Runs ./scripts/utils/run-embedding-server.sh with CONVERTED_MODEL as its
# positional argument. The value is unquoted, so an empty CONVERTED_MODEL
# results in no argument being passed at all.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-start-embedding-server
causal-start-embedding-server:
	@./scripts/utils/run-embedding-server.sh ${CONVERTED_MODEL}
|
|
|
|
# Runs causal-run-original-embeddings first, then pipes the output of
# ./scripts/utils/curl-embedding-server.sh into
# ./scripts/causal/compare-embeddings-logits.sh.
# Unless the recipe shell has pipefail enabled, the recipe's exit status is
# that of the compare script (the last command of the pipeline).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-curl-embedding-endpoint
causal-curl-embedding-endpoint: causal-run-original-embeddings
	@./scripts/utils/curl-embedding-server.sh | ./scripts/causal/compare-embeddings-logits.sh
|
|
|
|
# Alias for causal-quantize-model with QUANTIZED_TYPE set to Q8_0.
# QUANTIZED_TYPE is a target-specific variable, so it is also in effect
# while the causal-quantize-model prerequisite is being built.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-quantize-Q8_0
causal-quantize-Q8_0: QUANTIZED_TYPE = Q8_0
causal-quantize-Q8_0: causal-quantize-model
|
|
|
|
# Alias for causal-quantize-model with QUANTIZED_TYPE set to Q4_0.
# QUANTIZED_TYPE is a target-specific variable, so it is also in effect
# while the causal-quantize-model prerequisite is being built.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-quantize-Q4_0
causal-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
causal-quantize-Q4_0: causal-quantize-model
|
|
|
|
# Runs ./scripts/utils/quantize.sh, passing CONVERTED_MODEL and
# QUANTIZED_TYPE both through the environment and as positional arguments,
# then prints a reminder to export QUANTIZED_MODEL.
# The positional arguments are unquoted, so an empty variable contributes
# no argument.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-quantize-model
causal-quantize-model:
	@CONVERTED_MODEL="$(CONVERTED_MODEL)" QUANTIZED_TYPE="$(QUANTIZED_TYPE)" ./scripts/utils/quantize.sh ${CONVERTED_MODEL} ${QUANTIZED_TYPE}
	@echo "Export the quantized model path to QUANTIZED_MODEL variable in your environment"
|
|
|
|
# Runs ./scripts/causal/run-converted-model.sh with QUANTIZED_MODEL both in
# the environment and as the (unquoted) positional argument.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: causal-run-quantized-model
causal-run-quantized-model:
	@QUANTIZED_MODEL="$(QUANTIZED_MODEL)" ./scripts/causal/run-converted-model.sh ${QUANTIZED_MODEL}
|
|
|
|
|
|
###
|
|
### Embedding Model targets/recipes
|
|
###
|
|
|
|
# Alias for embedding-convert-model with OUTTYPE set to bf16.
# OUTTYPE is a target-specific variable, so it is also in effect while the
# embedding-convert-model prerequisite is being built.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-convert-model-bf16
embedding-convert-model-bf16: OUTTYPE = bf16
embedding-convert-model-bf16: embedding-convert-model
|
|
|
|
# Runs ./scripts/embedding/convert-model.sh.
# Stops with a usage hint when EMBEDDING_MODEL_PATH is empty. The script
# receives EMBEDDING_MODEL_PATH under the environment name MODEL_PATH,
# together with MODEL_NAME, OUTTYPE and METADATA_OVERRIDE.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-convert-model
embedding-convert-model:
	$(call validate_embedding_model_path,embedding-convert-model)
	@MODEL_NAME="$(MODEL_NAME)" OUTTYPE="$(OUTTYPE)" MODEL_PATH="$(EMBEDDING_MODEL_PATH)" \
	METADATA_OVERRIDE="$(METADATA_OVERRIDE)" \
	./scripts/embedding/convert-model.sh
|
|
|
|
# Runs ./scripts/embedding/run-original-model.py with EMBEDDING_MODEL_PATH
# in its environment. Stops with a usage hint when that variable is empty.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-run-original-model
embedding-run-original-model:
	$(call validate_embedding_model_path,embedding-run-original-model)
	@EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" ./scripts/embedding/run-original-model.py
|
|
|
|
# Runs ./scripts/embedding/run-converted-model.sh with
# CONVERTED_EMBEDDING_MODEL both in the environment and as the (unquoted)
# positional argument.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-run-converted-model
embedding-run-converted-model:
	@CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" ./scripts/embedding/run-converted-model.sh ${CONVERTED_EMBEDDING_MODEL}
|
|
|
|
# Runs embedding-run-original-model and embedding-run-converted-model first,
# then ./scripts/embedding/compare-embeddings-logits.sh.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-verify-logits
embedding-verify-logits: embedding-run-original-model embedding-run-converted-model
	@./scripts/embedding/compare-embeddings-logits.sh
|
|
|
|
# Runs ./scripts/utils/inspect-org-model.py -m <EMBEDDING_MODEL_PATH>, with
# the same value also set in the script's environment.
# The first recipe line stops the target when EMBEDDING_MODEL_PATH is empty,
# so the -m value is quoted to keep a path containing spaces as one
# argument.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-inspect-original-model
embedding-inspect-original-model:
	$(call validate_embedding_model_path,embedding-inspect-original-model)
	@EMBEDDING_MODEL_PATH="$(EMBEDDING_MODEL_PATH)" ./scripts/utils/inspect-org-model.py -m "$(EMBEDDING_MODEL_PATH)"
|
|
|
|
# Runs ./scripts/utils/inspect-converted-model.sh with
# CONVERTED_EMBEDDING_MODEL both in the environment and as the (unquoted)
# positional argument.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-inspect-converted-model
embedding-inspect-converted-model:
	@CONVERTED_EMBEDDING_MODEL="$(CONVERTED_EMBEDDING_MODEL)" ./scripts/utils/inspect-converted-model.sh ${CONVERTED_EMBEDDING_MODEL}
|
|
|
|
# Runs ./scripts/utils/run-embedding-server.sh with
# CONVERTED_EMBEDDING_MODEL as its positional argument. The value is
# unquoted, so an empty variable results in no argument being passed.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-start-embedding-server
embedding-start-embedding-server:
	@./scripts/utils/run-embedding-server.sh ${CONVERTED_EMBEDDING_MODEL}
|
|
|
|
# Pipes the output of ./scripts/utils/curl-embedding-server.sh into
# ./scripts/embedding/compare-embeddings-logits.sh.
# Unless the recipe shell has pipefail enabled, the recipe's exit status is
# that of the compare script (the last command of the pipeline).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-curl-embedding-endpoint
embedding-curl-embedding-endpoint:
	@./scripts/utils/curl-embedding-server.sh | ./scripts/embedding/compare-embeddings-logits.sh
|
|
|
|
# Alias for embedding-quantize-model with QUANTIZED_TYPE set to Q8_0.
# QUANTIZED_TYPE is a target-specific variable, so it is also in effect
# while the embedding-quantize-model prerequisite is being built.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-quantize-Q8_0
embedding-quantize-Q8_0: QUANTIZED_TYPE = Q8_0
embedding-quantize-Q8_0: embedding-quantize-model
|
|
|
|
# Alias for embedding-quantize-model with QUANTIZED_TYPE set to Q4_0.
# QUANTIZED_TYPE is a target-specific variable, so it is also in effect
# while the embedding-quantize-model prerequisite is being built.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-quantize-Q4_0
embedding-quantize-Q4_0: QUANTIZED_TYPE = Q4_0
embedding-quantize-Q4_0: embedding-quantize-model
|
|
|
|
# Runs ./scripts/utils/quantize.sh with CONVERTED_EMBEDDING_MODEL and
# QUANTIZED_TYPE as (unquoted) positional arguments, then prints a reminder
# to export QUANTIZED_EMBEDDING_MODEL.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-quantize-model
embedding-quantize-model:
	@./scripts/utils/quantize.sh ${CONVERTED_EMBEDDING_MODEL} ${QUANTIZED_TYPE}
	@echo "Export the quantized model path to QUANTIZED_EMBEDDING_MODEL variable in your environment"
|
|
|
|
# Runs ./scripts/embedding/run-converted-model.sh with
# QUANTIZED_EMBEDDING_MODEL as its (unquoted) positional argument.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: embedding-run-quantized-model
embedding-run-quantized-model:
	@./scripts/embedding/run-converted-model.sh ${QUANTIZED_EMBEDDING_MODEL}
|
|
|
|
###
|
|
### Perplexity targets/recipes
|
|
###
|
|
# Runs ./scripts/utils/perplexity-gen.sh with CONVERTED_MODEL in its
# environment. The command line is echoed by make (no @ prefix).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: perplexity-data-gen
perplexity-data-gen:
	CONVERTED_MODEL="$(CONVERTED_MODEL)" ./scripts/utils/perplexity-gen.sh
|
|
|
|
# Runs ./scripts/utils/perplexity-run.sh with QUANTIZED_MODEL and
# LOGITS_FILE in its environment. The command line is echoed by make
# (no @ prefix).
# The environment variable is spelled LOGITS_FILE so that it matches the
# Make variable it is filled from.
# NOTE(review): assumes perplexity-run.sh reads LOGITS_FILE — confirm
# against the script.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: perplexity-run-full
perplexity-run-full:
	QUANTIZED_MODEL="$(QUANTIZED_MODEL)" LOGITS_FILE="$(LOGITS_FILE)" \
	./scripts/utils/perplexity-run.sh
|
|
|
|
# Runs ./scripts/utils/perplexity-run-simple.sh with QUANTIZED_MODEL in its
# environment. The command line is echoed by make (no @ prefix).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: perplexity-run
perplexity-run:
	QUANTIZED_MODEL="$(QUANTIZED_MODEL)" ./scripts/utils/perplexity-run-simple.sh
|
|
|
|
###
|
|
### HuggingFace targets/recipes
|
|
###
|
|
|
|
# Runs ./scripts/utils/hf-create-model.py with -m MODEL_NAME, -ns NAMESPACE
# and -b ORIGINAL_BASE_MODEL. Each value is quoted, so an empty variable is
# passed as an empty string.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-create-model
hf-create-model:
	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}"
|
|
|
|
# Same invocation as hf-create-model plus the -d flag (presumably the
# script's dry-run switch, going by the target name — confirm in the script).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-create-model-dry-run
hf-create-model-dry-run:
	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -d
|
|
|
|
# Same invocation as hf-create-model plus the -e flag (presumably selects
# the embedding-model variant, going by the target name — confirm in the
# script).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-create-model-embedding
hf-create-model-embedding:
	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e
|
|
|
|
# Same invocation as hf-create-model plus the -e and -d flags (presumably
# embedding variant and dry run, going by the target name — confirm in the
# script).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-create-model-embedding-dry-run
hf-create-model-embedding-dry-run:
	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -e -d
|
|
|
|
# Same invocation as hf-create-model plus the -p flag (presumably creates
# the repository as private, going by the target name — confirm in the
# script).
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-create-model-private
hf-create-model-private:
	@./scripts/utils/hf-create-model.py -m "${MODEL_NAME}" -ns "${NAMESPACE}" -b "${ORIGINAL_BASE_MODEL}" -p
|
|
|
|
# Runs ./scripts/utils/hf-upload-gguf-model.py with -m MODEL_PATH,
# -r REPO_ID and -o NAME_IN_REPO. Each value is quoted, so an empty variable
# is passed as an empty string.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-upload-gguf-to-model
hf-upload-gguf-to-model:
	@./scripts/utils/hf-upload-gguf-model.py -m "${MODEL_PATH}" -r "${REPO_ID}" -o "${NAME_IN_REPO}"
|
|
|
|
# Runs ./scripts/utils/hf-create-collection.py with -n NAME, -d DESCRIPTION
# and -ns NAMESPACE. Each value is quoted, so an empty variable is passed as
# an empty string.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-create-collection
hf-create-collection:
	@./scripts/utils/hf-create-collection.py -n "${NAME}" -d "${DESCRIPTION}" -ns "${NAMESPACE}"
|
|
|
|
# Runs ./scripts/utils/hf-add-model-to-collection.py with -c COLLECTION and
# -m MODEL. Each value is quoted, so an empty variable is passed as an empty
# string.
# Declared phony so a file or directory of the same name cannot mask it.
.PHONY: hf-add-model-to-collection
hf-add-model-to-collection:
	@./scripts/utils/hf-add-model-to-collection.py -c "${COLLECTION}" -m "${MODEL}"
|
|
|
|
|
|
# Removes the data directory and the four hidden marker files listed below.
# The command is not echoed, and missing paths are not an error because the
# command is run with -rf.
.PHONY: clean
clean:
	@$(RM) -rf data \
		.converted_embedding_model.txt \
		.converted_model.txt \
		.embedding_model_name.txt \
		.model_name.txt
|
|
|