model : jina-embeddings-v3 support (#13693)

* initial jina-embeddings-v3 support

* initial jina-embeddings-v3 support

* initial jina-embeddings-v3 support

* fix vocab parsing with only tokenizer.json

* set mask token lstrip attribute

* additional unk_token_id fallback just in case [no ci]

* revert vocab_size() change [no ci]

* merge tensor loading into general bert

* rope

* add lora embedding and loading (non-functional)

* export separate lora ggufs instead

* add adapter metadata api

* use std::string

* convert_hf_to_lora compatibility

* fix assert

* apply suggestions from review

* apply suggestion from review
This commit is contained in:
Sigbjørn Skjæret
2025-08-28 15:49:50 +02:00
committed by GitHub
parent 55042b3692
commit 84ab83cc0b
14 changed files with 245 additions and 23 deletions

View File

@@ -553,6 +553,24 @@ extern "C" {
struct llama_model * model,
const char * path_lora);
// Functions to access the adapter's GGUF metadata scalar values
// - The string-returning functions return the length of the string on success, or -1 on failure
// - The output string is always null-terminated and cleared on failure
// - When retrieving a string, an extra byte must be allocated to account for the null terminator
// - GGUF array values are not supported by these functions

// Get metadata value as a string by key name
// - key: name of the metadata entry to look up
// - buf, buf_size: caller-provided output buffer and its size (must include room for the null terminator)
LLAMA_API int32_t llama_adapter_meta_val_str(const struct llama_adapter_lora * adapter, const char * key, char * buf, size_t buf_size);
// Get the number of metadata key/value pairs
// - The return value is a count of pairs, not a string length
LLAMA_API int32_t llama_adapter_meta_count(const struct llama_adapter_lora * adapter);
// Get metadata key name by index
// - i: index of the key/value pair
//   NOTE(review): valid range is presumably 0 .. llama_adapter_meta_count(adapter) - 1 - confirm against the implementation
// - buf, buf_size: caller-provided output buffer and its size (must include room for the null terminator)
LLAMA_API int32_t llama_adapter_meta_key_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
// Get metadata value as a string by index
// - i: index of the key/value pair
//   NOTE(review): valid range is presumably 0 .. llama_adapter_meta_count(adapter) - 1 - confirm against the implementation
// - buf, buf_size: caller-provided output buffer and its size (must include room for the null terminator)
LLAMA_API int32_t llama_adapter_meta_val_str_by_index(const struct llama_adapter_lora * adapter, int32_t i, char * buf, size_t buf_size);
// Manually free a LoRA adapter
// Note: loaded adapters will be freed when the associated model is deleted
LLAMA_API void llama_adapter_lora_free(struct llama_adapter_lora * adapter);