	llama : add Jina Embeddings architecture (#6826)
* feat: first things to do
* feat: create tensors for Jina architecture
* fix: use other tensors
* feat: embedding gets results
* fix: fix usage of ALIBI
* fix: clean prints
* fix: do some cleanup unused vars
* fix: revert changes to Makefile and CMakeLists
* fix: revert some changes
* fix: fix small detail
* fix: fix convert formatting
* fix: fix linting and editor
* feat: set proper vocab settings
* fix: JinaBertForMaskedLM registration
* feat: support q_normalization and k_normalization in Jina arch
* feat: handle gpt2 tokenizer with Jina architecture
* feat: example comments in embedding
* feat: rename Jina Bert to Jina Bert V2
* fix: add some changes as per review
* feat: proper KQ_pos for Jina embeddings
* feat: add capacity to load models ES and DE for Spanish
* llama : fix pre-tokenizers
* ggml : full ALiBi support
* ggml : update ggml_soft_max_ext() CUDA, SYCL
* ggml : ggml_flash_attn_ext() support ALiBi (CPU)
* ggml : ggml_flash_attn_ext() support ALiBi (Metal)
* ggml : fix warning
* ggml : ggml_flash_attn_ext() support ALiBi (CUDA) ggml-ci
* minor : clean-up
* embedding : add warning about missing SEP

---------

Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
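The ggml bullets above extend ALiBi (Attention with Linear Biases) across the CPU, CUDA, SYCL and Metal backends, since Jina BERT v2 uses ALiBi instead of positional embeddings. As a reference for what those kernels compute, here is a minimal C++ sketch of the standard ALiBi formulation; it is illustrative only, not the actual ggml_soft_max_ext()/ggml_flash_attn_ext() code, and the closed-form slope assumes n_head is a power of two.

    #include <cmath>
    #include <vector>

    // Per-head slope m_h = 2^(-8(h+1)/n_head) for head h (0-based);
    // this closed form assumes n_head is a power of two.
    static float alibi_slope(int h, int n_head) {
        return std::pow(2.0f, -8.0f * float(h + 1) / float(n_head));
    }

    // Add the linear position penalty to raw attention scores before softmax:
    // score(i, j) -= m_h * (i - j) for each key j <= query i.
    static void alibi_bias(std::vector<float> & scores, int n_tokens, int h, int n_head) {
        const float m = alibi_slope(h, n_head);
        for (int i = 0; i < n_tokens; ++i) {
            for (int j = 0; j <= i; ++j) {
                scores[i*n_tokens + j] -= m * float(i - j);
            }
        }
    }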
@@ -49,6 +49,12 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * output
         }
 
         float * out = output + batch.seq_id[i][0] * n_embd;
+        //TODO: I would also add a parameter here to enable normalization or not.
+        /*fprintf(stdout, "unnormalized_embedding:");
+        for (int hh = 0; hh < n_embd; hh++) {
+            fprintf(stdout, "%9.6f ", embd[hh]);
+        }
+        fprintf(stdout, "\n");*/
         llama_embd_normalize(embd, out, n_embd);
     }
 }
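For context, llama_embd_normalize() scales each embedding to unit Euclidean length before it is written to the output buffer; the commented-out block above only dumps the raw values first. A minimal sketch of that L2 normalization, assuming the helper does nothing beyond it:

    #include <cmath>

    // L2-normalize n values from inp into out; zero vectors map to zeros.
    static void embd_normalize_sketch(const float * inp, float * out, int n) {
        double sum = 0.0;
        for (int i = 0; i < n; ++i) {
            sum += (double) inp[i] * inp[i];   // accumulate squared components
        }
        const float norm = sum > 0.0 ? 1.0f / (float) std::sqrt(sum) : 0.0f;
        for (int i = 0; i < n; ++i) {
            out[i] = inp[i] * norm;            // scale to unit length
        }
    }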
@@ -123,10 +129,12 @@ int main(int argc, char ** argv) {
         inputs.push_back(inp);
     }
 
-    // add SEP if not present
+    // check if the last token is SEP
+    // it should be automatically added by the tokenizer when 'tokenizer.ggml.add_eos_token' is set to 'true'
     for (auto & inp : inputs) {
         if (inp.empty() || inp.back() != llama_token_sep(model)) {
-            inp.push_back(llama_token_sep(model));
+            fprintf(stderr, "%s: warning: last token in the prompt is not SEP\n", __func__);
+            fprintf(stderr, "%s:          'tokenizer.ggml.add_eos_token' should be set to 'true' in the GGUF header\n", __func__);
         }
     }
 
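The warning replaces the old behaviour of silently appending SEP: for BERT-style embedding models such as Jina, the tokenizer itself should add it when 'tokenizer.ggml.add_eos_token' is 'true' in the GGUF metadata. A hedged sketch of how a caller could run the same check up front, assuming the llama_tokenize()/llama_token_sep() signatures from llama.h of this period:

    #include <cstring>
    #include <vector>
    #include "llama.h"

    // Returns true if tokenizing `text` with special tokens enabled yields SEP
    // last, i.e. 'tokenizer.ggml.add_eos_token' is in effect for this model.
    static bool prompt_ends_with_sep(const llama_model * model, const char * text) {
        std::vector<llama_token> toks(strlen(text) + 16); // generous upper bound
        const int n = llama_tokenize(model, text, (int) strlen(text),
                                     toks.data(), (int) toks.size(),
                                     /*add_special=*/true, /*parse_special=*/false);
        return n > 0 && toks[n - 1] == llama_token_sep(model);
    }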
Author: Joan Fontanals