mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	embedding: add raw option for --embd-output-format (#16541)
* Add --embd-output-format raw for plain numeric embedding output

  This new option outputs embeddings as raw space-separated floats, without JSON or 'embedding N:' prefixes. Useful for downstream vector pipelines and scripting.

* Move raw output handling into format handling section

* Move raw output handling into else-if block with other format handlers

* Use LOG instead of printf for raw embedding output

* docs: document 'raw' embedding output format in arg.cpp and README
This commit is contained in:
		@@ -3248,7 +3248,7 @@ common_params_context common_params_parser_init(common_params & params, llama_ex
 | 
			
		||||
    ).set_examples({LLAMA_EXAMPLE_EMBEDDING}));
 | 
			
		||||
    add_opt(common_arg(
 | 
			
		||||
        {"--embd-output-format"}, "FORMAT",
 | 
			
		||||
        "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix",
 | 
			
		||||
        "empty = default, \"array\" = [[],[]...], \"json\" = openai style, \"json+\" = same \"json\" + cosine similarity matrix, \"raw\" = plain whitespace-delimited output (one embedding per line)",
 | 
			
		||||
        [](common_params & params, const std::string & value) {
 | 
			
		||||
            params.embd_out = value;
 | 
			
		||||
        }
 | 
			
		||||
 
 | 
			
		||||
@@ -38,6 +38,7 @@ The above command will output space-separated float values.
 | 
			
		||||
|            | multiple embeddings          | $[[x_1,...,x_n],[x_1,...,x_n],...,[x_1,...,x_n]]$
 | 
			
		||||
| 'json'     | openai style                 |
 | 
			
		||||
| 'json+'    | add cosine similarity matrix |
 | 
			
		||||
| 'raw'      | plain text output            |
 | 
			
		||||
 | 
			
		||||
### --embd-separator $"string"$
 | 
			
		||||
| $"string"$   | |
 | 
			
		||||
 
 | 
			
		||||
@@ -70,6 +70,29 @@ static void batch_decode(llama_context * ctx, llama_batch & batch, float * outpu
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
// plain, pipe-friendly output: one embedding per line
 | 
			
		||||
static void print_raw_embeddings(const float * emb,
 | 
			
		||||
                                 int n_embd_count,
 | 
			
		||||
                                 int n_embd,
 | 
			
		||||
                                 const llama_model * model,
 | 
			
		||||
                                 enum llama_pooling_type pooling_type,
 | 
			
		||||
                                 int embd_normalize) {
 | 
			
		||||
    const uint32_t n_cls_out = llama_model_n_cls_out(model);
 | 
			
		||||
    const bool is_rank = (pooling_type == LLAMA_POOLING_TYPE_RANK);
 | 
			
		||||
    const int cols = is_rank ? std::min<int>(n_embd, (int) n_cls_out) : n_embd;
 | 
			
		||||
 | 
			
		||||
    for (int j = 0; j < n_embd_count; ++j) {
 | 
			
		||||
        for (int i = 0; i < cols; ++i) {
 | 
			
		||||
            if (embd_normalize == 0) {
 | 
			
		||||
                LOG("%1.0f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
 | 
			
		||||
            } else {
 | 
			
		||||
                LOG("%1.7f%s", emb[j * n_embd + i], (i + 1 < cols ? " " : ""));
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
        LOG("\n");
 | 
			
		||||
    }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
int main(int argc, char ** argv) {
 | 
			
		||||
    common_params params;
 | 
			
		||||
 | 
			
		||||
@@ -372,6 +395,8 @@ int main(int argc, char ** argv) {
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if (notArray) LOG("\n}\n");
 | 
			
		||||
    } else if (params.embd_out == "raw") {
 | 
			
		||||
        print_raw_embeddings(emb, n_embd_count, n_embd, model, pooling_type, params.embd_normalize);
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    LOG("\n");
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user