mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
sampling : optimize samplers by reusing bucket sort (#15665)
* sampling : optimize sorting using bucket sort in more places ggml-ci * sampling : do not sort in dist sampler ggml-ci * sampling : avoid heap allocations for sort buffers ggml-ci * common : add option to sort sampling candidates by probability ggml-ci * sampling : revert the change for preserving sort buffers * sampling : use std::copy instead of memcpy * sampling : clarify purpose of partial sort helpers ggml-ci * cont : remove wrong comment [no ci] * common : update comment Co-authored-by: Johannes Gäßler <johannesg@5d6.de> --------- Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
@@ -2550,11 +2550,12 @@ struct server_context {
|
||||
return slot.has_next_token; // continue
|
||||
}
|
||||
|
||||
void populate_token_probs(const server_slot & slot, completion_token_output & result, bool post_sampling, bool special, int idx) {
|
||||
void populate_token_probs(const server_slot & slot, completion_token_output & result, bool post_sampling, bool special, int idx) const {
|
||||
size_t n_probs = slot.params.sampling.n_probs;
|
||||
size_t n_vocab = llama_vocab_n_tokens(vocab);
|
||||
|
||||
if (post_sampling) {
|
||||
const auto * cur_p = common_sampler_get_candidates(slot.smpl);
|
||||
const auto * cur_p = common_sampler_get_candidates(slot.smpl, true);
|
||||
const size_t max_probs = cur_p->size;
|
||||
|
||||
// set probability for sampled token
|
||||
|
||||
@@ -895,7 +895,7 @@ lovely<|t_0.56|><|code_start|><|634|><|596|><|1766|><|1556|><|1306|><|1285|><|14
|
||||
|
||||
codes.push_back(new_token_id);
|
||||
|
||||
const auto * cands = common_sampler_get_candidates(smpl[i]);
|
||||
const auto * cands = common_sampler_get_candidates(smpl[i], false);
|
||||
|
||||
// is it an end of generation? -> mark the stream as finished
|
||||
if (llama_vocab_is_eog(vocab, new_token_id) || n_decode == n_predict) {
|
||||
|
||||
Reference in New Issue
Block a user