sampling : optimize samplers by reusing bucket sort (#15665)

* sampling : optimize sorting using bucket sort in more places

ggml-ci

* sampling : do not sort in dist sampler

ggml-ci

* sampling : avoid heap allocations for sort buffers

ggml-ci

* common : add option to sort sampling candidates by probability

ggml-ci

* sampling : revert the change for preserving sort buffers

* sampling : use std::copy instead of memcpy

* sampling : clarify purpose of partial sort helpers

ggml-ci

* cont : remove wrong comment [no ci]

* common : update comment

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>

---------

Co-authored-by: Johannes Gäßler <johannesg@5d6.de>
This commit is contained in:
Georgi Gerganov
2025-08-31 20:41:02 +03:00
committed by GitHub
parent 0d161f021a
commit e92d53b29e
9 changed files with 227 additions and 171 deletions

View File

@@ -244,7 +244,7 @@ int main(int argc, char ** argv) {
// stochastic verification
common_sampler_sample(smpl, ctx_tgt, drafts[s_keep].i_batch_tgt[i_dft], true);
auto & dist_tgt = *common_sampler_get_candidates(smpl);
auto & dist_tgt = *common_sampler_get_candidates(smpl, true);
float p_tgt = 0.0f;
float p_dft = 0.0f;
@@ -493,7 +493,7 @@ int main(int argc, char ** argv) {
common_sampler_sample(drafts[s].smpl, ctx_dft, drafts[s].i_batch_dft, true);
const auto * cur_p = common_sampler_get_candidates(drafts[s].smpl);
const auto * cur_p = common_sampler_get_candidates(drafts[s].smpl, true);
for (int k = 0; k < std::min(n_seq_dft + 3, (int) cur_p->size); ++k) {
LOG_DBG(" - draft candidate %3d for seq %3d, pos %3d: %6d (%8.3f) '%s'\n",