mirror of https://github.com/ggml-org/llama.cpp.git
	parallel : add disabled experimental batch chunking in powers of two
		@@ -253,6 +253,13 @@ int main(int argc, char ** argv) {
         int32_t n_batch = params.n_batch;
 
         for (int32_t i = 0; i < (int32_t) batch.n_tokens; i += n_batch) {
+            // experiment: process in powers of 2
+            //if (i + n_batch > (int32_t) batch.n_tokens && n_batch > 32) {
+            //    n_batch /= 2;
+            //    i -= n_batch;
+            //    continue;
+            //}
+
             const int32_t n_tokens = std::min(n_batch, (int32_t) (batch.n_tokens - i));
 
             llama_batch batch_view = {
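The halving trick in the disabled block is easier to follow in isolation. Below is a minimal, self-contained C++ sketch, not part of the commit: n_tokens_total and the printf are illustrative stand-ins for batch.n_tokens and decoding the batch view. It runs the same arithmetic on a hypothetical 1000-token batch with an initial n_batch of 512. The key point is that i -= n_batch followed by continue cancels the loop's i += n_batch, so each retry re-runs the same offset with a halved batch size.

// standalone sketch, not part of the commit: simulates the disabled
// power-of-two chunking; the halving rule and the 32-token floor mirror
// the commented-out code above, everything else is scaffolding
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
    const int32_t n_tokens_total = 1000; // stand-in for batch.n_tokens
    int32_t n_batch = 512;               // stand-in for params.n_batch

    for (int32_t i = 0; i < n_tokens_total; i += n_batch) {
        // the tail does not fill the current batch: halve the batch size
        // (down to a floor of 32) and retry at the same offset
        if (i + n_batch > n_tokens_total && n_batch > 32) {
            n_batch /= 2;
            i -= n_batch; // cancels the upcoming i += n_batch
            continue;
        }

        const int32_t n_tokens = std::min(n_batch, n_tokens_total - i);

        // stand-in for building the llama_batch view and decoding it
        printf("chunk: offset=%4d size=%4d\n", (int) i, (int) n_tokens);
    }

    return 0;
}

For 1000 tokens this prints chunks of 512, 256, 128, 64, 32 and 8, so the ragged tail is decoded through a short descending sequence of power-of-two batches rather than one odd-sized call; presumably the experiment was to measure whether backends that favor power-of-two batch sizes handle such tails faster.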
				