diff --git a/ggml/src/ggml-cpu/repack.cpp b/ggml/src/ggml-cpu/repack.cpp index 8da1e0e924..8421c84ce0 100644 --- a/ggml/src/ggml-cpu/repack.cpp +++ b/ggml/src/ggml-cpu/repack.cpp @@ -1678,10 +1678,24 @@ template 0 && (nr / nchunk) < min_chunk_size && nr >= min_chunk_size) { + nchunk = (nr + min_chunk_size - 1) / min_chunk_size; + } + if (nth == 1 || nchunk < nth || disable_chunking) { nchunk = nth; } + // Ensure nchunk doesn't exceed the number of rows divided by minimum chunk size + // This prevents creating too many tiny chunks that could overlap after alignment + const int64_t max_nchunk = (nr + min_chunk_size - 1) / min_chunk_size; + if (nchunk > max_nchunk) { + nchunk = max_nchunk; + } + if (ith == 0) { // Every thread starts at ith, so the first unprocessed chunk is nth. This save a bit of coordination right at the start. ggml_threadpool_chunk_set(params->threadpool, nth); @@ -1695,8 +1709,15 @@ template ne01) { + src0_end = ne01; + } + if (src0_start >= src0_end) { break; } @@ -1808,8 +1829,12 @@ template ne01) { + src0_cur_end = ne01; + } if (src0_cur_start >= src0_cur_end) { return;