diff --git a/ggml/src/ggml-alloc.c b/ggml/src/ggml-alloc.c
index c830c09655..91aff205f1 100644
--- a/ggml/src/ggml-alloc.c
+++ b/ggml/src/ggml-alloc.c
@@ -226,16 +226,23 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
     }
 
     if (best_fit_block == -1) {
-        // no suitable block found, try the last block (this will grow a chunks size)
+        // no suitable block found, try the last block (this may grow a chunk's size)
+        int64_t best_reuse = INT64_MIN;
         for (int c = 0; c < alloc->n_chunks; ++c) {
             struct tallocr_chunk * chunk = alloc->chunks[c];
             if (chunk->n_free_blocks > 0) {
                 struct free_block * block = &chunk->free_blocks[chunk->n_free_blocks - 1];
                 max_avail = MAX(max_avail, block->size);
-                if (block->size >= size) {
+                int64_t reuse_factor = chunk->max_size - block->offset - size;
+                // reuse_factor < 0 : amount of extra memory that needs to be allocated
+                // reuse_factor = 0 : allocated free space exactly matches tensor size
+                // reuse_factor > 0 : superfluous memory that will remain unused
+                bool better_reuse = best_reuse < 0 && reuse_factor > best_reuse;
+                bool better_fit = reuse_factor >= 0 && reuse_factor < best_reuse;
+                if (block->size >= size && (better_reuse || better_fit)) {
                     best_fit_chunk = c;
                     best_fit_block = chunk->n_free_blocks - 1;
-                    break;
+                    best_reuse = reuse_factor;
                 }
             }
         }
@@ -268,7 +275,7 @@ static struct buffer_address ggml_dyn_tallocr_alloc(struct ggml_dyn_tallocr * al
 #ifdef GGML_ALLOCATOR_DEBUG
     add_allocated_tensor(alloc, addr, tensor);
     size_t cur_max = addr.offset + size;
-    if (cur_max > alloc->max_size[addr.chunk]) {
+    if (cur_max > chunk->max_size) {
         // sort allocated_tensors by chunk/offset
         for (int i = 0; i < 1024; i++) {
             for (int j = i + 1; j < 1024; j++) {
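
For reference, here is a standalone sketch of the selection heuristic the patch introduces. The types and names below (`struct chunk`, `struct last_block`, `pick_chunk`) are simplified stand-ins for illustration, not the real ggml-alloc types; only the `reuse_factor` arithmetic mirrors the diff above.

```c
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

struct last_block { int64_t offset, size; };             // last free block of a chunk
struct chunk      { int64_t max_size;                    // chunk's high-water mark
                    struct last_block last; };           // may extend past max_size

// Returns the index of the chunk whose last free block should host a tensor
// of `size` bytes, or -1 if no last block is large enough.
static int pick_chunk(const struct chunk *chunks, int n, int64_t size) {
    int     best       = -1;
    int64_t best_reuse = INT64_MIN;
    for (int c = 0; c < n; ++c) {
        const struct chunk *ch = &chunks[c];
        if (ch->last.size < size) {
            continue; // mirrors the `block->size >= size` guard
        }
        // reuse_factor < 0 : extra memory the chunk would have to grow by
        // reuse_factor = 0 : free space at the end exactly matches the tensor
        // reuse_factor > 0 : already-committed memory that would stay unused
        int64_t reuse_factor = ch->max_size - ch->last.offset - size;
        bool better_reuse = best_reuse < 0 && reuse_factor > best_reuse;
        bool better_fit   = reuse_factor >= 0 && reuse_factor < best_reuse;
        if (better_reuse || better_fit) {
            best       = c;
            best_reuse = reuse_factor;
        }
    }
    return best;
}

int main(void) {
    // For a 320-byte tensor: chunk 0 would have to grow by 64 bytes
    // (reuse_factor -64), chunk 1 would leave 1728 bytes unused (+1728),
    // chunk 2 would leave only 64 (+64) -- the heuristic picks chunk 2.
    struct chunk chunks[] = {
        { .max_size = 1024, .last = { .offset =  768, .size =  512 } },
        { .max_size = 4096, .last = { .offset = 2048, .size = 2048 } },
        { .max_size = 1408, .last = { .offset = 1024, .size = 1024 } },
    };
    printf("picked chunk %d\n", pick_chunk(chunks, 3, 320)); // prints 2
    return 0;
}
```

The net effect of the two flag variables: among candidates that would all force a chunk to grow (`best_reuse < 0`), the one needing the least growth wins; as soon as one candidate fits without growing, only smaller non-negative `reuse_factor` values can displace it, so the allocator wastes the least already-committed memory instead of taking the first block that fits.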