Mirror of https://github.com/ggml-org/llama.cpp.git (last synced 2025-11-15 11:17:31 +00:00)
automatically calculate compute buffer sizes (without graph allocator)
This commit is contained in:
@@ -1726,7 +1726,7 @@ static ggml_backend_buffer * ggml_backend_cuda_alloc_buffer(ggml_backend * backe
|
||||
void * data;
|
||||
CUDA_CHECK(cudaMalloc(&data, size));
|
||||
|
||||
- ggml_backend_buffer * buffer = ggml_allocator_simple_init(data, size, TENSOR_ALIGNMENT);
|
||||
+ ggml_backend_buffer * buffer = ggml_allocator_default_init(data, size, TENSOR_ALIGNMENT);
|
||||
buffer->interface.free_data = ggml_backend_cuda_free_buffer;
|
||||
buffer->backend_data = data;
|
||||
|
||||
|
||||
Reference in New Issue
Block a user