mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
cuda : fix tensor size calculation for non-split buffer (#5145)
This commit is contained in:
@@ -30,7 +30,9 @@ size_t ggml_backend_buft_get_alignment(ggml_backend_buffer_type_t buft) {
|
||||
GGML_CALL size_t ggml_backend_buft_get_alloc_size(ggml_backend_buffer_type_t buft, struct ggml_tensor * tensor) {
|
||||
// get_alloc_size is optional, defaults to ggml_nbytes
|
||||
if (buft->iface.get_alloc_size) {
|
||||
return buft->iface.get_alloc_size(buft, tensor);
|
||||
size_t size = buft->iface.get_alloc_size(buft, tensor);
|
||||
assert(size >= ggml_nbytes(tensor));
|
||||
return size;
|
||||
}
|
||||
return ggml_nbytes(tensor);
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user