CUDA: fix bad asserts for partial offload (#13337)

This commit is contained in:
Johannes Gäßler
2025-05-06 13:58:51 +02:00
committed by GitHub
parent 764b85627b
commit 2356fb1d53
6 changed files with 21 additions and 6 deletions

View File

@@ -515,7 +515,8 @@ void ggml_cuda_mul_mat_vec_q(
// If src0 is a temporary compute buffer, clear any potential padding.
if (ggml_backend_buffer_get_usage(src0->buffer) == GGML_BACKEND_BUFFER_USAGE_COMPUTE) {
-GGML_ASSERT(ggml_is_contiguous(src0));
+GGML_ASSERT(ggml_is_contiguously_allocated(src0));
+GGML_ASSERT(!src0->view_src);
const size_t size_data = ggml_nbytes(src0);
const size_t size_alloc = ggml_backend_buffer_get_alloc_size(src0->buffer, src0);
if (size_alloc > size_data) {