mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-28 08:31:25 +00:00
CUDA: better error for FA kernel with 0 occupancy (#16643)
This commit is contained in:
@@ -895,6 +895,7 @@ void launch_fattn(
     const dim3 block_dim(warp_size, nwarps, 1);
     int max_blocks_per_sm = 1; // Max. number of active blocks limited by occupancy.
     CUDA_CHECK(cudaOccupancyMaxActiveBlocksPerMultiprocessor(&max_blocks_per_sm, fattn_kernel, block_dim.x * block_dim.y * block_dim.z, nbytes_shared));
+    GGML_ASSERT(max_blocks_per_sm > 0);
     int parallel_blocks = max_blocks_per_sm;

     dim3 blocks_num;
Reference in New Issue
Block a user