From 55945d2ef51b93821d4b6f4a9b994393344a90db Mon Sep 17 00:00:00 2001 From: leejet Date: Sat, 25 Oct 2025 03:39:37 +0800 Subject: [PATCH] ggml: fix CUDA grid launch condition for large block_nums.y in binbcast (#16742) * Fix CUDA grid launch condition for large block_nums.y * add backend ops test * reduce test repetitions --- ggml/src/ggml-cuda/binbcast.cu | 2 +- tests/test-backend-ops.cpp | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/ggml/src/ggml-cuda/binbcast.cu b/ggml/src/ggml-cuda/binbcast.cu index 6024010274..0e6d777b1e 100644 --- a/ggml/src/ggml-cuda/binbcast.cu +++ b/ggml/src/ggml-cuda/binbcast.cu @@ -272,7 +272,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor * const uint3 ne12 = init_fastdiv_values((uint32_t) cne1[2]); const uint3 ne13 = init_fastdiv_values((uint32_t) cne1[3]); - if (block_nums.z > 65535) { + if (block_nums.z > 65535 || block_nums.y > 65535) { int block_num = (ne0 * ne1 * ne2 * ne3 + block_size - 1) / block_size; const uint3 prod_012 = init_fastdiv_values((uint32_t) (ne0 * ne1 * ne2)); const uint3 prod_01 = init_fastdiv_values((uint32_t) (ne0 * ne1)); diff --git a/tests/test-backend-ops.cpp b/tests/test-backend-ops.cpp index 991c625979..9eb2b66879 100644 --- a/tests/test-backend-ops.cpp +++ b/tests/test-backend-ops.cpp @@ -6407,6 +6407,7 @@ static std::vector> make_test_cases_eval() { add_test_bin_bcast(type, {1, 1, 640, 1}, {32, 32, 1, 1}); add_test_bin_bcast(type, {5120, 1, 1, 1}, {1, 256, 1, 1}); add_test_bin_bcast(type, {640, 1, 1, 1}, {1, 1, 1, 1}); + add_test_bin_bcast(type, {64, 262144, 1, 1}, {1, 1, 1, 1}); //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {1, 1, 1, 1}); //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {2, 1, 1, 1}); }