mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	ggml: fix CUDA grid launch condition for large block_nums.y in binbcast (#16742)
* Fix CUDA grid launch condition for large block_nums.y
* add backend ops test
* reduce test repetitions
This commit is contained in:
		| @@ -272,7 +272,7 @@ static void launch_bin_bcast_pack(const ggml_tensor * src0, const ggml_tensor * | |||||||
|         const uint3 ne12 = init_fastdiv_values((uint32_t) cne1[2]); |         const uint3 ne12 = init_fastdiv_values((uint32_t) cne1[2]); | ||||||
|         const uint3 ne13 = init_fastdiv_values((uint32_t) cne1[3]); |         const uint3 ne13 = init_fastdiv_values((uint32_t) cne1[3]); | ||||||
|  |  | ||||||
|         if (block_nums.z > 65535) { |         if (block_nums.z > 65535 || block_nums.y > 65535) { | ||||||
|             int         block_num  = (ne0 * ne1 * ne2 * ne3 + block_size - 1) / block_size; |             int         block_num  = (ne0 * ne1 * ne2 * ne3 + block_size - 1) / block_size; | ||||||
|             const uint3 prod_012    = init_fastdiv_values((uint32_t) (ne0 * ne1 * ne2)); |             const uint3 prod_012    = init_fastdiv_values((uint32_t) (ne0 * ne1 * ne2)); | ||||||
|             const uint3 prod_01     = init_fastdiv_values((uint32_t) (ne0 * ne1)); |             const uint3 prod_01     = init_fastdiv_values((uint32_t) (ne0 * ne1)); | ||||||
|   | |||||||
| @@ -6407,6 +6407,7 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() { | |||||||
|         add_test_bin_bcast(type, {1, 1, 640, 1}, {32, 32, 1, 1}); |         add_test_bin_bcast(type, {1, 1, 640, 1}, {32, 32, 1, 1}); | ||||||
|         add_test_bin_bcast(type, {5120, 1, 1, 1}, {1, 256, 1, 1}); |         add_test_bin_bcast(type, {5120, 1, 1, 1}, {1, 256, 1, 1}); | ||||||
|         add_test_bin_bcast(type, {640, 1, 1, 1}, {1, 1, 1, 1}); |         add_test_bin_bcast(type, {640, 1, 1, 1}, {1, 1, 1, 1}); | ||||||
|  |         add_test_bin_bcast(type, {64, 262144, 1, 1}, {1, 1, 1, 1}); | ||||||
|         //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {1, 1, 1, 1}); |         //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {1, 1, 1, 1}); | ||||||
|         //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {2, 1, 1, 1}); |         //add_test_bin_bcast(type, {3, 3, 2560, 1280}, {2, 1, 1, 1}); | ||||||
|     } |     } | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 leejet
					leejet