CUDA: Add fastdiv to k_bin_bcast*, giving 1-3% E2E performance (#15872)
* Add fastdiv and fastmodulo to k_bin_bcast kernel
* Address review comments
* `prod_` instead of `prod` suffix
* Add test case for `k_bin_bcast_unravel` in CUDA backend
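For context, the speedup comes from replacing integer `/` and `%` in the kernel's index arithmetic with a precomputed multiply-high and shift (Granlund-Montgomery style "magic number" division). The sketch below shows the general technique only; the helper names (`make_fastdiv_vals`) and the packing of `<mp, L, d>` into a `uint3` are illustrative assumptions and may differ from the exact helpers added to ggml-cuda.

```cpp
#include <cuda_runtime.h>
#include <cstdint>

// Host side: precompute a magic multiplier mp and shift L for a fixed divisor d,
// so device code can divide by d without an integer divide instruction.
// (Illustrative helper, not necessarily the exact ggml-cuda API.)
static uint3 make_fastdiv_vals(uint32_t d) {
    uint32_t L = 0;                                  // L = ceil(log2(d))
    while (L < 32 && (uint32_t{1} << L) < d) {
        L++;
    }
    // mp = floor(2^32 * (2^L - d) / d) + 1  ("round-up" magic number)
    const uint32_t mp = (uint32_t)(((uint64_t{1} << 32) * ((uint64_t{1} << L) - d)) / d + 1);
    return make_uint3(mp, L, d);                     // pack <mp, L, d>
}

// Device side: n / d as a multiply-high plus shift.
// Exact for n < 2^31, which covers tensor index arithmetic in broadcast kernels.
static __device__ __forceinline__ uint32_t fastdiv(uint32_t n, const uint3 v) {
    return (__umulhi(n, v.x) + n) >> v.y;
}

// Device side: n % d, reusing the fastdiv quotient and the packed divisor.
static __device__ __forceinline__ uint32_t fastmodulo(uint32_t n, const uint3 v) {
    return n - fastdiv(n, v) * v.z;
}
```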
```diff
@@ -6050,6 +6050,9 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
         add_test_bin_bcast(type, {10, 5, 4, 3}, {1, 2, 2, 2});
         add_test_bin_bcast(type, {10, 5, 4, 3}, {2, 2, 2, 2});
 
+        // test case for k_bin_bcast_unravel in CUDA backend
+        add_test_bin_bcast(type, {1, 1, 65536, 1}, {256, 1, 1, 1});
+
         // stable diffusion
         add_test_bin_bcast(type, {1280, 1, 1, 1}, {1, 1, 1, 1});
         add_test_bin_bcast(type, {1280, 1, 1, 1}, {1, 16, 16, 1});
```
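The new `{1, 1, 65536, 1}` vs `{256, 1, 1, 1}` shape was presumably chosen because a very large third dimension exceeds the CUDA launch grid's z-limit (65535 blocks), forcing the path where each thread unravels a flat element index into 4D coordinates instead of reading them from the 3D grid. That unravelling is exactly where fastdiv/fastmodulo replace `/` and `%`. Below is a hypothetical sketch of that pattern, reusing the helpers from the sketch above; the parameter layout and dimension order are illustrative and not the actual `k_bin_bcast_unravel` signature.

```cpp
// Hypothetical demo kernel: unravel a flat index i into (i0, i1, i2, i3)
// without any integer divide or modulo, using fastdiv/fastmodulo as sketched above.
__global__ void k_unravel_demo(float * dst, uint32_t n_elems,
                               uint3 fd_ne0,     // fastdiv values for ne0
                               uint3 fd_ne01,    // fastdiv values for ne0*ne1
                               uint3 fd_ne012) { // fastdiv values for ne0*ne1*ne2
    const uint32_t i = blockIdx.x * blockDim.x + threadIdx.x;
    if (i >= n_elems) {
        return;
    }
    // i = ((i3*ne2 + i2)*ne1 + i1)*ne0 + i0
    const uint32_t i3 = fastdiv   (i,  fd_ne012); // i / (ne0*ne1*ne2)
    const uint32_t r3 = fastmodulo(i,  fd_ne012); // i % (ne0*ne1*ne2)
    const uint32_t i2 = fastdiv   (r3, fd_ne01);  // r3 / (ne0*ne1)
    const uint32_t r2 = fastmodulo(r3, fd_ne01);  // r3 % (ne0*ne1)
    const uint32_t i1 = fastdiv   (r2, fd_ne0);   // r2 / ne0
    const uint32_t i0 = fastmodulo(r2, fd_ne0);   // r2 % ne0

    // A real broadcast kernel would index src0/src1 with (i0, i1, i2, i3);
    // placeholder write so the demo compiles and the indices are not optimized away.
    dst[i] = (float)(i0 + i1 + i2 + i3);
}
```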