mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-27 08:21:30 +00:00
vulkan: handle mat_mul with A matrix > 4GB (#16176)
* vulkan: handle mat_mul with A matrix > 4GB

  This change splits mat_mul operations with a huge A matrix into chunks along the M dimension. This works well for stable-diffusion use cases, where the im2col matrix has a very large M.

  Fix the order of setting the stride in mul_mm_cm2: setting the dimension clobbers the stride, so the stride must be set after the dimension.

* build fixes
This commit is contained in:
@@ -6200,6 +6200,14 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 32, 4));
|
||||
test_cases.emplace_back(new test_gla(GGML_TYPE_F32, 32, 64, 128, 4));
|
||||
|
||||
#if 0
|
||||
// > 4GB A matrix. Too slow to be enabled by default.
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 900000, 3, 2592, {1, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000, 96, 2592, {1, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000, 3, 2592, {1, 1}, {1, 1}));
|
||||
test_cases.emplace_back(new test_mul_mat(GGML_TYPE_F16, GGML_TYPE_F16, 1700000, 1, 2592, {1, 1}, {1, 1}));
|
||||
#endif
|
||||
|
||||
for (ggml_type type_a : all_types) {
|
||||
for (int i = 1; i < 10; ++i) {
|
||||
test_cases.emplace_back(new test_mul_mat(type_a, GGML_TYPE_F32, 16, i, 256, { 1, 1}, {1, 1}));
|
||||
|
||||
Reference in New Issue
Block a user