mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-10-31 08:51:55 +00:00
metal : pad K, V and Mask when needed
This commit is contained in:
@@ -6627,7 +6627,8 @@ static std::vector<std::unique_ptr<test_case>> make_test_cases_eval() {
|
||||
if (hsk > 64 && nr3 > 1) continue; // skip broadcast for large head sizes
|
||||
for (int nr2 : { 1, 4, 16 }) {
|
||||
if (nr2 == 16 && hsk != 128) continue;
|
||||
-for (int kv : { 512, 1024, }) {
|
||||
+//for (int kv : { 1, 17, 31, 33, 61, 113, 65, 127, 129, 130, 255, 260, 371, 380, 407, 512, 1024, }) {
|
||||
+for (int kv : { 113, 512, 1024, }) {
|
||||
if (nr2 != 1 && kv != 512) continue;
|
||||
for (int nb : { 1, 3, 32, 35, }) {
|
||||
for (ggml_prec prec : {GGML_PREC_F32, GGML_PREC_DEFAULT}) {
|
||||
|
||||
Reference in New Issue
Block a user