mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-02 09:12:03 +00:00
[CANN]MUL_MAT optimization (#12382)
This commit is contained in:
@@ -1689,11 +1689,6 @@ static bool ggml_backend_cann_supports_op(ggml_backend_dev_t dev,
|
||||
case GGML_OP_MUL_MAT: {
|
||||
switch (op->src[0]->type) {
|
||||
case GGML_TYPE_Q8_0:
|
||||
// Current groupsize should not be greater than k-1 in
|
||||
// aclnnWeightQuantBatchMatmulV2GetWorkspaceSize
|
||||
if (op->src[0]->ne[0] <= QK8_0) {
|
||||
return false;
|
||||
}
|
||||
case GGML_TYPE_F16:
|
||||
case GGML_TYPE_F32:
|
||||
case GGML_TYPE_Q4_0:
|
||||
|
||||
Reference in New Issue
Block a user