mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-21 12:16:57 +00:00
ggml-zdnn: activate fp16 and bf16
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
@@ -258,22 +258,30 @@ static bool ggml_zdnn_supports_op(const ggml_backend_zdnn_device_context * ctx_d
|
|||||||
|
|
||||||
case GGML_OP_MUL_MAT:
|
case GGML_OP_MUL_MAT:
|
||||||
{
|
{
|
||||||
const ggml_tensor * src0 = op->src[0];
|
const ggml_tensor * weights = op->src[0];
|
||||||
const ggml_tensor * src1 = op->src[1];
|
const ggml_tensor * inputs = op->src[1];
|
||||||
|
|
||||||
const int64_t ne10 = src1->ne[0];
|
const int64_t ne10 = inputs->ne[0];
|
||||||
const int64_t ne0 = op->ne[0];
|
const int64_t ne0 = op->ne[0];
|
||||||
const int64_t ne1 = op->ne[1];
|
const int64_t ne1 = op->ne[1];
|
||||||
|
|
||||||
const int64_t max_batch = ctx_dev->max_size;
|
const int64_t max_batch = ctx_dev->max_size;
|
||||||
|
|
||||||
return ggml_is_matrix(src0) &&
|
if (!ggml_is_matrix(weights) || !ggml_is_matrix(inputs) ||
|
||||||
ggml_is_matrix(src1) &&
|
!ggml_is_contiguous(weights) || !ggml_is_contiguous(inputs) ||
|
||||||
ggml_is_contiguous(src0) &&
|
weights->view_src != nullptr || inputs->view_src != nullptr ||
|
||||||
ggml_is_contiguous(src1) &&
|
ne0 > max_batch || ne1 > max_batch || ne10 > max_batch) {
|
||||||
src0->view_src == nullptr && src1->view_src == nullptr &&
|
return false;
|
||||||
src0->type == GGML_TYPE_F32 && src1->type == GGML_TYPE_F32 &&
|
}
|
||||||
(ne0 <= max_batch && ne1 <= max_batch && ne10 <= max_batch);
|
|
||||||
|
switch (weights->type) {
|
||||||
|
case GGML_TYPE_F32:
|
||||||
|
case GGML_TYPE_F16:
|
||||||
|
case GGML_TYPE_BF16:
|
||||||
|
return true;
|
||||||
|
default:
|
||||||
|
return false;
|
||||||
|
}
|
||||||
} break;
|
} break;
|
||||||
|
|
||||||
default:
|
default:
|
||||||
|
|||||||
Reference in New Issue
Block a user