mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
ggml-zdnn: fix incorrect ztensor shape, reduce memory padding
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
@@ -108,13 +108,14 @@ static void ggml_zdnn_free(struct ggml_backend_zdnn_context * ctx) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
struct ggml_backend_zdnn_buffer {
|
struct ggml_backend_zdnn_buffer {
|
||||||
char name[128];
|
|
||||||
void * data;
|
void * data;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
|
||||||
zdnn_tensor_desc pre_tfm_desc;
|
zdnn_tensor_desc pre_tfm_desc;
|
||||||
zdnn_tensor_desc tfm_desc;
|
zdnn_tensor_desc tfm_desc;
|
||||||
zdnn_ztensor ztensor;
|
zdnn_ztensor ztensor;
|
||||||
|
|
||||||
|
char name[GGML_MAX_NAME];
|
||||||
};
|
};
|
||||||
|
|
||||||
struct ggml_backend_zdnn_buffer_context {
|
struct ggml_backend_zdnn_buffer_context {
|
||||||
@@ -240,7 +241,7 @@ static void ggml_zdnn_init_tensor(struct ggml_backend_zdnn_buffer * buffer, stru
|
|||||||
ZDNN_2D,
|
ZDNN_2D,
|
||||||
FP32,
|
FP32,
|
||||||
&buffer->pre_tfm_desc,
|
&buffer->pre_tfm_desc,
|
||||||
1, 1, tensor->ne[1], tensor->ne[0]
|
tensor->ne[1], tensor->ne[0]
|
||||||
);
|
);
|
||||||
} break;
|
} break;
|
||||||
default:
|
default:
|
||||||
|
|||||||
Reference in New Issue
Block a user