Mirror of https://github.com/ggml-org/llama.cpp.git (last synced 2025-11-08 10:07:01 +00:00).
ggml-zdnn: tighten memory usage, change string allocation
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
@@ -59,13 +59,13 @@ typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
|
|||||||
struct ggml_backend_zdnn_buffer {
|
struct ggml_backend_zdnn_buffer {
|
||||||
void * data;
|
void * data;
|
||||||
size_t size;
|
size_t size;
|
||||||
|
ggml_backend_zdnn_buffer * extra; // for bias etc.
|
||||||
|
|
||||||
zdnn_tensor_desc pre_tfm_desc;
|
zdnn_tensor_desc pre_tfm_desc;
|
||||||
zdnn_tensor_desc tfm_desc;
|
zdnn_tensor_desc tfm_desc;
|
||||||
zdnn_ztensor ztensor;
|
zdnn_ztensor ztensor;
|
||||||
|
|
||||||
char name[GGML_MAX_NAME];
|
char name[GGML_MAX_NAME];
|
||||||
struct ggml_backend_zdnn_buffer * extra; // for bias etc.
|
|
||||||
|
|
||||||
ggml_backend_zdnn_buffer() : extra(nullptr) {}
|
ggml_backend_zdnn_buffer() : extra(nullptr) {}
|
||||||
};
|
};
|
||||||
|
|||||||
@@ -362,8 +362,7 @@ static enum ggml_status ggml_backend_zdnn_buffer_init_tensor(ggml_backend_buffer
|
|||||||
tensor_buffer = &ctx->buffers[tensor_buffer_idx];
|
tensor_buffer = &ctx->buffers[tensor_buffer_idx];
|
||||||
tensor_buffer->data = tensor->data;
|
tensor_buffer->data = tensor->data;
|
||||||
tensor_buffer->size = tsize;
|
tensor_buffer->size = tsize;
|
||||||
strncpy(tensor_buffer->name, tensor->name, sizeof(tensor_buffer->name) - 1);
|
snprintf(tensor_buffer->name, sizeof(tensor_buffer->name), "%s", tensor->name);
|
||||||
tensor_buffer->name[sizeof(tensor_buffer->name) - 1] = '\0';
|
|
||||||
|
|
||||||
ggml_zdnn_init_tensor(tensor_buffer, tensor);
|
ggml_zdnn_init_tensor(tensor_buffer, tensor);
|
||||||
ctx->n_buffers++;
|
ctx->n_buffers++;
|
||||||
@@ -374,8 +373,7 @@ static enum ggml_status ggml_backend_zdnn_buffer_init_tensor(ggml_backend_buffer
|
|||||||
bias_buffer = &ctx->buffers[bias_buffer_idx];
|
bias_buffer = &ctx->buffers[bias_buffer_idx];
|
||||||
bias_buffer->data = calloc(tensor->ne[0], tensor->ne[0] * sizeof(float));
|
bias_buffer->data = calloc(tensor->ne[0], tensor->ne[0] * sizeof(float));
|
||||||
bias_buffer->size = tensor->ne[0] * sizeof(float);
|
bias_buffer->size = tensor->ne[0] * sizeof(float);
|
||||||
strncpy(bias_buffer->name, "bias", sizeof(bias_buffer->name) - 1);
|
snprintf(bias_buffer->name, sizeof(bias_buffer->name), "%s.bias", tensor->name);
|
||||||
bias_buffer->name[sizeof(bias_buffer->name) - 1] = '\0';
|
|
||||||
|
|
||||||
ggml_zdnn_init_bias_tensor(bias_buffer, tensor);
|
ggml_zdnn_init_bias_tensor(bias_buffer, tensor);
|
||||||
ctx->n_buffers++;
|
ctx->n_buffers++;
|
||||||
|
|||||||
Reference in New Issue
Block a user