mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-08 10:07:01 +00:00
ggml-zdnn: rewrite into mre
Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
@@ -56,18 +56,40 @@ typedef unsigned long long ulong64x2_t __attribute__((vector_size(16)));
|
||||
GGML_ASSERT(status == ZDNN_OK); \
|
||||
} while (0);
|
||||
|
||||
struct ggml_backend_zdnn_device_context {
|
||||
int zdnn_device;
|
||||
int zdnn_device_ref_count;
|
||||
|
||||
bool has_parmblk_1;
|
||||
|
||||
size_t max_size;
|
||||
|
||||
char name[128];
|
||||
};
|
||||
|
||||
struct ggml_backend_zdnn_context {
|
||||
int device;
|
||||
ggml_cgraph * gf;
|
||||
};
|
||||
|
||||
struct ggml_backend_zdnn_buffer {
|
||||
void * data;
|
||||
size_t size;
|
||||
ggml_backend_zdnn_buffer * extra; // for bias etc.
|
||||
|
||||
zdnn_tensor_desc pre_tfm_desc;
|
||||
zdnn_tensor_desc tfm_desc;
|
||||
zdnn_ztensor ztensor;
|
||||
|
||||
char name[GGML_MAX_NAME];
|
||||
};
|
||||
|
||||
ggml_backend_zdnn_buffer() : extra(nullptr) {}
|
||||
struct ggml_backend_zdnn_buffer_context {
|
||||
void * all_data;
|
||||
size_t all_size;
|
||||
bool owned;
|
||||
|
||||
int n_buffers;
|
||||
std::vector<ggml_backend_zdnn_buffer> buffers;
|
||||
};
|
||||
|
||||
#endif // GGML_ZDNN_IMPL
|
||||
|
||||
Reference in New Issue
Block a user