mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-15 11:17:31 +00:00
zdnn: refactor codebase + add docs (#16178)
* zdnn: initial matmul refactor Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: rm static from funcs Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: update ggml-zdnn.h Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: change header files to hpp Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: switch to common.hpp Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: move mulmat forward around Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: rm inline from utils Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * ggml-zdnn: code cleanup Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> * docs: add zDNN docs Signed-off-by: Aaron Teo <aaron.teo1@ibm.com> --------- Signed-off-by: Aaron Teo <aaron.teo1@ibm.com>
This commit is contained in:
59
ggml/src/ggml-zdnn/common.hpp
Normal file
59
ggml/src/ggml-zdnn/common.hpp
Normal file
@@ -0,0 +1,59 @@
|
||||
#ifndef GGML_ZDNN_COMMON_HPP
|
||||
#define GGML_ZDNN_COMMON_HPP
|
||||
|
||||
#include "ggml.h"
|
||||
#include "ggml-impl.h"
|
||||
|
||||
#include "zdnn.h"
|
||||
|
||||
#include <vector>
|
||||
#include <memory>
|
||||
|
||||
// Human-readable name of this backend.
#define GGML_ZDNN_NAME    "zDNN"
// Backend version; aliases ZDNN_VERNUM (presumably supplied by zdnn.h — confirm).
#define GGML_ZDNN_VERSION ZDNN_VERNUM
|
||||
|
||||
// Evaluates `stmt` exactly once, stores the result as a zdnn_status, and
// asserts through GGML_ASSERT that it equals ZDNN_OK.
//
// The body is wrapped in do { ... } while (0) WITHOUT a trailing semicolon, so
// that `ZDNN_CHECK(call);` expands to a single statement and stays valid inside
// an unbraced if/else branch. Call sites must supply their own `;`.
//
// NOTE: the result lives in a local named `status`; `stmt` must therefore not
// refer to a caller variable with that name, as it would bind to the local.
#define ZDNN_CHECK(stmt)                \
    do {                                \
        zdnn_status status = (stmt);    \
        GGML_ASSERT(status == ZDNN_OK); \
    } while (0)
|
||||
|
||||
// Per-device state for the zDNN backend.
//
// All members carry default initializers so a default-constructed context is
// fully zeroed; field order is unchanged, so positional aggregate
// initialization keeps working (C++14 and later).
struct ggml_backend_zdnn_device_context {
    int zdnn_device           = 0; // device identifier
    int zdnn_device_ref_count = 0; // presumably the number of live users of the device — confirm against callers

    bool has_parmblkformat_0 = false;
    bool has_parmblkformat_1 = false; // checks for z17

    size_t max_size = 0; // NOTE(review): units/meaning not visible in this header — confirm

    char name[128] = {}; // NUL-terminated device name, at most 127 characters
};
|
||||
|
||||
struct ggml_backend_zdnn_context {
|
||||
int device;
|
||||
ggml_cgraph * gf;
|
||||
};
|
||||
|
||||
struct ggml_backend_zdnn_buffer {
|
||||
void * data;
|
||||
ggml_backend_zdnn_buffer * extra; // for bias, etc.
|
||||
size_t size;
|
||||
|
||||
zdnn_tensor_desc pre_tfm_desc;
|
||||
zdnn_tensor_desc tfm_desc;
|
||||
zdnn_ztensor ztensor;
|
||||
|
||||
char name[GGML_MAX_NAME];
|
||||
};
|
||||
|
||||
struct ggml_backend_zdnn_buffer_context {
|
||||
void * all_data;
|
||||
size_t all_size;
|
||||
bool owned;
|
||||
|
||||
int n_buffers;
|
||||
std::vector<std::unique_ptr<ggml_backend_zdnn_buffer>> buffers;
|
||||
};
|
||||
|
||||
#endif // GGML_ZDNN_COMMON_HPP
|
||||
Reference in New Issue
Block a user