initial implementation of delayed graph allocation

This commit is contained in:
slaren
2023-07-20 15:57:48 +02:00
parent cb205c0d13
commit de69f8f20d
6 changed files with 165 additions and 87 deletions

37
ggml.h
View File

@@ -474,24 +474,18 @@ extern "C" {
int64_t perf_time_us;
};
// tensor allocation modes; enum ggml_alloc_mode is the type of ggml_init_params.alloc_mode
// and of the mode argument of ggml_set_alloc_mode()
// NOTE(review): as written, the whole enum sits inside a /* TODO */ comment and several
// enumerators are listed twice (once bare, once with a description). This looks like the
// removed and added sides of a diff hunk shown together - confirm which set is current.
/*
TODO
enum ggml_alloc_mode {
GGML_ALLOC_IMMEDIATE,
GGML_ALLOC_NONE,
GGML_ALLOC_COMPUTE_SEQ,
GGML_ALLOC_COMPUTE_PAR,
GGML_ALLOC_NONE, // do not allocate tensors
GGML_ALLOC_IMMEDIATE, // allocate tensors immediately
GGML_ALLOC_COMPUTE_SEQ, // delay allocation until graph build time, allocate tensors for sequential graph computation
//GGML_ALLOC_COMPUTE_PAR, // allocate tensors for parallel graph computation
};
*/
// context parameters, passed by value to ggml_init()
// NOTE(review): compute_type is declared twice below, and no_alloc appears alongside both a
// commented-out and a live alloc_mode member. This looks like the removed and added lines of
// a diff hunk shown together - confirm the actual member list before relying on it.
struct ggml_init_params {
struct ggml_buffer * buffer;
bool no_alloc; // don't allocate memory for the tensor data
//enum ggml_alloc_mode alloc_mode; // TODO: replace the above with this
enum ggml_type compute_type; // type of intermediate results
enum ggml_alloc_mode alloc_mode; // tensor allocation mode
enum ggml_type compute_type; // type of intermediate results
};
// task types
@@ -559,15 +553,15 @@ extern "C" {
// context API: ggml_init() takes a ggml_init_params by value and returns a context pointer;
// the remaining functions all take that context pointer as their first argument
// NOTE(review): only prototypes are visible here, so the behavior notes below are inferred
// from names and signatures - confirm against the implementation. The list may also mix
// removed and added declarations from a diff (e.g. both ggml_set_alloc_mode and
// ggml_set_no_alloc are present).
GGML_API struct ggml_context * ggml_init(struct ggml_init_params params);
GGML_API void ggml_free(struct ggml_context * ctx);
// presumably changes the tensor allocation mode of ctx - TODO confirm
GGML_API void ggml_set_alloc_mode(struct ggml_context * ctx, enum ggml_alloc_mode mode);
// TODO: update for ggml_buffer
GGML_API size_t ggml_used_mem(const struct ggml_context * ctx);
GGML_API void ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);
GGML_API void * ggml_get_mem_buffer (const struct ggml_context * ctx);
GGML_API size_t ggml_get_mem_size (const struct ggml_context * ctx);
GGML_API size_t ggml_get_max_tensor_size(const struct ggml_context * ctx);
// accessors returning pointers to the backend / buffer types for a context
// (presumably the ones the context was created with - TODO confirm)
GGML_API struct ggml_backend * ggml_get_ctx_backend(struct ggml_context * ctx);
GGML_API struct ggml_buffer * ggml_get_buffer(const struct ggml_context * ctx);
GGML_API struct ggml_tensor * ggml_new_tensor(
struct ggml_context * ctx,
@@ -1130,6 +1124,17 @@ extern "C" {
int mode,
int n_ctx);
// custom RoPE
// takes a context and a tensor a and returns a tensor pointer; in addition to the
// n_past / n_dims / mode / n_ctx integer arguments it accepts two float parameters,
// freq_base and freq_scale
// NOTE(review): presumably freq_base and freq_scale are the rotary frequency base and a
// frequency scaling factor - only the names are visible here, confirm against the implementation
GGML_API struct ggml_tensor * ggml_rope_custom(
struct ggml_context * ctx,
struct ggml_tensor * a,
int n_past,
int n_dims,
int mode,
float freq_base,
float freq_scale,
int n_ctx);
// custom RoPE, in-place, returns view(a)
GGML_API struct ggml_tensor * ggml_rope_custom_inplace(
struct ggml_context * ctx,