Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)

	ggml : add view_src and view_offs to ggml_tensor for views (#2874)
* ggml : add view_src and view_offs
* update ggml-alloc to use view_src
* update ggml_diag_mask to work correctly with automatic inplace
* exclude other ops that set an inplace flag from automatic inplace
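The heart of the change: every ggml_tensor now records which tensor's buffer it aliases and at what absolute byte offset. The two new fields, as declared in the ggml.h hunk at the bottom of this commit:

    struct ggml_tensor * view_src;   // base tensor whose data this tensor aliases; NULL if it owns its data
    size_t               view_offs;  // absolute byte offset into view_src->data

Both are fixed once at tensor creation, so consumers never have to walk a chain of view ops to find the underlying buffer.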
ggml-alloc.c (53 lines changed)
@@ -321,8 +321,7 @@ bool ggml_allocr_is_measure(struct ggml_allocr * alloc) {
 //////////// compute graph allocator
 
 static bool ggml_is_view(struct ggml_tensor * t) {
-    return t->op == GGML_OP_RESHAPE || t->op == GGML_OP_VIEW || t->op == GGML_OP_TRANSPOSE ||
-           t->op == GGML_OP_PERMUTE || t->op == GGML_OP_CPY;
+    return t->view_src != NULL;
 }
 
 static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml_tensor * b) {
@@ -340,28 +339,6 @@ static bool ggml_are_same_layout(const struct ggml_tensor * a, const struct ggml
     return true;
 }
 
-static struct ggml_tensor * get_view_parent(struct ggml_tensor * t) {
-    switch (t->op) {
-        case GGML_OP_PERMUTE:
-        case GGML_OP_RESHAPE:
-        case GGML_OP_TRANSPOSE:
-        case GGML_OP_VIEW:
-            return t->src[0];
-        case GGML_OP_CPY:
-            return t->src[1];
-        default:
-            return NULL;
-    }
-}
-
-static struct ggml_tensor * get_view_source(struct ggml_tensor * t) {
-    struct ggml_tensor * parent = t;
-    do {
-        parent = get_view_parent(parent);
-    } while (ggml_is_view(parent));
-    return parent;
-}
-
 static bool ggml_op_can_inplace(enum ggml_op op) {
     switch (op) {
         case GGML_OP_SCALE:
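Since ggml_new_tensor_impl (see the ggml.c hunks below) flattens view chains at creation time, the recursive get_view_parent/get_view_source walk collapses to a single field read. A minimal sketch of the equivalence, not part of the patch itself:

    // O(1) replacement for the removed get_view_source(): view_src, when
    // set, already points at the final non-view owner of the data.
    static struct ggml_tensor * view_base(struct ggml_tensor * t) {
        return t->view_src != NULL ? t->view_src : t;
    }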
@@ -369,7 +346,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
         case GGML_OP_DIAG_MASK_INF:
         case GGML_OP_ADD:
         case GGML_OP_ADD1:
-        case GGML_OP_ACC:
         case GGML_OP_SUB:
         case GGML_OP_MUL:
         case GGML_OP_DIV:
@@ -379,7 +355,6 @@ static bool ggml_op_can_inplace(enum ggml_op op) {
         case GGML_OP_UNARY:
         case GGML_OP_ROPE:
         case GGML_OP_RMS_NORM:
-        case GGML_OP_SET:
         case GGML_OP_SOFT_MAX:
         case GGML_OP_CONT:
             return true;
@@ -393,24 +368,8 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
     struct hash_node * ht = alloc->hash_table;
     if (node->data == NULL) {
         if (ggml_is_view(node)) {
-            size_t offset;
-            switch(node->op) {
-                case GGML_OP_VIEW:
-                    memcpy(&offset, node->op_params, sizeof(size_t));
-                    node->data = (char *) node->src[0]->data + offset;
-                    break;
-                case GGML_OP_PERMUTE:
-                case GGML_OP_RESHAPE:
-                case GGML_OP_TRANSPOSE:
-                    node->data = node->src[0]->data;
-                    break;
-                case GGML_OP_CPY:
-                    node->data = node->src[1]->data;
-                    break;
-                default:
-                    GGML_ASSERT(!"unknown view op");
-                    break;
-            }
+            assert(node->view_src->data != NULL);
+            node->data = (char *)node->view_src->data + node->view_offs;
         } else {
             // see if we can reuse a parent's buffer (inplace)
             if (ggml_op_can_inplace(node->op)) {
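Note that view_offs is absolute, not relative to the immediate parent, which is why allocate_node can add it exactly once. A hedged example with two stacked 1-D views (all sizes and offsets made up for illustration):

    struct ggml_tensor * base = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 1024);
    struct ggml_tensor * v1   = ggml_view_1d(ctx, base, 512, 256*sizeof(float));
    struct ggml_tensor * v2   = ggml_view_1d(ctx, v1,   128,  64*sizeof(float));
    // chain flattening at creation time gives:
    //   v2->view_src  == base                        (not v1)
    //   v2->view_offs == (256 + 64)*sizeof(float)    (absolute offset)
    // so any view is materialized with a single addition:
    //   v2->data == (char *) base->data + v2->view_offs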
@@ -430,7 +389,7 @@ static void allocate_node(struct ggml_allocr * alloc, struct ggml_tensor * node)
                     struct hash_node * p_hn = hash_get(ht, parent);
                     if (parent->data != NULL && p_hn->n_children == 1 && p_hn->n_views == 0 && ggml_are_same_layout(node, parent)) {
                         if (ggml_is_view(parent)) {
-                            struct ggml_tensor * view_src = get_view_source(parent);
+                            struct ggml_tensor * view_src = parent->view_src;
                             struct hash_node * view_src_hn = hash_get(ht, view_src);
                             if (view_src_hn->n_views == 1 && view_src_hn->n_children == 0 && view_src->data == parent->data) {
                                 // TODO: the offset of the view parent must be kept to ensure that the op doesn't overwrite
@@ -472,7 +431,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
             struct ggml_tensor * node = gf->nodes[i];
 
             if (ggml_is_view(node)) {
-                struct ggml_tensor * view_src = get_view_source(node);
+                struct ggml_tensor * view_src = node->view_src;
                 hash_get(ht, view_src)->n_views += 1;
             }
 
@@ -557,7 +516,7 @@ static size_t ggml_allocator_alloc_graph_tensors_n(
 
                         if (p_hn->n_children == 0 && p_hn->n_views == 0) {
                             if (ggml_is_view(parent)) {
-                                struct ggml_tensor * view_src = get_view_source(parent);
+                                struct ggml_tensor * view_src = parent->view_src;
                                 struct hash_node * view_src_hn = hash_get(ht, view_src);
                                 view_src_hn->n_views -= 1;
                                 AT_PRINTF("view_src %s: %d children, %d views\n", view_src->name, view_src_hn->n_children, view_src_hn->n_views);
							
								
								
									
ggml.c (201 lines changed)
@@ -4104,16 +4104,11 @@ int64_t ggml_nrows(const struct ggml_tensor * tensor) {
 }
 
 size_t ggml_nbytes(const struct ggml_tensor * tensor) {
-    static_assert(GGML_MAX_DIMS == 4, "GGML_MAX_DIMS is not 4 - update this function");
-
-    // this should handle cases where the tensor is not contiguous in memory
-    // probably just:
-    //
-    //     return tensor->ne[3]*tensor->nb[3]
-    //
-    // is enough, but just in case, adding the second part
-
-    return MAX(tensor->ne[3]*tensor->nb[3], (ggml_nelements(tensor)*ggml_type_size(tensor->type))/ggml_blck_size(tensor->type));
+    size_t nbytes = tensor->ne[0]*tensor->nb[0]/ggml_blck_size(tensor->type);
+    for (int i = 1; i < GGML_MAX_DIMS; ++i) {
+        nbytes += (tensor->ne[i] - 1)*tensor->nb[i];
+    }
+    return nbytes;
 }
 
 size_t ggml_nbytes_pad(const struct ggml_tensor * tensor) {
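The new ggml_nbytes measures the actual byte span of a possibly non-contiguous tensor: the first row's payload plus, for each higher dimension, the distance covered by its remaining ne[i]-1 strides. A worked example with made-up sizes, for an f32 view with a padded row stride:

    // ne = {3, 4, 1, 1}, nb = {4, 32, 128, 128}  (f32, blck_size = 1;
    // rows sit 32 bytes apart but only 3*4 = 12 bytes of each are used)
    //
    // nbytes = ne[0]*nb[0]/blck_size + (ne[1]-1)*nb[1] + (ne[2]-1)*nb[2] + (ne[3]-1)*nb[3]
    //        = 12 + 3*32 + 0 + 0 = 108 bytes
    //
    // The old code returned MAX(ne[3]*nb[3], 12*4) = 128 here, overstating
    // the bytes actually reachable through this layout.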
@@ -4567,20 +4562,33 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         enum   ggml_type      type,
         int                   n_dims,
         const int64_t       * ne,
-        void                * data) {
+        struct ggml_tensor  * view_src,
+        size_t                view_offs) {
 
     assert(n_dims >= 1 && n_dims <= GGML_MAX_DIMS);
 
-    size_t data_size = 0;
+    // find the base tensor and absolute offset
+    if (view_src != NULL && view_src->view_src != NULL) {
+        view_offs += view_src->view_offs;
+        view_src   = view_src->view_src;
+    }
 
-    if (data == NULL && !ctx->no_alloc) {
-        data_size += ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
-        for (int i = 1; i < n_dims; i++) {
-            data_size *= ne[i];
-        }
+    size_t data_size = ggml_type_size(type)*(ne[0]/ggml_blck_size(type));
+    for (int i = 1; i < n_dims; i++) {
+        data_size *= ne[i];
     }
 
-    if (ctx->scratch.data != NULL && data == NULL) {
+    GGML_ASSERT(view_src == NULL || data_size + view_offs <= ggml_nbytes(view_src));
+
+    void * data = view_src != NULL ? view_src->data : NULL;
+    if (data != NULL) {
+        data = (char *) data + view_offs;
+    }
+
+    size_t obj_alloc_size = 0;
+
+    if (view_src == NULL && ctx->no_alloc == false) {
+        if (ctx->scratch.data != NULL) {
             // allocate tensor data in the scratch buffer
             if (ctx->scratch.offs + data_size > ctx->scratch.size) {
                 GGML_PRINT("%s: not enough space in the scratch memory pool (needed %zu, available %zu)\n",
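A single flattening step suffices here. Every tensor is created through this function, so by induction the view_src of any existing tensor is itself a base (non-view) tensor:

    // v1 = view(base): v1->view_src = base,               v1->view_offs = o1
    // v2 = view(v1):   v2->view_src = v1->view_src = base
    //                  v2->view_offs = o2 + v1->view_offs = o2 + o1
    // base is never a view, so the chain never grows past one hop.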
@@ -4592,11 +4600,13 @@ static struct ggml_tensor * ggml_new_tensor_impl(
             data = (char * const) ctx->scratch.data + ctx->scratch.offs;
 
             ctx->scratch.offs += data_size;
-
-        data_size = 0;
+        } else {
+            // allocate tensor data in the context's memory pool
+            obj_alloc_size = data_size;
+        }
     }
 
-    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + data_size);
+    struct ggml_object * const obj_new = ggml_new_object(ctx, GGML_OBJECT_TENSOR, GGML_TENSOR_SIZE + obj_alloc_size);
 
     // TODO: for recoverable errors, we would need to free the data allocated from the scratch buffer here
 
@@ -4616,7 +4626,9 @@ static struct ggml_tensor * ggml_new_tensor_impl(
         /*.perf_runs    =*/ 0,
         /*.perf_cycles  =*/ 0,
         /*.perf_time_us =*/ 0,
-        /*.data         =*/ (data == NULL && !ctx->no_alloc) ? (void *)(result + 1) : data,
+        /*.view_src     =*/ view_src,
+        /*.view_offs    =*/ view_offs,
+        /*.data         =*/ obj_alloc_size > 0 ? (void *)(result + 1) : data,
         /*.name         =*/ { 0 },
         /*.extra        =*/ NULL,
         /*.padding      =*/ { 0 },
@@ -4640,28 +4652,12 @@ static struct ggml_tensor * ggml_new_tensor_impl(
     return result;
 }
 
-static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
-    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
-    assert(params_size <= GGML_MAX_OP_PARAMS);
-    memcpy(tensor->op_params, params, params_size);
-}
-
-static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    return ((const int32_t *)(tensor->op_params))[i];
-}
-
-static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
-    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
-    ((int32_t *)(tensor->op_params))[i] = value;
-}
-
 struct ggml_tensor * ggml_new_tensor(
         struct ggml_context * ctx,
         enum   ggml_type      type,
         int                   n_dims,
         const int64_t       * ne) {
-    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL);
+    return ggml_new_tensor_impl(ctx, type, n_dims, ne, NULL, 0);
 }
 
 struct ggml_tensor * ggml_new_tensor_1d(
@@ -4726,7 +4722,23 @@ struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value) {
 }
 
 struct ggml_tensor * ggml_dup_tensor(struct ggml_context * ctx, const struct ggml_tensor * src) {
-    return ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, NULL);
+    return ggml_new_tensor(ctx, src->type, src->n_dims, src->ne);
+}
+
+static void ggml_set_op_params(struct ggml_tensor * tensor, const void * params, size_t params_size) {
+    GGML_ASSERT(tensor != NULL); // silence -Warray-bounds warnings
+    assert(params_size <= GGML_MAX_OP_PARAMS);
+    memcpy(tensor->op_params, params, params_size);
+}
+
+static int32_t ggml_get_op_params_i32(const struct ggml_tensor * tensor, uint32_t i) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    return ((const int32_t *)(tensor->op_params))[i];
+}
+
+static void ggml_set_op_params_i32(struct ggml_tensor * tensor, uint32_t i, int32_t value) {
+    assert(i < GGML_MAX_OP_PARAMS / sizeof(int32_t));
+    ((int32_t *)(tensor->op_params))[i] = value;
 }
 
 struct ggml_tensor * ggml_set_zero(struct ggml_tensor * tensor) {
@@ -5012,14 +5024,13 @@ struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char *
 
 struct ggml_tensor * ggml_view_tensor(
         struct ggml_context * ctx,
-        const struct ggml_tensor * src) {
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src->data);
+        struct ggml_tensor  * src) {
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, src->type, src->n_dims, src->ne, src, 0);
     ggml_format_name(result, "%s (view)", src->name);
 
-    result->nb[0] = src->nb[0];
-    result->nb[1] = src->nb[1];
-    result->nb[2] = src->nb[2];
-    result->nb[3] = src->nb[3];
+    for (int i = 0; i < GGML_MAX_DIMS; i++) {
+        result->nb[i] = src->nb[i];
+    }
 
     return result;
 }
@@ -5592,7 +5603,7 @@ struct ggml_tensor * ggml_repeat_back(
 
 // ggml_concat
 
-struct ggml_tensor* ggml_concat(
+struct ggml_tensor * ggml_concat(
     struct ggml_context* ctx,
     struct ggml_tensor* a,
     struct ggml_tensor* b) {
@@ -6201,7 +6212,7 @@ struct ggml_tensor * ggml_reshape(
         //GGML_ASSERT(false);
     }
 
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, b->n_dims, b->ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6225,7 +6236,7 @@ struct ggml_tensor * ggml_reshape_1d(
     }
 
     const int64_t ne[1] = { ne0 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 1, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6250,7 +6261,7 @@ struct ggml_tensor * ggml_reshape_2d(
     }
 
     const int64_t ne[2] = { ne0, ne1 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 2, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6276,7 +6287,7 @@ struct ggml_tensor * ggml_reshape_3d(
     }
 
     const int64_t ne[3] = { ne0, ne1, ne2 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 3, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
@@ -6286,7 +6297,6 @@ struct ggml_tensor * ggml_reshape_3d(
     return result;
 }
 
-
 struct ggml_tensor * ggml_reshape_4d(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
@@ -6304,7 +6314,7 @@ struct ggml_tensor * ggml_reshape_4d(
     }
 
     const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
-    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a->data);
+    struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, 4, ne, a, 0);
     ggml_format_name(result, "%s (reshaped)", a->name);
 
     result->op   = GGML_OP_RESHAPE;
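One consequence worth spelling out: reshapes (and views, below) no longer snapshot a->data at graph-build time, so they work on tensors that have not been allocated yet. A hedged sketch, assuming a context created with no_alloc enabled:

    // With no_alloc, a->data is NULL while the graph is being built; the
    // old code would have baked that NULL into the reshaped tensor.
    struct ggml_tensor * a = ggml_new_tensor_2d(ctx, GGML_TYPE_F32, 8, 8);
    struct ggml_tensor * r = ggml_reshape_1d(ctx, a, 64);
    // r->view_src == a, r->view_offs == 0, r->data still NULL; ggml-alloc's
    // allocate_node() later fills in:
    //     r->data = (char *) a->data + r->view_offs;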
| @@ -6314,34 +6324,12 @@ struct ggml_tensor * ggml_reshape_4d( | |||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
| // ggml_view_1d | static struct ggml_tensor * ggml_view_impl( | ||||||
|  |  | ||||||
| static struct ggml_tensor * ggml_view_tensor_offset( |  | ||||||
|         struct ggml_context * ctx, |         struct ggml_context * ctx, | ||||||
|         struct ggml_tensor  * a, |         struct ggml_tensor  * a, | ||||||
|         int                   n_dims, |         int                   n_dims, | ||||||
|         const int64_t       * ne, |         const int64_t       * ne, | ||||||
|         size_t                offset) { |         size_t                offset) { | ||||||
|     // don't calculate an offset from an unallocated tensor |  | ||||||
|     void * data = NULL; |  | ||||||
|     if (a->data != NULL) { |  | ||||||
|         data = (char *) a->data + offset; |  | ||||||
|     } |  | ||||||
|  |  | ||||||
|     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, data); |  | ||||||
|  |  | ||||||
|     ggml_format_name(result, "%s (view)", a->name); |  | ||||||
|  |  | ||||||
|     ggml_set_op_params(result, &offset, sizeof(offset)); |  | ||||||
|  |  | ||||||
|     return result; |  | ||||||
| } |  | ||||||
|  |  | ||||||
| struct ggml_tensor * ggml_view_1d( |  | ||||||
|         struct ggml_context * ctx, |  | ||||||
|         struct ggml_tensor  * a, |  | ||||||
|         int64_t               ne0, |  | ||||||
|         size_t                offset) { |  | ||||||
|  |  | ||||||
|     bool is_node = false; |     bool is_node = false; | ||||||
|  |  | ||||||
| @@ -6349,7 +6337,10 @@ struct ggml_tensor * ggml_view_1d( | |||||||
|         is_node = true; |         is_node = true; | ||||||
|     } |     } | ||||||
|  |  | ||||||
|     struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 1, &ne0, offset); |     struct ggml_tensor * result = ggml_new_tensor_impl(ctx, a->type, n_dims, ne, a, offset); | ||||||
|  |     ggml_format_name(result, "%s (view)", a->name); | ||||||
|  |  | ||||||
|  |     ggml_set_op_params(result, &offset, sizeof(offset)); | ||||||
|  |  | ||||||
|     result->op   = GGML_OP_VIEW; |     result->op   = GGML_OP_VIEW; | ||||||
|     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; |     result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL; | ||||||
| @@ -6358,6 +6349,19 @@ struct ggml_tensor * ggml_view_1d( | |||||||
|     return result; |     return result; | ||||||
| } | } | ||||||
|  |  | ||||||
|  | // ggml_view_1d | ||||||
|  |  | ||||||
|  | struct ggml_tensor * ggml_view_1d( | ||||||
|  |         struct ggml_context * ctx, | ||||||
|  |         struct ggml_tensor  * a, | ||||||
|  |         int64_t               ne0, | ||||||
|  |         size_t                offset) { | ||||||
|  |  | ||||||
|  |     struct ggml_tensor * result = ggml_view_impl(ctx, a, 1, &ne0, offset); | ||||||
|  |  | ||||||
|  |     return result; | ||||||
|  | } | ||||||
|  |  | ||||||
| // ggml_view_2d | // ggml_view_2d | ||||||
|  |  | ||||||
| struct ggml_tensor * ggml_view_2d( | struct ggml_tensor * ggml_view_2d( | ||||||
@@ -6368,24 +6372,14 @@ struct ggml_tensor * ggml_view_2d(
         size_t                nb1,
         size_t                offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, 1, 1 };
+    const int64_t ne[2] = { ne0, ne1 };
 
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 2, ne, offset);
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 2, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = result->nb[1]*ne1;
     result->nb[3] = result->nb[2];
 
-    result->op   = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
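The per-dimension wrappers now only assemble the shape and strides; everything else funnels through ggml_view_impl. A hedged usage example (the matrix m and all sizes are made up): a 4x3 f32 window into a matrix whose rows lie 10 floats apart:

    struct ggml_tensor * w = ggml_view_2d(ctx, m, 4, 3, 10*sizeof(float), 0);
    // w->ne = {4, 3, 1, 1}, w->nb[0] = 4            (f32 element size)
    // w->nb[1] = 40                                 (caller-supplied nb1)
    // w->nb[2] = w->nb[1]*ne1 = 120, w->nb[3] = w->nb[2]
    // w->view_src == m (or m's base if m is itself a view), w->view_offs == 0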
@@ -6401,24 +6395,14 @@ struct ggml_tensor * ggml_view_3d(
         size_t                nb2,
         size_t                offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, 1 };
+    const int64_t ne[3] = { ne0, ne1, ne2 };
 
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 3, ne, offset);
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 3, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = result->nb[2]*ne2;
 
-    result->op   = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
@@ -6436,24 +6420,14 @@ struct ggml_tensor * ggml_view_4d(
         size_t                nb3,
         size_t                offset) {
 
-    bool is_node = false;
-
-    if (a->grad) {
-        is_node = true;
-    }
-
-    const int64_t ne[GGML_MAX_DIMS] = { ne0, ne1, ne2, ne3 };
+    const int64_t ne[4] = { ne0, ne1, ne2, ne3 };
 
-    struct ggml_tensor * result = ggml_view_tensor_offset(ctx, a, 4, ne, offset);
+    struct ggml_tensor * result = ggml_view_impl(ctx, a, 4, ne, offset);
 
     result->nb[1] = nb1;
     result->nb[2] = nb2;
     result->nb[3] = nb3;
 
-    result->op   = GGML_OP_VIEW;
-    result->grad = is_node ? ggml_dup_tensor(ctx, result) : NULL;
-    result->src[0] = a;
-
     return result;
 }
 
@@ -6640,7 +6614,7 @@ static struct ggml_tensor * ggml_diag_mask_inf_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    int32_t params[] = { n_past };
     ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_DIAG_MASK_INF;
@@ -6657,7 +6631,6 @@ struct ggml_tensor * ggml_diag_mask_inf(
     return ggml_diag_mask_inf_impl(ctx, a, n_past, false);
 }
 
-
 struct ggml_tensor * ggml_diag_mask_inf_inplace(
         struct ggml_context * ctx,
         struct ggml_tensor  * a,
@@ -6680,7 +6653,7 @@ static struct ggml_tensor * ggml_diag_mask_zero_impl(
 
     struct ggml_tensor * result = inplace ? ggml_view_tensor(ctx, a) : ggml_dup_tensor(ctx, a);
 
-    int32_t params[] = { n_past, inplace ? 1 : 0 };
+    int32_t params[] = { n_past };
    ggml_set_op_params(result, params, sizeof(params));
 
     result->op   = GGML_OP_DIAG_MASK_ZERO;
@@ -11936,7 +11909,7 @@ static void ggml_compute_forward_diag_mask_f32(
     const int nth = params->nth;
 
     const int  n_past  = ((int32_t *) dst->op_params)[0];
-    const bool inplace = (bool)((int32_t *) dst->op_params)[1];
+    const bool inplace = src0->data == dst->data;
 
     GGML_ASSERT(n_past >= 0);
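The last hunk is what makes automatic inplace safe for ggml_diag_mask_*: an inplace flag baked into op_params at graph-build time goes stale once the allocator later decides to reuse the source buffer, but comparing the actual pointers does not. Roughly:

    // Build time: the graph was built as not-inplace, so the old flag said 0.
    // Allocation time: ggml-alloc may still hand dst the same buffer as src0
    // (GGML_OP_DIAG_MASK_INF is in ggml_op_can_inplace). The kernel now
    // checks reality instead of the stale flag:
    const bool inplace = src0->data == dst->data;  // aliased => in place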
							
								
								
									
ggml.h (5 lines changed)
@@ -479,6 +479,9 @@ extern "C" {
         int64_t perf_cycles;
         int64_t perf_time_us;
 
+        struct ggml_tensor * view_src;
+        size_t               view_offs;
+
         void * data;
 
         char name[GGML_MAX_NAME];
@@ -661,7 +664,7 @@ extern "C" {
     GGML_API struct ggml_tensor * ggml_new_f32(struct ggml_context * ctx, float value);
 
     GGML_API struct ggml_tensor * ggml_dup_tensor (struct ggml_context * ctx, const struct ggml_tensor * src);
-    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, const struct ggml_tensor * src);
+    GGML_API struct ggml_tensor * ggml_view_tensor(struct ggml_context * ctx, struct ggml_tensor * src);
 
     GGML_API struct ggml_tensor * ggml_get_tensor(struct ggml_context * ctx, const char * name);
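The only public API change is the dropped const on ggml_view_tensor: the result stores a mutable back-pointer to src in view_src, so src can no longer be accepted as const. Usage is otherwise unchanged:

    struct ggml_tensor * v = ggml_view_tensor(ctx, src);  // src: now non-const
    // v->view_src == src (or src's base if src is itself a view),
    // v->view_offs == 0, v->nb[] copied from src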
Author: slaren