Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-30 08:42:00 +00:00)
	ggml : sync (unary ops refactor, static-correctness) (#2370)
* ggml : sync (unary ops, tests) ggml-ci
* tests : remove unnecessary funcs
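The diffs below fold the per-activation ops (GGML_OP_GELU, GGML_OP_SILU, GGML_OP_RELU, ...) into a single GGML_OP_UNARY op; the concrete activation is stored on the tensor and read back with ggml_get_unary_op(). As a hedged sketch (not part of this commit), a backend support check written against the refactored API could look like the following; supports_unary is a hypothetical helper name:

#include <stdbool.h>

#include "ggml.h"

// Hypothetical helper (not from this commit): report whether a graph node is a
// unary activation that a backend has a kernel for. After the refactor the
// node's op is GGML_OP_UNARY and the concrete activation is recovered with
// ggml_get_unary_op(), which is exactly what the CUDA and Metal hunks below do.
static bool supports_unary(const struct ggml_tensor * node) {
    if (node->op != GGML_OP_UNARY) {
        return false;
    }
    switch (ggml_get_unary_op(node)) {
        case GGML_UNARY_OP_GELU:
        case GGML_UNARY_OP_SILU:
        case GGML_UNARY_OP_RELU:
            return true;   // kernels exist for these activations
        default:
            return false;  // everything else falls back to the CPU path
    }
}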
@@ -3962,18 +3962,23 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_
             }
             func = ggml_cuda_mul;
             break;
-        case GGML_OP_GELU:
+        case GGML_OP_UNARY:
+            switch (ggml_get_unary_op(tensor)) {
+                case GGML_UNARY_OP_GELU:
                     if (!any_on_device) {
                         return false;
                     }
                     func = ggml_cuda_gelu;
                     break;
-        case GGML_OP_SILU:
+                case GGML_UNARY_OP_SILU:
                     if (!any_on_device) {
                         return false;
                     }
                     func = ggml_cuda_silu;
                     break;
+                default:
+                    return false;
+            } break;
         case GGML_OP_NORM:
             if (!any_on_device) {
                 return false;

ggml-metal.m  (16 changed lines)
@@ -519,7 +519,9 @@ void ggml_metal_graph_compute(

                             [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                         } break;
-                    case GGML_OP_SILU:
+                    case GGML_OP_UNARY:
+                        switch (ggml_get_unary_op(gf->nodes[i])) {
+                            case GGML_UNARY_OP_SILU:
                                 {
                                     if (encoder == nil) {
                                         encoder = [command_buffer computeCommandEncoder];
@@ -533,7 +535,7 @@ void ggml_metal_graph_compute(

                                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                                 } break;
-                    case GGML_OP_RELU:
+                            case GGML_UNARY_OP_RELU:
                                 {
                                     if (encoder == nil) {
                                         encoder = [command_buffer computeCommandEncoder];
@@ -547,7 +549,7 @@ void ggml_metal_graph_compute(

                                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                                 } break;
-                    case GGML_OP_GELU:
+                            case GGML_UNARY_OP_GELU:
                                 {
                                     if (encoder == nil) {
                                         encoder = [command_buffer computeCommandEncoder];
@@ -561,6 +563,12 @@ void ggml_metal_graph_compute(

                                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)];
                                 } break;
+                            default:
+                                {
+                                    fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
+                                    GGML_ASSERT(false);
+                                }
+                        } break;
                     case GGML_OP_SOFT_MAX:
                         {
                             if (encoder == nil) {
@@ -979,10 +987,12 @@ void ggml_metal_graph_compute(
                             [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)];
                         } break;
                     default:
+                        {
                             fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op));
                             GGML_ASSERT(false);
                         }
                 }
+            }

             if (encoder != nil) {
                 [encoder endEncoding];

ggml.h  (60 changed lines)
@@ -330,16 +330,6 @@ extern "C" {
         GGML_OP_ARGMAX,
         GGML_OP_REPEAT,
         GGML_OP_REPEAT_BACK,
-        GGML_OP_ABS,
-        GGML_OP_SGN,
-        GGML_OP_NEG,
-        GGML_OP_STEP,
-        GGML_OP_TANH,
-        GGML_OP_ELU,
-        GGML_OP_RELU,
-        GGML_OP_GELU,
-        GGML_OP_GELU_QUICK,
-        GGML_OP_SILU,
         GGML_OP_SILU_BACK,
         GGML_OP_NORM, // normalize
         GGML_OP_RMS_NORM,
@@ -378,6 +368,8 @@ extern "C" {
         GGML_OP_WIN_PART,
         GGML_OP_WIN_UNPART,

+        GGML_OP_UNARY,
+
         GGML_OP_MAP_UNARY,
         GGML_OP_MAP_BINARY,

@@ -391,6 +383,18 @@ extern "C" {
         GGML_OP_COUNT,
     };

+    enum ggml_unary_op {
+        GGML_UNARY_OP_ABS,
+        GGML_UNARY_OP_SGN,
+        GGML_UNARY_OP_NEG,
+        GGML_UNARY_OP_STEP,
+        GGML_UNARY_OP_TANH,
+        GGML_UNARY_OP_ELU,
+        GGML_UNARY_OP_RELU,
+        GGML_UNARY_OP_GELU,
+        GGML_UNARY_OP_GELU_QUICK,
+        GGML_UNARY_OP_SILU,
+    };

     // ggml object
     struct ggml_object {
@@ -535,6 +539,7 @@ extern "C" {

     GGML_API const char * ggml_type_name(enum ggml_type type);
     GGML_API const char * ggml_op_name  (enum ggml_op   op);
+    GGML_API const char * ggml_op_symbol(enum ggml_op   op);

     GGML_API size_t  ggml_element_size(const struct ggml_tensor * tensor);

@@ -558,6 +563,7 @@ extern "C" {
     GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx);

     GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch);
+    GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx);
     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc);

     GGML_API void *  ggml_get_mem_buffer     (const struct ggml_context * ctx);
@@ -617,9 +623,11 @@ extern "C" {
     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor);
     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor);

-    GGML_API const char *         ggml_get_name(const struct ggml_tensor * tensor);
-    GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name);
-    GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...);
+    GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor);
+
+    GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor);
+    GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name);
+    GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...);

     //
     // operations on tensors with backpropagation
@@ -629,6 +637,11 @@ extern "C" {
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    // in-place, returns view(a)
+    GGML_API struct ggml_tensor * ggml_dup_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     GGML_API struct ggml_tensor * ggml_add(
             struct ggml_context * ctx,
             struct ggml_tensor  * a,
@@ -952,11 +965,22 @@ extern "C" {
             struct ggml_tensor  * a,
             struct ggml_tensor  * b);

+    // a -> b, in-place, return view(b)
+    GGML_API struct ggml_tensor * ggml_cpy_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a,
+            struct ggml_tensor  * b);
+
     // make contiguous
     GGML_API struct ggml_tensor * ggml_cont(
             struct ggml_context * ctx,
             struct ggml_tensor  * a);

+    // make contiguous, in-place
+    GGML_API struct ggml_tensor * ggml_cont_inplace(
+            struct ggml_context * ctx,
+            struct ggml_tensor  * a);
+
     // return view(a), b specifies the new shape
     // TODO: when we start computing gradient, make a copy instead of view
     GGML_API struct ggml_tensor * ggml_reshape(
@@ -1268,6 +1292,16 @@ extern "C" {
     typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);
     typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *);

+    GGML_API struct ggml_tensor * ggml_unary(
+            struct ggml_context * ctx,
+             struct ggml_tensor * a,
+             enum ggml_unary_op op);
+
+    GGML_API struct ggml_tensor * ggml_unary_inplace(
+        struct ggml_context * ctx,
+        struct ggml_tensor  * a,
+        enum ggml_unary_op op);
+
     GGML_API struct ggml_tensor * ggml_map_unary_f32(
             struct ggml_context        * ctx,
             struct ggml_tensor         * a,
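The new public entry points ggml_unary() and ggml_unary_inplace() declared above take the activation as an enum value, and ggml_get_unary_op() reads it back from the resulting tensor. A minimal usage sketch, assuming the post-refactor ggml.h (the tensor size and memory budget are arbitrary):

#include "ggml.h"

int main(void) {
    struct ggml_init_params params = {
        /*.mem_size   =*/ 16*1024*1024,
        /*.mem_buffer =*/ NULL,
        /*.no_alloc   =*/ false,
    };
    struct ggml_context * ctx = ggml_init(params);

    struct ggml_tensor * a = ggml_new_tensor_1d(ctx, GGML_TYPE_F32, 8);

    // build a GELU node through the generic unary op
    struct ggml_tensor * g = ggml_unary(ctx, a, GGML_UNARY_OP_GELU);

    // the node's op is now GGML_OP_UNARY; the concrete activation is read back with:
    enum ggml_unary_op uop = ggml_get_unary_op(g); // == GGML_UNARY_OP_GELU

    (void) uop;
    ggml_free(ctx);
    return 0;
}

The per-activation wrappers are still part of the API: the test additions below keep calling ggml_sgn(), ggml_neg(), ggml_relu(), ggml_gelu() and friends directly, so existing user code does not need to switch to ggml_unary().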
@@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) {
     }
 }

-struct ggml_tensor * get_random_tensor(
+struct ggml_tensor * get_random_tensor_f32(
         struct ggml_context * ctx0,
         int ndims,
         int64_t ne[],
@@ -112,7 +112,55 @@ struct ggml_tensor * get_random_tensor(
     return result;
 }

-struct ggml_tensor * get_random_tensor_int(
+struct ggml_tensor * get_random_tensor_f16(
+        struct ggml_context * ctx0,
+        int ndims,
+        int64_t ne[],
+        float fmin,
+        float fmax) {
+    struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne);
+
+    switch (ndims) {
+        case 1:
+            for (int i0 = 0; i0 < ne[0]; i0++) {
+                ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
+            }
+            break;
+        case 2:
+            for (int i1 = 0; i1 < ne[1]; i1++) {
+                for (int i0 = 0; i0 < ne[0]; i0++) {
+                    ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
+                }
+            }
+            break;
+        case 3:
+            for (int i2 = 0; i2 < ne[2]; i2++) {
+                for (int i1 = 0; i1 < ne[1]; i1++) {
+                    for (int i0 = 0; i0 < ne[0]; i0++) {
+                        ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
+                    }
+                }
+            }
+            break;
+        case 4:
+            for (int i3 = 0; i3 < ne[3]; i3++) {
+                for (int i2 = 0; i2 < ne[2]; i2++) {
+                    for (int i1 = 0; i1 < ne[1]; i1++) {
+                        for (int i0 = 0; i0 < ne[0]; i0++) {
+                            ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin);
+                        }
+                    }
+                }
+            }
+            break;
+        default:
+            assert(false);
+    };
+
+    return result;
+}
+
+struct ggml_tensor * get_random_tensor_i32(
         struct ggml_context * ctx0,
         int ndims,
         int64_t ne[],
@@ -160,23 +208,6 @@ struct ggml_tensor * get_random_tensor_int(
     return result;
 }

-float get_element(const struct ggml_tensor * t, int idx) {
-    if (t->type == GGML_TYPE_F32) {
-        return ((float *)t->data)[idx];
-    }
-
-    if (t->type == GGML_TYPE_I32) {
-        return ((int32_t *)t->data)[idx];
-    }
-
-    assert(false);
-    return INFINITY;
-}
-
-void set_element(struct ggml_tensor * t, int idx, float value) {
-    ((float *)t->data)[idx] = value;
-}
-
 void print_elements(const char* label, const struct ggml_tensor * t) {
     if (!t) {
         printf("%s: %s = null\n", __func__, label);
@@ -186,7 +217,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) {
     printf("%s: %s = [", __func__, label);
     for (int k = 0; k < nelements; ++k) {
         if (k > 0) { printf(", "); }
-        printf("%.5f", get_element(t, k));
+        printf("%.5f", ggml_get_f32_1d(t, k));
     }
     printf("] shape: [");
     for (int k = 0; k < t->n_dims; ++k) {
@@ -237,23 +268,23 @@ bool check_gradient(
         const int nelements = ggml_nelements(x[i]);
         for (int k = 0; k < nelements; ++k) {
             // compute gradient using finite differences
-            const float x0 = get_element(x[i], k);
+            const float x0 = ggml_get_f32_1d(x[i], k);
             const float xm = x0 - eps;
             const float xp = x0 + eps;
-            set_element(x[i], k, xp);
+            ggml_set_f32_1d(x[i], k, xp);

             ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

             const float f0 = ggml_get_f32_1d(f, 0);

-            set_element(x[i], k, xm);
+            ggml_set_f32_1d(x[i], k, xm);

             ggml_graph_compute_with_ctx(ctx0, &gf, n_threads);

             const float f1 = ggml_get_f32_1d(f, 0);
             const float g0 = (f0 - f1)/(2.0f*eps);

-            set_element(x[i], k, x0);
+            ggml_set_f32_1d(x[i], k, x0);

             // compute gradient using backward graph
             ggml_graph_reset  (&gf);
@@ -261,7 +292,7 @@ bool check_gradient(

             ggml_graph_compute_with_ctx(ctx0, &gb, n_threads);

-            const float g1 = get_element(x[i]->grad, k);
+            const float g1 = ggml_get_f32_1d(x[i]->grad, k);

             const float error_abs = fabsf(g0 - g1);
             const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0;
@@ -392,19 +423,35 @@ int main(int argc, const char ** argv) {

         struct ggml_tensor * x[MAX_NARGS];

-        // add
+        // add f32
         {
             const int nargs = 2;

             for (int ndims = 1; ndims <= 4; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));

-                check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
+                check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f);
+            }
+        }
+
+        // add f16
+        {
+            const int nargs = 2;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1]));
+
+                check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f);
             }
         }

@@ -414,7 +461,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 4; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -430,7 +477,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 4; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -446,7 +493,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 4; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -462,7 +509,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 2; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -478,7 +525,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 2; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -494,7 +541,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 2; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -510,7 +557,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 2; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -527,7 +574,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 4; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -537,6 +584,40 @@ int main(int argc, const char ** argv) {
             }
         }

+        // mean, not yet fully implemented
+        if(0)
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0]));
+
+                check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
+        // argmax
+        if (0)
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0]));
+
+                check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
         // repeat
         {
             int64_t ne2[4];
@@ -549,15 +630,36 @@ int main(int argc, const char ** argv) {

             const int nargs = 1;
             for (int ndims = 1; ndims <= 2; ++ndims) {
-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
-                x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);

                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1]))));

                 check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
             }
+        }
+
+        // repeat back
+        {
+            int64_t ne2[4];
+            get_random_dims(ne2, 4);
+
+            ne2[0] = ne[0] * ne2[0];
+            ne2[1] = ne[1] * ne2[1];
+            ne2[2] = 1;
+            ne2[3] = 1;
+
+            const int nargs = 1;
+            for (int ndims = 1; ndims <= 2; ++ndims) {
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
+                ggml_set_param(ctx0, x[0]);
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0]))));
+
+                check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY);
+            }
         }

@@ -566,7 +668,7 @@ int main(int argc, const char ** argv) {

         //    for (int ndims = 1; ndims <= 2; ++ndims) {
         //        for (int i = 0; i < nargs; ++i) {
-        //            x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+        //            x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
         //            ggml_set_param(ctx0, x[i]);
         //        }

@@ -576,17 +678,82 @@ int main(int argc, const char ** argv) {
         //    }
         //}

+        // sgn
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0]));
+
+                check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
+        // neg
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0]));
+
+                check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
+        // step
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0]));
+
+                check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
+        // tanh, not yet fully implemented
+        if(0)
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0]));
+
+                check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
         // mul_mat
         {
             const int nargs = 2;

             for (int ndims = 2; ndims <= 2; ++ndims) {
-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 {
                     int64_t ne2[4];
                     get_random_dims(ne2, 4);
                     ne2[0] = ne[0];
-                    x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f);
+                    x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f);
                 }

                 ggml_set_param(ctx0, x[0]);
@@ -602,13 +769,63 @@ int main(int argc, const char ** argv) {
             }
         }

+        // elu, not yet fully implemented
+        if(0)
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0]));
+
+                check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
+        // relu
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0]));
+
+                check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // gelu, not yet fully implemented
+        if(0)
+        {
+            const int nargs = 1;
+
+            for (int ndims = 1; ndims <= 4; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+
+                struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0]));
+
+                check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f);
+            }
+        }
+
         // silu
         {
             const int nargs = 1;

             for (int ndims = 1; ndims <= 2; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -629,7 +846,7 @@ int main(int argc, const char ** argv) {

             for (int ndims = 1; ndims <= 2; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }

@@ -647,8 +864,8 @@ int main(int argc, const char ** argv) {
             ne2[0] = 1;

             for (int ndims = 1; ndims <= 2; ++ndims) {
-                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);

                 ggml_set_param(ctx0, x[0]);
                 ggml_set_param(ctx0, x[1]);
@@ -659,20 +876,37 @@ int main(int argc, const char ** argv) {
             }
         }

-        // cpy
+        // cpy f32
         {
             const int nargs = 2;

             for (int ndims = 1; ndims <= 2; ++ndims) {
                 for (int i = 0; i < nargs; ++i) {
-                    x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                    x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                     ggml_set_param(ctx0, x[i]);
                 }
                 // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]

                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));

-                check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+                check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY);
+            }
+        }
+
+        // cpy f16
+        {
+            const int nargs = 2;
+
+            for (int ndims = 1; ndims <= 2; ++ndims) {
+                for (int i = 0; i < nargs; ++i) {
+                    x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f);
+                    ggml_set_param(ctx0, x[i]);
+                }
+                // x[1] is overwritten by x[0], so the gradients don't propagate to x[1]
+
+                struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1]));
+
+                check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY);
             }
         }

@@ -689,8 +923,8 @@ int main(int argc, const char ** argv) {
                 for (int i = 0; i < ndims; ++i) {
                     ne2[0] *= ne[i];
                 }
-                x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
-                x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);


@@ -712,8 +946,8 @@ int main(int argc, const char ** argv) {
                 for (int i = 0; i < ndims; ++i) {
                     ne2[0] *= ne[i];
                 }
-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
-                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);


@@ -729,7 +963,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 2;
             for (int ndims = 1; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);

                 get_random_dims(ne2, 1);
@@ -737,7 +971,7 @@ int main(int argc, const char ** argv) {
                     get_random_dims(ne2, 1);
                 }

-                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[1]);

                 const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
@@ -758,7 +992,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 2;
             for (int ndims = 2; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);

                 get_random_dims(ne2, 2);
@@ -766,7 +1000,7 @@ int main(int argc, const char ** argv) {
                     get_random_dims(ne2, 2);
                 }

-                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[1]);

                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -790,7 +1024,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 2;
             for (int ndims = 3; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);

                 get_random_dims(ne2, 3);
@@ -798,7 +1032,7 @@ int main(int argc, const char ** argv) {
                     get_random_dims(ne2, 3);
                 }

-                x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[1]);

                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -824,7 +1058,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 2;
             for (int ndims = 4; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);

                 get_random_dims(ne2, 4);
@@ -832,7 +1066,7 @@ int main(int argc, const char ** argv) {
                     get_random_dims(ne2, 4);
                 }

-                x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[1]);

                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -858,7 +1092,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 2;
             for (int ndims = 1; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);

                 get_random_dims(ne2, 1);
@@ -866,7 +1100,7 @@ int main(int argc, const char ** argv) {
                     get_random_dims(ne2, 1);
                 }

-                x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[1]);

                 const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1]));
@@ -887,7 +1121,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 1;
             for (int ndims = 2; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[0]);

                 get_random_dims(ne2, 2);
@@ -895,7 +1129,7 @@ int main(int argc, const char ** argv) {
                     get_random_dims(ne2, 2);
                 }

-                x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f);
+                x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f);
                 ggml_set_param(ctx0, x[1]);

                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]);
@@ -915,7 +1149,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 1;
             for (int ndims = 1; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);

                 ggml_set_param(ctx0, x[0]);

@@ -941,7 +1175,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 1;
             for (int ndims = 1; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);

                 get_random_dims(ne2, 2);
                 while (ne2[0]*ne2[1] > ggml_nelements(x[0])) {
@@ -971,7 +1205,7 @@ int main(int argc, const char ** argv) {
             const int nargs = 1;
             for (int ndims = 1; ndims <= 4; ++ndims) {

-                x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f);

                 get_random_dims(ne2, 3);
                 while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) {
@@ -1010,7 +1244,7 @@ int main(int argc, const char ** argv) {
                 for (int i=ndims; i<4; ++i) {
                     ne2[i] = 1;
                 }
-                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);

                 ggml_set_param(ctx0, x[0]);

@@ -1043,7 +1277,7 @@ int main(int argc, const char ** argv) {
                 for (int i=ndims; i<4; ++i) {
                     ne2[i] = 1;
                 }
-                x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f);
+                x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f);

                 ggml_set_param(ctx0, x[0]);
|  |  | ||||||
| @@ -1060,8 +1294,8 @@ int main(int argc, const char ** argv) { | |||||||
|             int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1}; |             int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1}; | ||||||
|             const int nargs = 1; |             const int nargs = 1; | ||||||
|             const int ndims = 2; |             const int ndims = 2; | ||||||
|             x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); |             x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||||
|             x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]); |             x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]); | ||||||
|  |  | ||||||
|             ggml_set_param(ctx0, x[0]); |             ggml_set_param(ctx0, x[0]); | ||||||
|  |  | ||||||
| @@ -1075,7 +1309,7 @@ int main(int argc, const char ** argv) { | |||||||
|             const int nargs = 1; |             const int nargs = 1; | ||||||
|             const int ndims = 2; |             const int ndims = 2; | ||||||
|  |  | ||||||
|             x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); |             x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||||
|             ggml_set_param(ctx0, x[0]); |             ggml_set_param(ctx0, x[0]); | ||||||
|  |  | ||||||
|             int n_past = irand(ne[0]); |             int n_past = irand(ne[0]); | ||||||
| @@ -1090,7 +1324,7 @@ int main(int argc, const char ** argv) { | |||||||
|             const int nargs = 1; |             const int nargs = 1; | ||||||
|             const int ndims = 2; |             const int ndims = 2; | ||||||
|  |  | ||||||
|             x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); |             x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||||
|             ggml_set_param(ctx0, x[0]); |             ggml_set_param(ctx0, x[0]); | ||||||
|  |  | ||||||
|             int n_past = irand(ne[0]); |             int n_past = irand(ne[0]); | ||||||
| @@ -1108,7 +1342,7 @@ int main(int argc, const char ** argv) { | |||||||
|             get_random_dims(ne2, 4); |             get_random_dims(ne2, 4); | ||||||
|  |  | ||||||
|             for (int ndims = 1; ndims <= 3; ++ndims) { |             for (int ndims = 1; ndims <= 3; ++ndims) { | ||||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); |                 x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||||
|                 ggml_set_param(ctx0, x[0]); |                 ggml_set_param(ctx0, x[0]); | ||||||
|  |  | ||||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0])); |                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0])); | ||||||
| @@ -1125,8 +1359,8 @@ int main(int argc, const char ** argv) { | |||||||
|             get_random_dims(ne2, 4); |             get_random_dims(ne2, 4); | ||||||
|  |  | ||||||
|             for (int ndims = 1; ndims <= 3; ++ndims) { |             for (int ndims = 1; ndims <= 3; ++ndims) { | ||||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); |                 x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||||
|                 x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f); |                 x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f); | ||||||
|                 ggml_set_param(ctx0, x[0]); |                 ggml_set_param(ctx0, x[0]); | ||||||
|  |  | ||||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1])); |                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1])); | ||||||
| @@ -1136,7 +1370,7 @@ int main(int argc, const char ** argv) { | |||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // rope |         // rope f32 | ||||||
|         { |         { | ||||||
|             const int nargs = 1; |             const int nargs = 1; | ||||||
|  |  | ||||||
| @@ -1148,7 +1382,7 @@ int main(int argc, const char ** argv) { | |||||||
|             for (int ndims = 3; ndims <= 4; ++ndims) { |             for (int ndims = 3; ndims <= 4; ++ndims) { | ||||||
|                 for (int mode = 0; mode < 4; ++mode) { |                 for (int mode = 0; mode < 4; ++mode) { | ||||||
|                     for (int n_past = 1; n_past < ne2[2]; ++n_past) { |                     for (int n_past = 1; n_past < ne2[2]; ++n_past) { | ||||||
|                         x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); |                         x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||||
|  |  | ||||||
|                         ggml_set_param(ctx0, x[0]); |                         ggml_set_param(ctx0, x[0]); | ||||||
|  |  | ||||||
| @@ -1163,14 +1397,48 @@ int main(int argc, const char ** argv) { | |||||||
|  |  | ||||||
|                         struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); |                         struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); | ||||||
|  |  | ||||||
|                         GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); |                         GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); | ||||||
|                         check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); |                         check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); | ||||||
|                     } |                     } | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|  |  | ||||||
|         // flash_attn |         // rope f16 | ||||||
|  |         { | ||||||
|  |             const int nargs = 1; | ||||||
|  |  | ||||||
|  |             int64_t ne2[4]; | ||||||
|  |             get_random_dims(ne2, 4); | ||||||
|  |             ne2[0] += ne2[0] % 2; | ||||||
|  |             int n_rot = ne2[0]; | ||||||
|  |  | ||||||
|  |             for (int ndims = 3; ndims <= 4; ++ndims) { | ||||||
|  |                 for (int mode = 0; mode < 4; ++mode) { | ||||||
|  |                     for (int n_past = 1; n_past < ne2[2]; ++n_past) { | ||||||
|  |                         x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||||
|  |  | ||||||
|  |                         ggml_set_param(ctx0, x[0]); | ||||||
|  |  | ||||||
|  |                         const bool skip_past = (mode & 1); | ||||||
|  |                         if (skip_past) { | ||||||
|  |                             // we have no past, so this would have to work on uninitialized memory. | ||||||
|  |                             // we only test the gradients here; | ||||||
|  |                             // skip_past should have no influence on gradient computation. | ||||||
|  |                             // so when other modes work, we assume that this does as well. | ||||||
|  |                             continue; | ||||||
|  |                         } | ||||||
|  |  | ||||||
|  |                         struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); | ||||||
|  |  | ||||||
|  |                         GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); | ||||||
|  |                         check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); | ||||||
|  |                     } | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // flash_attn f32 | ||||||
|         { |         { | ||||||
|             const int nargs = 3; |             const int nargs = 3; | ||||||
|  |  | ||||||
| @@ -1196,16 +1464,57 @@ int main(int argc, const char ** argv) { | |||||||
|                         nek[3] = 1; |                         nek[3] = 1; | ||||||
|                         nev[3] = 1; |                         nev[3] = 1; | ||||||
|                     } |                     } | ||||||
|                     x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f); |                     x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f); | ||||||
|                     x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f); |                     x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f); | ||||||
|                     x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f); |                     x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f); | ||||||
|                     ggml_set_param(ctx0, x[0]); |                     ggml_set_param(ctx0, x[0]); | ||||||
|                     ggml_set_param(ctx0, x[1]); |                     ggml_set_param(ctx0, x[1]); | ||||||
|                     ggml_set_param(ctx0, x[2]); |                     ggml_set_param(ctx0, x[2]); | ||||||
|  |  | ||||||
|                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); |                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); | ||||||
|  |  | ||||||
|                     check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); |                     check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); | ||||||
|  |                 } | ||||||
|  |             } | ||||||
|  |         } | ||||||
|  |  | ||||||
|  |         // flash_attn f16, not yet fully implemented | ||||||
|  |         if(0) | ||||||
|  |         { | ||||||
|  |             const int nargs = 3; | ||||||
|  |  | ||||||
|  |             int64_t ne2[4]; | ||||||
|  |  | ||||||
|  |             get_random_dims(ne2, 4); | ||||||
|  |             int64_t D = ne2[0]; | ||||||
|  |             int64_t N = ne2[1]; | ||||||
|  |             int64_t M = ne2[2] + N; | ||||||
|  |             int64_t B = ne2[3]; | ||||||
|  |  | ||||||
|  |             for (int masked = 0; masked <= 1; ++masked) { | ||||||
|  |                 for (int ndims = 2; ndims <= 4; ++ndims) { | ||||||
|  |                     int64_t neq[4] = { D, N, B, ne[3] }; | ||||||
|  |                     int64_t nek[4] = { D, M, B, ne[3] }; | ||||||
|  |                     int64_t nev[4] = { M, D, B, ne[3] }; | ||||||
|  |                     if (ndims == 2) { | ||||||
|  |                         neq[2] = 1; neq[3] = 1; | ||||||
|  |                         nek[2] = 1; nek[3] = 1; | ||||||
|  |                         nev[2] = 1; nev[3] = 1; | ||||||
|  |                     } else if (ndims == 3) { | ||||||
|  |                         neq[3] = 1; | ||||||
|  |                         nek[3] = 1; | ||||||
|  |                         nev[3] = 1; | ||||||
|  |                     } | ||||||
|  |                     x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f); | ||||||
|  |                     x[1] = get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f); | ||||||
|  |                     x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f); | ||||||
|  |                     ggml_set_param(ctx0, x[0]); | ||||||
|  |                     ggml_set_param(ctx0, x[1]); | ||||||
|  |                     ggml_set_param(ctx0, x[2]); | ||||||
|  |  | ||||||
|  |                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); | ||||||
|  |  | ||||||
|  |                     check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); | ||||||
|                 } |                 } | ||||||
|             } |             } | ||||||
|         } |         } | ||||||
|   | |||||||
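A note on the check_gradient calls in the hunks above: after ndims and nargs, the three trailing arguments appear to be the finite-difference step and the absolute and relative error bounds, with INFINITY presumably used to disable one of the two bounds (the rope f32 case keeps only the absolute bound, flash_attn f32 only the relative one). The helper below is a minimal sketch of that comparison under those assumptions, using a central-difference estimate; the function name and the exact pass rule are illustrative, not the test file's actual implementation.

#include <math.h>
#include <stdbool.h>

// sketch (assumed semantics, not the real check_gradient): compare an analytic
// gradient for one input element against a central-difference estimate, where
// f_plus = f(x_i + eps) and f_minus = f(x_i - eps)
static bool gradient_within_tolerance(
        float analytic, float f_plus, float f_minus,
        float eps, float max_err_abs, float max_err_rel) {
    const float numeric = (f_plus - f_minus) / (2.0f*eps);
    const float err_abs = fabsf(analytic - numeric);
    const float err_rel = analytic != 0.0f ? err_abs/fabsf(analytic) : 0.0f;
    // passing INFINITY for one bound effectively disables that criterion
    return err_abs <= max_err_abs && err_rel <= max_err_rel;
}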
| @@ -125,9 +125,9 @@ int main(void) { | |||||||
|     }; |     }; | ||||||
|     struct ggml_context * ctx = ggml_init(params); |     struct ggml_context * ctx = ggml_init(params); | ||||||
|  |  | ||||||
|     int64_t ne1[4] = {4, 1024, 1, 1}; |     int64_t ne1[4] = {4, 128, 1, 1}; | ||||||
|     int64_t ne2[4] = {4, 2048, 1, 1}; |     int64_t ne2[4] = {4, 256, 1, 1}; | ||||||
|     int64_t ne3[4] = {1024, 2048, 1, 1}; |     int64_t ne3[4] = {128, 256, 1, 1}; | ||||||
|  |  | ||||||
|     struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1); |     struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1); | ||||||
|     struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1); |     struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1); | ||||||
|   | |||||||
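Across the hunks above, the random-tensor helpers are split by element type (get_random_tensor_f32, get_random_tensor_f16, get_random_tensor_i32), and the test-opt matrices shrink from 4x1024, 4x2048, 1024x2048 to 4x128, 4x256, 128x256, presumably to keep the optimizer test fast. For reference, a uniform f32 fill of the kind these helpers provide can be sketched with the public ggml API as below; the function name and RNG choice are assumptions, not the tests' actual code.

#include <stdlib.h>
#include "ggml.h"

// sketch (assumed helper, not the tests' implementation): allocate an f32
// tensor in the given context and fill it with uniform values in [fmin, fmax]
static struct ggml_tensor * random_tensor_f32(
        struct ggml_context * ctx, int ndims, const int64_t * ne,
        float fmin, float fmax) {
    struct ggml_tensor * t = ggml_new_tensor(ctx, GGML_TYPE_F32, ndims, ne);
    float * data = (float *) t->data;
    for (int64_t i = 0; i < ggml_nelements(t); ++i) {
        data[i] = fmin + (fmax - fmin)*((float) rand()/(float) RAND_MAX);
    }
    return t;
}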