Mirror of https://github.com/ggml-org/llama.cpp.git (synced 2025-10-31 08:51:55 +00:00)
			
		
		
		
ggml : sync (unary ops refactor, static-correctness) (#2370)

* ggml : sync (unary ops, tests)

  ggml-ci

* tests : remove unnecessary funcs
This commit is contained in:
		| @@ -3962,18 +3962,23 @@ bool ggml_cuda_compute_forward(struct ggml_compute_params * params, struct ggml_ | ||||
|             } | ||||
|             func = ggml_cuda_mul; | ||||
|             break; | ||||
|         case GGML_OP_GELU: | ||||
|         case GGML_OP_UNARY: | ||||
|             switch (ggml_get_unary_op(tensor)) { | ||||
|                 case GGML_UNARY_OP_GELU: | ||||
|                     if (!any_on_device) { | ||||
|                         return false; | ||||
|                     } | ||||
|                     func = ggml_cuda_gelu; | ||||
|                     break; | ||||
|         case GGML_OP_SILU: | ||||
|                 case GGML_UNARY_OP_SILU: | ||||
|                     if (!any_on_device) { | ||||
|                         return false; | ||||
|                     } | ||||
|                     func = ggml_cuda_silu; | ||||
|                     break; | ||||
|                 default: | ||||
|                     return false; | ||||
|             } break; | ||||
|         case GGML_OP_NORM: | ||||
|             if (!any_on_device) { | ||||
|                 return false; | ||||
|   | ||||
							
								
								
									
										16
									
								
								ggml-metal.m
									
									
									
									
									
								
							
							
						
						
									
										16
									
								
								ggml-metal.m
									
									
									
									
									
								
							| @@ -519,7 +519,9 @@ void ggml_metal_graph_compute( | ||||
|  | ||||
|                             [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; | ||||
|                         } break; | ||||
|                     case GGML_OP_SILU: | ||||
|                     case GGML_OP_UNARY: | ||||
|                         switch (ggml_get_unary_op(gf->nodes[i])) { | ||||
|                             case GGML_UNARY_OP_SILU: | ||||
|                                 { | ||||
|                                     if (encoder == nil) { | ||||
|                                         encoder = [command_buffer computeCommandEncoder]; | ||||
| @@ -533,7 +535,7 @@ void ggml_metal_graph_compute( | ||||
|  | ||||
|                                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; | ||||
|                                 } break; | ||||
|                     case GGML_OP_RELU: | ||||
|                             case GGML_UNARY_OP_RELU: | ||||
|                                 { | ||||
|                                     if (encoder == nil) { | ||||
|                                         encoder = [command_buffer computeCommandEncoder]; | ||||
| @@ -547,7 +549,7 @@ void ggml_metal_graph_compute( | ||||
|  | ||||
|                                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; | ||||
|                                 } break; | ||||
|                     case GGML_OP_GELU: | ||||
|                             case GGML_UNARY_OP_GELU: | ||||
|                                 { | ||||
|                                     if (encoder == nil) { | ||||
|                                         encoder = [command_buffer computeCommandEncoder]; | ||||
| @@ -561,6 +563,12 @@ void ggml_metal_graph_compute( | ||||
|  | ||||
|                                     [encoder dispatchThreadgroups:MTLSizeMake(n, 1, 1) threadsPerThreadgroup:MTLSizeMake(1, 1, 1)]; | ||||
|                                 } break; | ||||
|                             default: | ||||
|                                 { | ||||
|                                     fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); | ||||
|                                     GGML_ASSERT(false); | ||||
|                                 } | ||||
|                         } break; | ||||
|                     case GGML_OP_SOFT_MAX: | ||||
|                         { | ||||
|                             if (encoder == nil) { | ||||
| @@ -979,10 +987,12 @@ void ggml_metal_graph_compute( | ||||
|                             [encoder dispatchThreadgroups:MTLSizeMake(ne01, ne02, ne03) threadsPerThreadgroup:MTLSizeMake(nth, 1, 1)]; | ||||
|                         } break; | ||||
|                     default: | ||||
|                         { | ||||
|                             fprintf(stderr, "%s: node %3d, op = %8s not implemented\n", __func__, i, ggml_op_name(dst->op)); | ||||
|                             GGML_ASSERT(false); | ||||
|                         } | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|             if (encoder != nil) { | ||||
|                 [encoder endEncoding]; | ||||
|   | ||||
							
								
								
									
										60
									
								
								ggml.h
									
									
									
									
									
								
							
							
						
						
									
										60
									
								
								ggml.h
									
									
									
									
									
								
							| @@ -330,16 +330,6 @@ extern "C" { | ||||
|         GGML_OP_ARGMAX, | ||||
|         GGML_OP_REPEAT, | ||||
|         GGML_OP_REPEAT_BACK, | ||||
|         GGML_OP_ABS, | ||||
|         GGML_OP_SGN, | ||||
|         GGML_OP_NEG, | ||||
|         GGML_OP_STEP, | ||||
|         GGML_OP_TANH, | ||||
|         GGML_OP_ELU, | ||||
|         GGML_OP_RELU, | ||||
|         GGML_OP_GELU, | ||||
|         GGML_OP_GELU_QUICK, | ||||
|         GGML_OP_SILU, | ||||
|         GGML_OP_SILU_BACK, | ||||
|         GGML_OP_NORM, // normalize | ||||
|         GGML_OP_RMS_NORM, | ||||
| @@ -378,6 +368,8 @@ extern "C" { | ||||
|         GGML_OP_WIN_PART, | ||||
|         GGML_OP_WIN_UNPART, | ||||
|  | ||||
|         GGML_OP_UNARY, | ||||
|  | ||||
|         GGML_OP_MAP_UNARY, | ||||
|         GGML_OP_MAP_BINARY, | ||||
|  | ||||
| @@ -391,6 +383,18 @@ extern "C" { | ||||
|         GGML_OP_COUNT, | ||||
|     }; | ||||
|  | ||||
|     enum ggml_unary_op { | ||||
|         GGML_UNARY_OP_ABS, | ||||
|         GGML_UNARY_OP_SGN, | ||||
|         GGML_UNARY_OP_NEG, | ||||
|         GGML_UNARY_OP_STEP, | ||||
|         GGML_UNARY_OP_TANH, | ||||
|         GGML_UNARY_OP_ELU, | ||||
|         GGML_UNARY_OP_RELU, | ||||
|         GGML_UNARY_OP_GELU, | ||||
|         GGML_UNARY_OP_GELU_QUICK, | ||||
|         GGML_UNARY_OP_SILU, | ||||
|     }; | ||||
|  | ||||
|     // ggml object | ||||
|     struct ggml_object { | ||||
| @@ -535,6 +539,7 @@ extern "C" { | ||||
|  | ||||
|     GGML_API const char * ggml_type_name(enum ggml_type type); | ||||
|     GGML_API const char * ggml_op_name  (enum ggml_op   op); | ||||
|     GGML_API const char * ggml_op_symbol(enum ggml_op   op); | ||||
|  | ||||
|     GGML_API size_t  ggml_element_size(const struct ggml_tensor * tensor); | ||||
|  | ||||
| @@ -558,6 +563,7 @@ extern "C" { | ||||
|     GGML_API size_t  ggml_used_mem(const struct ggml_context * ctx); | ||||
|  | ||||
|     GGML_API size_t  ggml_set_scratch (struct ggml_context * ctx, struct ggml_scratch scratch); | ||||
|     GGML_API bool    ggml_get_no_alloc(struct ggml_context * ctx); | ||||
|     GGML_API void    ggml_set_no_alloc(struct ggml_context * ctx, bool no_alloc); | ||||
|  | ||||
|     GGML_API void *  ggml_get_mem_buffer     (const struct ggml_context * ctx); | ||||
| @@ -617,9 +623,11 @@ extern "C" { | ||||
|     GGML_API void *  ggml_get_data    (const struct ggml_tensor * tensor); | ||||
|     GGML_API float * ggml_get_data_f32(const struct ggml_tensor * tensor); | ||||
|  | ||||
|     GGML_API const char *         ggml_get_name(const struct ggml_tensor * tensor); | ||||
|     GGML_API struct ggml_tensor * ggml_set_name(struct ggml_tensor * tensor, const char * name); | ||||
|     GGML_API struct ggml_tensor * ggml_format_name(struct ggml_tensor * tensor, const char * fmt, ...); | ||||
|     GGML_API enum ggml_unary_op ggml_get_unary_op(const struct ggml_tensor * tensor); | ||||
|  | ||||
|     GGML_API const char *         ggml_get_name   (const struct ggml_tensor * tensor); | ||||
|     GGML_API struct ggml_tensor * ggml_set_name   (      struct ggml_tensor * tensor, const char * name); | ||||
|     GGML_API struct ggml_tensor * ggml_format_name(      struct ggml_tensor * tensor, const char * fmt, ...); | ||||
|  | ||||
|     // | ||||
|     // operations on tensors with backpropagation | ||||
| @@ -629,6 +637,11 @@ extern "C" { | ||||
|             struct ggml_context * ctx, | ||||
|             struct ggml_tensor  * a); | ||||
|  | ||||
|     // in-place, returns view(a) | ||||
|     GGML_API struct ggml_tensor * ggml_dup_inplace( | ||||
|             struct ggml_context * ctx, | ||||
|             struct ggml_tensor  * a); | ||||
|  | ||||
|     GGML_API struct ggml_tensor * ggml_add( | ||||
|             struct ggml_context * ctx, | ||||
|             struct ggml_tensor  * a, | ||||
| @@ -952,11 +965,22 @@ extern "C" { | ||||
|             struct ggml_tensor  * a, | ||||
|             struct ggml_tensor  * b); | ||||
|  | ||||
|     // a -> b, in-place, return view(b) | ||||
|     GGML_API struct ggml_tensor * ggml_cpy_inplace( | ||||
|             struct ggml_context * ctx, | ||||
|             struct ggml_tensor  * a, | ||||
|             struct ggml_tensor  * b); | ||||
|  | ||||
|     // make contiguous | ||||
|     GGML_API struct ggml_tensor * ggml_cont( | ||||
|             struct ggml_context * ctx, | ||||
|             struct ggml_tensor  * a); | ||||
|  | ||||
|     // make contiguous, in-place | ||||
|     GGML_API struct ggml_tensor * ggml_cont_inplace( | ||||
|             struct ggml_context * ctx, | ||||
|             struct ggml_tensor  * a); | ||||
|  | ||||
|     // return view(a), b specifies the new shape | ||||
|     // TODO: when we start computing gradient, make a copy instead of view | ||||
|     GGML_API struct ggml_tensor * ggml_reshape( | ||||
| @@ -1268,6 +1292,16 @@ extern "C" { | ||||
|     typedef void (*ggml_custom2_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); | ||||
|     typedef void (*ggml_custom3_op_f32_t)(struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *, const struct ggml_tensor *); | ||||
|  | ||||
|     GGML_API struct ggml_tensor * ggml_unary( | ||||
|             struct ggml_context * ctx, | ||||
|              struct ggml_tensor * a, | ||||
|              enum ggml_unary_op op); | ||||
|  | ||||
|     GGML_API struct ggml_tensor * ggml_unary_inplace( | ||||
|         struct ggml_context * ctx, | ||||
|         struct ggml_tensor  * a, | ||||
|         enum ggml_unary_op op); | ||||
|  | ||||
|     GGML_API struct ggml_tensor * ggml_map_unary_f32( | ||||
|             struct ggml_context        * ctx, | ||||
|             struct ggml_tensor         * a, | ||||
|   | ||||
| @@ -64,7 +64,7 @@ void get_random_dims(int64_t * dims, int ndims) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| struct ggml_tensor * get_random_tensor( | ||||
| struct ggml_tensor * get_random_tensor_f32( | ||||
|         struct ggml_context * ctx0, | ||||
|         int ndims, | ||||
|         int64_t ne[], | ||||
| @@ -112,7 +112,55 @@ struct ggml_tensor * get_random_tensor( | ||||
|     return result; | ||||
| } | ||||
|  | ||||
| struct ggml_tensor * get_random_tensor_int( | ||||
| struct ggml_tensor * get_random_tensor_f16( | ||||
|         struct ggml_context * ctx0, | ||||
|         int ndims, | ||||
|         int64_t ne[], | ||||
|         float fmin, | ||||
|         float fmax) { | ||||
|     struct ggml_tensor * result = ggml_new_tensor(ctx0, GGML_TYPE_F16, ndims, ne); | ||||
|  | ||||
|     switch (ndims) { | ||||
|         case 1: | ||||
|             for (int i0 = 0; i0 < ne[0]; i0++) { | ||||
|                 ((ggml_fp16_t *)result->data)[i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); | ||||
|             } | ||||
|             break; | ||||
|         case 2: | ||||
|             for (int i1 = 0; i1 < ne[1]; i1++) { | ||||
|                 for (int i0 = 0; i0 < ne[0]; i0++) { | ||||
|                     ((ggml_fp16_t *)result->data)[i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); | ||||
|                 } | ||||
|             } | ||||
|             break; | ||||
|         case 3: | ||||
|             for (int i2 = 0; i2 < ne[2]; i2++) { | ||||
|                 for (int i1 = 0; i1 < ne[1]; i1++) { | ||||
|                     for (int i0 = 0; i0 < ne[0]; i0++) { | ||||
|                         ((ggml_fp16_t *)result->data)[i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             break; | ||||
|         case 4: | ||||
|             for (int i3 = 0; i3 < ne[3]; i3++) { | ||||
|                 for (int i2 = 0; i2 < ne[2]; i2++) { | ||||
|                     for (int i1 = 0; i1 < ne[1]; i1++) { | ||||
|                         for (int i0 = 0; i0 < ne[0]; i0++) { | ||||
|                             ((ggml_fp16_t *)result->data)[i3*ne[2]*ne[1]*ne[0] + i2*ne[1]*ne[0] + i1*ne[0] + i0] = ggml_fp32_to_fp16(frand()*(fmax - fmin) + fmin); | ||||
|                         } | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|             break; | ||||
|         default: | ||||
|             assert(false); | ||||
|     }; | ||||
|  | ||||
|     return result; | ||||
| } | ||||
|  | ||||
| struct ggml_tensor * get_random_tensor_i32( | ||||
|         struct ggml_context * ctx0, | ||||
|         int ndims, | ||||
|         int64_t ne[], | ||||
| @@ -160,23 +208,6 @@ struct ggml_tensor * get_random_tensor_int( | ||||
|     return result; | ||||
| } | ||||
|  | ||||
| float get_element(const struct ggml_tensor * t, int idx) { | ||||
|     if (t->type == GGML_TYPE_F32) { | ||||
|         return ((float *)t->data)[idx]; | ||||
|     } | ||||
|  | ||||
|     if (t->type == GGML_TYPE_I32) { | ||||
|         return ((int32_t *)t->data)[idx]; | ||||
|     } | ||||
|  | ||||
|     assert(false); | ||||
|     return INFINITY; | ||||
| } | ||||
|  | ||||
| void set_element(struct ggml_tensor * t, int idx, float value) { | ||||
|     ((float *)t->data)[idx] = value; | ||||
| } | ||||
|  | ||||
| void print_elements(const char* label, const struct ggml_tensor * t) { | ||||
|     if (!t) { | ||||
|         printf("%s: %s = null\n", __func__, label); | ||||
| @@ -186,7 +217,7 @@ void print_elements(const char* label, const struct ggml_tensor * t) { | ||||
|     printf("%s: %s = [", __func__, label); | ||||
|     for (int k = 0; k < nelements; ++k) { | ||||
|         if (k > 0) { printf(", "); } | ||||
|         printf("%.5f", get_element(t, k)); | ||||
|         printf("%.5f", ggml_get_f32_1d(t, k)); | ||||
|     } | ||||
|     printf("] shape: ["); | ||||
|     for (int k = 0; k < t->n_dims; ++k) { | ||||
| @@ -237,23 +268,23 @@ bool check_gradient( | ||||
|         const int nelements = ggml_nelements(x[i]); | ||||
|         for (int k = 0; k < nelements; ++k) { | ||||
|             // compute gradient using finite differences | ||||
|             const float x0 = get_element(x[i], k); | ||||
|             const float x0 = ggml_get_f32_1d(x[i], k); | ||||
|             const float xm = x0 - eps; | ||||
|             const float xp = x0 + eps; | ||||
|             set_element(x[i], k, xp); | ||||
|             ggml_set_f32_1d(x[i], k, xp); | ||||
|  | ||||
|             ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); | ||||
|  | ||||
|             const float f0 = ggml_get_f32_1d(f, 0); | ||||
|  | ||||
|             set_element(x[i], k, xm); | ||||
|             ggml_set_f32_1d(x[i], k, xm); | ||||
|  | ||||
|             ggml_graph_compute_with_ctx(ctx0, &gf, n_threads); | ||||
|  | ||||
|             const float f1 = ggml_get_f32_1d(f, 0); | ||||
|             const float g0 = (f0 - f1)/(2.0f*eps); | ||||
|  | ||||
|             set_element(x[i], k, x0); | ||||
|             ggml_set_f32_1d(x[i], k, x0); | ||||
|  | ||||
|             // compute gradient using backward graph | ||||
|             ggml_graph_reset  (&gf); | ||||
| @@ -261,7 +292,7 @@ bool check_gradient( | ||||
|  | ||||
|             ggml_graph_compute_with_ctx(ctx0, &gb, n_threads); | ||||
|  | ||||
|             const float g1 = get_element(x[i]->grad, k); | ||||
|             const float g1 = ggml_get_f32_1d(x[i]->grad, k); | ||||
|  | ||||
|             const float error_abs = fabsf(g0 - g1); | ||||
|             const float error_rel = g0 != 0 ? fabsf(g0 - g1)/fabsf(g0) : 0; | ||||
| @@ -392,19 +423,35 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|         struct ggml_tensor * x[MAX_NARGS]; | ||||
|  | ||||
|         // add | ||||
|         // add f32 | ||||
|         { | ||||
|             const int nargs = 2; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); | ||||
|  | ||||
|                 check_gradient("add", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); | ||||
|                 check_gradient("add f32", ctx0, x, f, ndims, nargs, 1e-3f, 2e-3f, 2e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // add f16 | ||||
|         { | ||||
|             const int nargs = 2; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_add(ctx0, x[0], x[1])); | ||||
|  | ||||
|                 check_gradient("add f16", ctx0, x, f, ndims, nargs, 1e-1f, 2e-1f, 2e-1f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
| @@ -414,7 +461,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -430,7 +477,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -446,7 +493,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, 0.5f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, 0.5f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -462,7 +509,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -478,7 +525,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -494,7 +541,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, 2.0f*1e-3f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -510,7 +557,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -527,7 +574,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -537,6 +584,40 @@ int main(int argc, const char ** argv) { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // mean, not yet fully implemented | ||||
|         if(0) | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_mean(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("mean", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // argmax | ||||
|         if (0) | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_argmax(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("argmax", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // repeat | ||||
|         { | ||||
|             int64_t ne2[4]; | ||||
| @@ -549,15 +630,36 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             const int nargs = 1; | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[1], ggml_repeat(ctx0, x[0], x[1])))); | ||||
|  | ||||
|                 check_gradient("repeat", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // repeat back | ||||
|         { | ||||
|             int64_t ne2[4]; | ||||
|             get_random_dims(ne2, 4); | ||||
|  | ||||
|             ne2[0] = ne[0] * ne2[0]; | ||||
|             ne2[1] = ne[1] * ne2[1]; | ||||
|             ne2[2] = 1; | ||||
|             ne2[3] = 1; | ||||
|  | ||||
|             const int nargs = 1; | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_sqr(ctx0, ggml_sub(ctx0, x[0], ggml_repeat_back(ctx0, x[1], x[0])))); | ||||
|  | ||||
|                 check_gradient("repeat back", ctx0, x, f, ndims, nargs, 1e-3f, 1e-2f, INFINITY); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // abs (finite differences do not work) | ||||
| @@ -566,7 +668,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|         //    for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|         //        for (int i = 0; i < nargs; ++i) { | ||||
|         //            x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|         //            x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|         //            ggml_set_param(ctx0, x[i]); | ||||
|         //        } | ||||
|  | ||||
| @@ -576,17 +678,82 @@ int main(int argc, const char ** argv) { | ||||
|         //    } | ||||
|         //} | ||||
|  | ||||
|         // sgn | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor* f = ggml_sum(ctx0, ggml_sgn(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("sgn", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // neg | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor* f = ggml_sum(ctx0, ggml_neg(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("neg", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // step | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor* f = ggml_sum(ctx0, ggml_step(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("step", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // tanh, not yet fully implemented | ||||
|         if(0) | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor* f = ggml_sum(ctx0, ggml_tanh(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("tanh", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // mul_mat | ||||
|         { | ||||
|             const int nargs = 2; | ||||
|  | ||||
|             for (int ndims = 2; ndims <= 2; ++ndims) { | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 { | ||||
|                     int64_t ne2[4]; | ||||
|                     get_random_dims(ne2, 4); | ||||
|                     ne2[0] = ne[0]; | ||||
|                     x[1] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                     x[1] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 } | ||||
|  | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
| @@ -602,13 +769,63 @@ int main(int argc, const char ** argv) { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // elu, not yet fully implemented | ||||
|         if(0) | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor* f = ggml_sum(ctx0, ggml_elu(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("elu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // relu | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor* f = ggml_sum(ctx0, ggml_relu(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("relu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // gelu, not yet fully implemented | ||||
|         if(0) | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
|                 struct ggml_tensor* f = ggml_sum(ctx0, ggml_gelu(ctx0, x[0])); | ||||
|  | ||||
|                 check_gradient("gelu", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, 1e-3f); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // silu | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -629,7 +846,7 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|  | ||||
| @@ -647,8 +864,8 @@ int main(int argc, const char ** argv) { | ||||
|             ne2[0] = 1; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|  | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|                 ggml_set_param(ctx0, x[1]); | ||||
| @@ -659,20 +876,37 @@ int main(int argc, const char ** argv) { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // cpy | ||||
|         // cpy f32 | ||||
|         { | ||||
|             const int nargs = 2; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     x[i] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|                 // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); | ||||
|  | ||||
|                 check_gradient("cpy", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); | ||||
|                 check_gradient("cpy f32", ctx0, x, f, ndims, nargs, 1e-3f, 1e-3f, INFINITY); | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // cpy f16 | ||||
|         { | ||||
|             const int nargs = 2; | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 2; ++ndims) { | ||||
|                 for (int i = 0; i < nargs; ++i) { | ||||
|                     x[i] = get_random_tensor_f16(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                     ggml_set_param(ctx0, x[i]); | ||||
|                 } | ||||
|                 // x[1] is overwritten by x[0], so the gradients don't propagate to x[1] | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cpy(ctx0, x[0], x[1])); | ||||
|  | ||||
|                 check_gradient("cpy f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); | ||||
|             } | ||||
|         } | ||||
|  | ||||
| @@ -689,8 +923,8 @@ int main(int argc, const char ** argv) { | ||||
|                 for (int i = 0; i < ndims; ++i) { | ||||
|                     ne2[0] *= ne[i]; | ||||
|                 } | ||||
|                 x[0] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|  | ||||
| @@ -712,8 +946,8 @@ int main(int argc, const char ** argv) { | ||||
|                 for (int i = 0; i < ndims; ++i) { | ||||
|                     ne2[0] *= ne[i]; | ||||
|                 } | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|  | ||||
| @@ -729,7 +963,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 2; | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 get_random_dims(ne2, 1); | ||||
| @@ -737,7 +971,7 @@ int main(int argc, const char ** argv) { | ||||
|                     get_random_dims(ne2, 1); | ||||
|                 } | ||||
|  | ||||
|                 x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[1]); | ||||
|  | ||||
|                 const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); | ||||
| @@ -758,7 +992,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 2; | ||||
|             for (int ndims = 2; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 get_random_dims(ne2, 2); | ||||
| @@ -766,7 +1000,7 @@ int main(int argc, const char ** argv) { | ||||
|                     get_random_dims(ne2, 2); | ||||
|                 } | ||||
|  | ||||
|                 x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[1]); | ||||
|  | ||||
|                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); | ||||
| @@ -790,7 +1024,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 2; | ||||
|             for (int ndims = 3; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 get_random_dims(ne2, 3); | ||||
| @@ -798,7 +1032,7 @@ int main(int argc, const char ** argv) { | ||||
|                     get_random_dims(ne2, 3); | ||||
|                 } | ||||
|  | ||||
|                 x[1] = get_random_tensor(ctx0, 3, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 3, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[1]); | ||||
|  | ||||
|                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); | ||||
| @@ -824,7 +1058,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 2; | ||||
|             for (int ndims = 4; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 get_random_dims(ne2, 4); | ||||
| @@ -832,7 +1066,7 @@ int main(int argc, const char ** argv) { | ||||
|                     get_random_dims(ne2, 4); | ||||
|                 } | ||||
|  | ||||
|                 x[1] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[1]); | ||||
|  | ||||
|                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); | ||||
| @@ -858,7 +1092,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 2; | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 get_random_dims(ne2, 1); | ||||
| @@ -866,7 +1100,7 @@ int main(int argc, const char ** argv) { | ||||
|                     get_random_dims(ne2, 1); | ||||
|                 } | ||||
|  | ||||
|                 x[1] = get_random_tensor(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 1, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[1]); | ||||
|  | ||||
|                 const int max_offset = MAX(0, ggml_nelements(x[0]) - ggml_nelements(x[1])); | ||||
| @@ -887,7 +1121,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 1; | ||||
|             for (int ndims = 2; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 get_random_dims(ne2, 2); | ||||
| @@ -895,7 +1129,7 @@ int main(int argc, const char ** argv) { | ||||
|                     get_random_dims(ne2, 2); | ||||
|                 } | ||||
|  | ||||
|                 x[1] = get_random_tensor(ctx0, 2, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, 2, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[1]); | ||||
|  | ||||
|                 max_offsets[0] = MAX(0, x[0]->ne[0] - x[1]->ne[0]); | ||||
| @@ -915,7 +1149,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 1; | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|  | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
| @@ -941,7 +1175,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 1; | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|  | ||||
|                 get_random_dims(ne2, 2); | ||||
|                 while (ne2[0]*ne2[1] > ggml_nelements(x[0])) { | ||||
| @@ -971,7 +1205,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 1; | ||||
|             for (int ndims = 1; ndims <= 4; ++ndims) { | ||||
|  | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|  | ||||
|                 get_random_dims(ne2, 3); | ||||
|                 while (ne2[0]*ne2[1]*ne2[2] > ggml_nelements(x[0])) { | ||||
| @@ -1010,7 +1244,7 @@ int main(int argc, const char ** argv) { | ||||
|                 for (int i=ndims; i<4; ++i) { | ||||
|                     ne2[i] = 1; | ||||
|                 } | ||||
|                 x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); | ||||
|  | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
| @@ -1043,7 +1277,7 @@ int main(int argc, const char ** argv) { | ||||
|                 for (int i=ndims; i<4; ++i) { | ||||
|                     ne2[i] = 1; | ||||
|                 } | ||||
|                 x[0] = get_random_tensor(ctx0, 4, ne2, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, 4, ne2, -1.0f, 1.0f); | ||||
|  | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
| @@ -1060,8 +1294,8 @@ int main(int argc, const char ** argv) { | ||||
|             int64_t ne3[4] = {1+irand(ne[1]), 1, 1, 1}; | ||||
|             const int nargs = 1; | ||||
|             const int ndims = 2; | ||||
|             x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|             x[1] = get_random_tensor_int(ctx0, 1, ne3, 0, ne2[1]); | ||||
|             x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|             x[1] = get_random_tensor_i32(ctx0, 1, ne3, 0, ne2[1]); | ||||
|  | ||||
|             ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
| @@ -1075,7 +1309,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 1; | ||||
|             const int ndims = 2; | ||||
|  | ||||
|             x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|             x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|             ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|             int n_past = irand(ne[0]); | ||||
| @@ -1090,7 +1324,7 @@ int main(int argc, const char ** argv) { | ||||
|             const int nargs = 1; | ||||
|             const int ndims = 2; | ||||
|  | ||||
|             x[0] = get_random_tensor(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|             x[0] = get_random_tensor_f32(ctx0, ndims, ne, -1.0f, 1.0f); | ||||
|             ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|             int n_past = irand(ne[0]); | ||||
| @@ -1108,7 +1342,7 @@ int main(int argc, const char ** argv) { | ||||
|             get_random_dims(ne2, 4); | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 3; ++ndims) { | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_soft_max(ctx0, x[0])); | ||||
| @@ -1125,8 +1359,8 @@ int main(int argc, const char ** argv) { | ||||
|             get_random_dims(ne2, 4); | ||||
|  | ||||
|             for (int ndims = 1; ndims <= 3; ++ndims) { | ||||
|                 x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor(ctx0, ndims, ne2, 0.0f, 1.0f); | ||||
|                 x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                 x[1] = get_random_tensor_f32(ctx0, ndims, ne2, 0.0f, 1.0f); | ||||
|                 ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                 struct ggml_tensor * f = ggml_sum(ctx0, ggml_cross_entropy_loss(ctx0, x[0], x[1])); | ||||
| @@ -1136,7 +1370,7 @@ int main(int argc, const char ** argv) { | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // rope | ||||
|         // rope f32 | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
| @@ -1148,7 +1382,7 @@ int main(int argc, const char ** argv) { | ||||
|             for (int ndims = 3; ndims <= 4; ++ndims) { | ||||
|                 for (int mode = 0; mode < 4; ++mode) { | ||||
|                     for (int n_past = 1; n_past < ne2[2]; ++n_past) { | ||||
|                         x[0] = get_random_tensor(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|                         x[0] = get_random_tensor_f32(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|  | ||||
|                         ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
| @@ -1163,14 +1397,48 @@ int main(int argc, const char ** argv) { | ||||
|  | ||||
|                         struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); | ||||
|  | ||||
|                         GGML_PRINT_DEBUG("rope: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); | ||||
|                         check_gradient("rope", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); | ||||
|                         GGML_PRINT_DEBUG("rope f32: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); | ||||
|                         check_gradient("rope f32", ctx0, x, f, ndims, nargs, 1e-2f, 1e-3f, INFINITY); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // flash_attn | ||||
|         // rope f16 | ||||
|         { | ||||
|             const int nargs = 1; | ||||
|  | ||||
|             int64_t ne2[4]; | ||||
|             get_random_dims(ne2, 4); | ||||
|             ne2[0] += ne2[0] % 2; | ||||
|             int n_rot = ne2[0]; | ||||
|  | ||||
|             for (int ndims = 3; ndims <= 4; ++ndims) { | ||||
|                 for (int mode = 0; mode < 4; ++mode) { | ||||
|                     for (int n_past = 1; n_past < ne2[2]; ++n_past) { | ||||
|                         x[0] = get_random_tensor_f16(ctx0, ndims, ne2, -1.0f, 1.0f); | ||||
|  | ||||
|                         ggml_set_param(ctx0, x[0]); | ||||
|  | ||||
|                         const bool skip_past = (mode & 1); | ||||
|                         if (skip_past) { | ||||
|                             // we have no past, so this would have to work on uninitialized memory. | ||||
|                             // we only test the gradients here; | ||||
|                             // skip_past should have no influence on gradient computation. | ||||
|                             // so when other modes work, we assume that this does as well. | ||||
|                             continue; | ||||
|                         } | ||||
|  | ||||
|                         struct ggml_tensor * f = ggml_sum(ctx0, ggml_rope(ctx0, x[0], n_past, n_rot, mode, 0)); | ||||
|  | ||||
|                         GGML_PRINT_DEBUG("rope f16: n_past: %d n_rot: %d mode: %d\n", n_past, n_rot, mode); | ||||
|                         check_gradient("rope f16", ctx0, x, f, ndims, nargs, 1e-1f, 1e-1f, INFINITY); | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // flash_attn f32 | ||||
|         { | ||||
|             const int nargs = 3; | ||||
|  | ||||
| @@ -1196,16 +1464,57 @@ int main(int argc, const char ** argv) { | ||||
|                         nek[3] = 1; | ||||
|                         nev[3] = 1; | ||||
|                     } | ||||
|                     x[0] = get_random_tensor(ctx0, ndims, neq, -0.1250f, 0.1250f); | ||||
|                     x[1] = get_random_tensor(ctx0, ndims, nek, -0.1250f, 0.1250f); | ||||
|                     x[2] = get_random_tensor(ctx0, ndims, nev, -0.1250f, 0.1250f); | ||||
|                     x[0] = get_random_tensor_f32(ctx0, ndims, neq, -0.1250f, 0.1250f); | ||||
|                     x[1] = get_random_tensor_f32(ctx0, ndims, nek, -0.1250f, 0.1250f); | ||||
|                     x[2] = get_random_tensor_f32(ctx0, ndims, nev, -0.1250f, 0.1250f); | ||||
|                     ggml_set_param(ctx0, x[0]); | ||||
|                     ggml_set_param(ctx0, x[1]); | ||||
|                     ggml_set_param(ctx0, x[2]); | ||||
|  | ||||
|                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); | ||||
|  | ||||
|                     check_gradient("flash_attn", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); | ||||
|                     check_gradient("flash_attn f32", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // flash_attn f16, not yet fully implemented | ||||
|         if(0) | ||||
|         { | ||||
|             const int nargs = 3; | ||||
|  | ||||
|             int64_t ne2[4]; | ||||
|  | ||||
|             get_random_dims(ne2, 4); | ||||
|             int64_t D = ne2[0]; | ||||
|             int64_t N = ne2[1]; | ||||
|             int64_t M = ne2[2] + N; | ||||
|             int64_t B = ne2[3]; | ||||
|  | ||||
|             for (int masked = 0; masked <= 1; ++masked) { | ||||
|                 for (int ndims = 2; ndims <= 4; ++ndims) { | ||||
|                     int64_t neq[4] = { D, N, B, ne[3] }; | ||||
|                     int64_t nek[4] = { D, M, B, ne[3] }; | ||||
|                     int64_t nev[4] = { M, D, B, ne[3] }; | ||||
|                     if (ndims == 2) { | ||||
|                         neq[2] = 1; neq[3] = 1; | ||||
|                         nek[2] = 1; nek[3] = 1; | ||||
|                         nev[2] = 1; nev[3] = 1; | ||||
|                     } else if (ndims == 3) { | ||||
|                         neq[3] = 1; | ||||
|                         nek[3] = 1; | ||||
|                         nev[3] = 1; | ||||
|                     } | ||||
|                     x[0] = get_random_tensor_f16(ctx0, ndims, neq, -0.1250f, 0.1250f); | ||||
|                     x[1] = get_random_tensor_f16(ctx0, ndims, nek, -0.1250f, 0.1250f); | ||||
|                     x[2] = get_random_tensor_f16(ctx0, ndims, nev, -0.1250f, 0.1250f); | ||||
|                     ggml_set_param(ctx0, x[0]); | ||||
|                     ggml_set_param(ctx0, x[1]); | ||||
|                     ggml_set_param(ctx0, x[2]); | ||||
|  | ||||
|                     struct ggml_tensor * f = ggml_sum(ctx0, ggml_flash_attn(ctx0, x[0], x[1], x[2], (masked == 0))); | ||||
|  | ||||
|                     check_gradient("flash_attn f16", ctx0, x, f, ndims, nargs, 1.5e-4f, INFINITY, 3.5f); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|   | ||||
| @@ -125,9 +125,9 @@ int main(void) { | ||||
|     }; | ||||
|     struct ggml_context * ctx = ggml_init(params); | ||||
|  | ||||
|     int64_t ne1[4] = {4, 1024, 1, 1}; | ||||
|     int64_t ne2[4] = {4, 2048, 1, 1};; | ||||
|     int64_t ne3[4] = {1024, 2048, 1, 1}; | ||||
|     int64_t ne1[4] = {4, 128, 1, 1}; | ||||
|     int64_t ne2[4] = {4, 256, 1, 1};; | ||||
|     int64_t ne3[4] = {128, 256, 1, 1}; | ||||
|  | ||||
|     struct ggml_tensor * a = get_random_tensor(ctx, 2, ne1, -1, +1); | ||||
|     struct ggml_tensor * b = get_random_tensor(ctx, 2, ne2, -1, +1); | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov