Mirror of https://github.com/ggml-org/llama.cpp.git (last synced 2025-10-30 08:42:00 +00:00)
			
		
		
		
	ggml : multi-thread ggml_rope() (~3-4 times faster on M1) (#781)
This commit is contained in:
		
							
								
								
									
										41
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										41
									
								
								ggml.c
									
									
									
									
									
								
							| @@ -7238,7 +7238,6 @@ static void ggml_compute_forward_rope_f32( | ||||
|         const struct ggml_tensor * src0, | ||||
|         const struct ggml_tensor * src1, | ||||
|         struct ggml_tensor * dst) { | ||||
|     assert(params->ith == 0); | ||||
|     assert(src1->type == GGML_TYPE_I32); | ||||
|     assert(ggml_nelements(src1) == 3); | ||||
|  | ||||
| @@ -7265,11 +7264,28 @@ static void ggml_compute_forward_rope_f32( | ||||
|  | ||||
|     assert(nb0 == sizeof(float)); | ||||
|  | ||||
|     // TODO: optimize | ||||
|     const int ith = params->ith; | ||||
|     const int nth = params->nth; | ||||
|  | ||||
|     const int nr = ggml_nrows(src0); | ||||
|  | ||||
|     // rows per thread | ||||
|     const int dr = (nr + nth - 1)/nth; | ||||
|  | ||||
|     // row range for this thread | ||||
|     const int ir0 = dr*ith; | ||||
|     const int ir1 = MIN(ir0 + dr, nr); | ||||
|  | ||||
|     // row index used to determine which thread to use | ||||
|     int ir = 0; | ||||
|  | ||||
|     for (int64_t i3 = 0; i3 < ne3; i3++) { | ||||
|         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { | ||||
|             const int p = (mode == 0 ? n_past + i2 : i2); | ||||
|             for (int64_t i1 = 0; i1 < ne1; i1++) { | ||||
|                 if (ir++ < ir0) continue; | ||||
|                 if (ir   > ir1) break; | ||||
|  | ||||
|                 for (int i0 = 0; i0 < n_dims; i0 += 2) { | ||||
|                     const float theta = powf(10000.0, ((float)-i0)/n_dims); | ||||
|  | ||||
| @@ -7295,7 +7311,6 @@ static void ggml_compute_forward_rope_f16( | ||||
|         const struct ggml_tensor * src0, | ||||
|         const struct ggml_tensor * src1, | ||||
|         struct ggml_tensor * dst) { | ||||
|     assert(params->ith == 0); | ||||
|     assert(src1->type == GGML_TYPE_I32); | ||||
|     assert(ggml_nelements(src1) == 3); | ||||
|  | ||||
| @@ -7322,10 +7337,28 @@ static void ggml_compute_forward_rope_f16( | ||||
|  | ||||
|     assert(nb0 == sizeof(ggml_fp16_t)); | ||||
|  | ||||
|     const int ith = params->ith; | ||||
|     const int nth = params->nth; | ||||
|  | ||||
|     const int nr = ggml_nrows(src0); | ||||
|  | ||||
|     // rows per thread | ||||
|     const int dr = (nr + nth - 1)/nth; | ||||
|  | ||||
|     // row range for this thread | ||||
|     const int ir0 = dr*ith; | ||||
|     const int ir1 = MIN(ir0 + dr, nr); | ||||
|  | ||||
|     // row index used to determine which thread to use | ||||
|     int ir = 0; | ||||
|  | ||||
|     for (int64_t i3 = 0; i3 < ne3; i3++) { | ||||
|         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { | ||||
|             const int p = (mode == 0 ? n_past + i2 : i2); | ||||
|             for (int64_t i1 = 0; i1 < ne1; i1++) { | ||||
|                 if (ir++ < ir0) continue; | ||||
|                 if (ir   > ir1) break; | ||||
|  | ||||
|                 for (int i0 = 0; i0 < n_dims; i0 += 2) { | ||||
|                     const float theta = powf(10000.0, ((float)-i0)/n_dims); | ||||
|  | ||||
| @@ -9424,7 +9457,7 @@ void ggml_graph_compute(struct ggml_context * ctx, struct ggml_cgraph * cgraph) | ||||
|                     } break; | ||||
|                 case GGML_OP_ROPE: | ||||
|                     { | ||||
|                         node->n_tasks = 1; | ||||
|                         node->n_tasks = n_threads; | ||||
|                     } break; | ||||
|                 case GGML_OP_CONV_1D_1S: | ||||
|                 case GGML_OP_CONV_1D_2S: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user
	 Georgi Gerganov
					Georgi Gerganov