mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	ggml : optimize rope function to avoid calling powf in the tight loop (#807)
This commit is contained in:
		
							
								
								
									
										22
									
								
								ggml.c
									
									
									
									
									
								
							
							
						
						
									
										22
									
								
								ggml.c
									
									
									
									
									
								
							| @@ -7507,19 +7507,20 @@ static void ggml_compute_forward_rope_f32( | |||||||
|     // row index used to determine which thread to use |     // row index used to determine which thread to use | ||||||
|     int ir = 0; |     int ir = 0; | ||||||
|  |  | ||||||
|  |     const float theta_scale = powf(10000.0, ((float)-2)/n_dims); | ||||||
|  |  | ||||||
|     for (int64_t i3 = 0; i3 < ne3; i3++) { |     for (int64_t i3 = 0; i3 < ne3; i3++) { | ||||||
|         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { |         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { | ||||||
|             const int p = (mode == 0 ? n_past + i2 : i2); |             const int p = (mode == 0 ? n_past + i2 : i2); | ||||||
|             for (int64_t i1 = 0; i1 < ne1; i1++) { |             for (int64_t i1 = 0; i1 < ne1; i1++) { | ||||||
|                 if (ir++ < ir0) continue; |                 if (ir++ < ir0) continue; | ||||||
|                 if (ir   > ir1) break; |                 if (ir   > ir1) break; | ||||||
|  |                 float theta = (float)p; | ||||||
|                 for (int i0 = 0; i0 < n_dims; i0 += 2) { |                 for (int i0 = 0; i0 < n_dims; i0 += 2) { | ||||||
|                     const float theta = powf(10000.0, ((float)-i0)/n_dims); |                     const float cos_theta = cosf(theta); | ||||||
|  |                     const float sin_theta = sinf(theta); | ||||||
|                     const float cos_theta = cosf(p*theta); |  | ||||||
|                     const float sin_theta = sinf(p*theta); |  | ||||||
|  |  | ||||||
|  |                     theta *= theta_scale; | ||||||
|                     const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); |                     const float * const src = (float *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); | ||||||
|                           float * dst_data  = (float *)((char *)  dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); |                           float * dst_data  = (float *)((char *)  dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); | ||||||
|  |  | ||||||
| @@ -7580,19 +7581,20 @@ static void ggml_compute_forward_rope_f16( | |||||||
|     // row index used to determine which thread to use |     // row index used to determine which thread to use | ||||||
|     int ir = 0; |     int ir = 0; | ||||||
|  |  | ||||||
|  |     const float theta_scale = powf(10000.0, ((float)-2)/n_dims); | ||||||
|  |  | ||||||
|     for (int64_t i3 = 0; i3 < ne3; i3++) { |     for (int64_t i3 = 0; i3 < ne3; i3++) { | ||||||
|         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { |         for (int64_t i2 = (mode == 0 ? 0 : n_past); i2 < ne2; i2++) { | ||||||
|             const int p = (mode == 0 ? n_past + i2 : i2); |             const int p = (mode == 0 ? n_past + i2 : i2); | ||||||
|             for (int64_t i1 = 0; i1 < ne1; i1++) { |             for (int64_t i1 = 0; i1 < ne1; i1++) { | ||||||
|                 if (ir++ < ir0) continue; |                 if (ir++ < ir0) continue; | ||||||
|                 if (ir   > ir1) break; |                 if (ir   > ir1) break; | ||||||
|  |                 float theta = (float)p; | ||||||
|                 for (int i0 = 0; i0 < n_dims; i0 += 2) { |                 for (int i0 = 0; i0 < n_dims; i0 += 2) { | ||||||
|                     const float theta = powf(10000.0, ((float)-i0)/n_dims); |                     const float cos_theta = cosf(theta); | ||||||
|  |                     const float sin_theta = sinf(theta); | ||||||
|                     const float cos_theta = cosf(p*theta); |  | ||||||
|                     const float sin_theta = sinf(p*theta); |  | ||||||
|  |  | ||||||
|  |                     theta *= theta_scale; | ||||||
|                     const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); |                     const ggml_fp16_t * const src = (ggml_fp16_t *)((char *) src0->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); | ||||||
|                           ggml_fp16_t * dst_data  = (ggml_fp16_t *)((char *)  dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); |                           ggml_fp16_t * dst_data  = (ggml_fp16_t *)((char *)  dst->data + i3*nb3 + i2*nb2 + i1*nb1 + i0*nb0); | ||||||
|  |  | ||||||
|   | |||||||
		Reference in New Issue
	
	Block a user
	 Howard Su
					Howard Su