mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-29 08:41:22 +00:00 
			
		
		
		
	ggml : use __builtin_amdgcn_sudot4 in __dp4a for gfx11 (#4787)
This commit is contained in:
		 Konstantin Zhuravlyov
					Konstantin Zhuravlyov
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							67984921a7
						
					
				
				
					commit
					63ee677efd
				
			| @@ -183,7 +183,7 @@ static __device__ __forceinline__ int __vsubss4(const int a, const int b) { | ||||
| static __device__ __forceinline__ int __dp4a(const int a, const int b, int c) { | ||||
| #if defined(__gfx906__) || defined(__gfx908__) || defined(__gfx90a__) || defined(__gfx1030__) | ||||
|     c = __builtin_amdgcn_sdot4(a, b, c, false); | ||||
| -#elif defined(__gfx1100__) | ||||
| +#elif defined(RDNA3) | ||||
|     c = __builtin_amdgcn_sudot4( true, a, true, b, c, false); | ||||
| #elif defined(__gfx1010__) || defined(__gfx900__) | ||||
|     int tmp1; | ||||
|   | ||||
		Reference in New Issue
	
	Block a user