mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	CANN: Support Ascend310P to accelerate F32 and F16 Model (#10216)
* CANN: Support Ascend310P to accelerate F32 and F16 models
* Add the SoC-type compile-option macro ASCEND_310P to the ggml-cann lib
* Remove unused code
* Remove the hard-coded Ascend soc_type compile option in CMakeLists.txt
This commit is contained in:
		@@ -110,9 +110,12 @@ class GET_ROW_Q4_0 {
 | 
			
		||||
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();
 | 
			
		||||
 | 
			
		||||
        // TODO: cast more data to speed up.
 | 
			
		||||
#ifdef ASCEND_310P
 | 
			
		||||
        // TODO: support quantization on 310P
 | 
			
		||||
#else
 | 
			
		||||
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
 | 
			
		||||
        Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
 | 
			
		||||
 | 
			
		||||
#endif
 | 
			
		||||
        // Only mul need compile by group.
 | 
			
		||||
        half scale = scale_gm.GetValue(scale_offset);
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user