CANN: Support Ascend310P to accelerate F32 and F16 Model (#10216)

* CANN Support Ascend310P to accelerate F32 and F16 Model

* Add the SoC-type compile-option macro ASCEND_310P to the ggml-cann lib

* Remove unused code

Remove the hard-coded Ascend soc_type compile option from CMakeLists.txt
This commit is contained in:
leo-pony
2024-11-22 14:07:20 +08:00
committed by GitHub
parent a5e47592b6
commit c18610b4ee
7 changed files with 123 additions and 41 deletions

View File

@@ -110,9 +110,12 @@ class GET_ROW_Q4_0 {
LocalTensor<float> output_local = output_queue.AllocTensor<float>();
// TODO: cast more data to speed up.
#ifdef ASCEND_310P
// TODO: support quantization on 310P
#else
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
#endif
// Only mul need compile by group.
half scale = scale_gm.GetValue(scale_offset);