CANN: Support Ascend310P to accelerate F32 and F16 Model (#10216)

* CANN: Support Ascend310P to accelerate F32 and F16 models
* Add the SoC-type compile macro ASCEND_310P to the ggml-cann lib
* Remove unused code
* Remove the hard-coded Ascend soc_type compile option in CMakeLists.txt
2025-11-04 09:32:00 +00:00 · 2024-11-22 14:07:20 +08:00
parent a5e47592b6
commit c18610b4ee
7 changed files with 123 additions and 41 deletions
--- a/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
+++ b/ggml/src/ggml-cann/kernels/get_row_q4_0.cpp
@@ -110,9 +110,12 @@ class GET_ROW_Q4_0 {
        LocalTensor<float> output_local = output_queue.AllocTensor<float>();

        // TODO: cast more data to speed up.
+#ifdef ASCEND_310P
+        // TODO: 310P support quantification
+#else
        Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
        Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
-
+#endif
        // Only mul need compile by group.
        half scale = scale_gm.GetValue(scale_offset);