CANN: Fix SOC_TYPE compile bug (#10519)

* CANN: Fix build failures on Ascend 310P in two cases:
1) SOC_TYPE is specified manually
2) Some unusual compile environments

* Update the CANN backend News content: support F16 and F32 data type models on the Ascend 310P NPU.

* Fix CANN compile failure: assert inside Ascend kernel functions is not supported on some CANN versions
This commit is contained in:
leo-pony
2024-11-28 15:25:24 +08:00
committed by GitHub
parent b7420131bf
commit 605fa66c50
8 changed files with 57 additions and 9 deletions

View File

@@ -2,6 +2,15 @@
// optimize me. Use template to avoid copy code.
using namespace AscendC;
#ifdef ASCEND_310P // 310P not support 4bit get row
// Placeholder kernel entry point for ascendc_get_row_q4_0, compiled only when
// ASCEND_310P is defined (this build does not support the 4-bit get-row
// operation). It keeps the same symbol and parameter list so callers still
// link, but ignores every argument and writes nothing to output_gm.
extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
GM_ADDR input_gm, GM_ADDR indices_gm, GM_ADDR output_gm,
GM_ADDR input_ne_gm, GM_ADDR indices_ne_gm, GM_ADDR indices_nb_gm,
GM_ADDR output_ne_gm, GM_ADDR output_nb_gm) {
// Only report the error instead of aborting, so that subsequent test cases
// can keep running. Of course, the test case that invokes this operator
// still fails.
printf("Ascend310P not support 4bit get row.\n");
}
#else
#define BUFFER_NUM 2
@@ -110,12 +119,9 @@ class GET_ROW_Q4_0 {
LocalTensor<float> output_local = output_queue.AllocTensor<float>();
// TODO: cast more data to speed up.
#ifdef ASCEND_310P
// TODO: 310P support quantification
#else
Cast(cast_local, input_local, RoundMode::CAST_NONE, QK4_0);
Cast(output_local, cast_local, RoundMode::CAST_NONE, QK4_0);
#endif
// Only mul need compile by group.
half scale = scale_gm.GetValue(scale_offset);
@@ -194,3 +200,5 @@ extern "C" __global__ __aicore__ void ascendc_get_row_q4_0(
indices_nb_ub, output_ne_ub, output_nb_ub);
op.calculate();
}
#endif // #ifdef ASCEND_310P