mirror of
https://github.com/ggml-org/llama.cpp.git
synced 2025-11-06 09:46:50 +00:00
cann: support q4_0 model (#8822)
This commit is contained in:
@@ -8,6 +8,8 @@
|
||||
|
||||
#include "aclrtlaunch_ascendc_quantize_f32_q8_0.h"
|
||||
#include "aclrtlaunch_ascendc_quantize_f16_q8_0.h"
|
||||
#include "aclrtlaunch_ascendc_quantize_f16_to_q4_0.h"
|
||||
#include "aclrtlaunch_ascendc_quantize_f32_to_q4_0.h"
|
||||
|
||||
#include "aclrtlaunch_ascendc_dup_by_rows_fp16.h"
|
||||
#include "aclrtlaunch_ascendc_dup_by_rows_fp32.h"
|
||||
|
||||
Reference in New Issue
Block a user