mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	ggml: update kleidiai to v1.13.0 (#15663)
This commit is contained in:
		@@ -14,6 +14,7 @@
 | 
			
		||||
 | 
			
		||||
#include "kai_lhs_pack_bf16p2vlx2_f32_sme.h"
 | 
			
		||||
#include "kai_lhs_quant_pack_qsi8d32p_f32.h"
 | 
			
		||||
#include "kai_lhs_quant_pack_qsi8d32p4x8sb_f32_neon.h"
 | 
			
		||||
#include "kai_lhs_quant_pack_qsi8d32p_f32_neon.h"
 | 
			
		||||
 | 
			
		||||
#include "kai_rhs_pack_kxn_bf16p2vlx2b_f32_x32_sme.h"
 | 
			
		||||
@@ -127,6 +128,12 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p1vlx4_qsi4c32p4vlx4_1vlx4vl_sme2_mopa,
 | 
			
		||||
        },
 | 
			
		||||
        /* .gemm_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32_neon,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon,
 | 
			
		||||
            /* .pack_func             = */ kai_run_lhs_quant_pack_qsi8d32p_f32_neon,
 | 
			
		||||
        },
 | 
			
		||||
        /* SME GEMV */
 | 
			
		||||
        /* .kern_info = */ {
 | 
			
		||||
            /* .get_m_step            = */ kai_get_m_step_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot,
 | 
			
		||||
@@ -141,7 +148,7 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4vlx4_1x4vl_sme2_sdot,
 | 
			
		||||
        },
 | 
			
		||||
        /* .lhs_info = */ {
 | 
			
		||||
        /* .gemv_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32_neon,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32_neon,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32_neon,
 | 
			
		||||
@@ -173,6 +180,12 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa,
 | 
			
		||||
        },
 | 
			
		||||
        /* .gemm_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_pack_bf16p2vlx2_f32_sme,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_pack_bf16p2vlx2_f32_sme,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_pack_bf16p2vlx2_f32_sme,
 | 
			
		||||
            /* .pack_func             = */ kai_run_lhs_pack_bf16p2vlx2_f32_sme,
 | 
			
		||||
        },
 | 
			
		||||
        /* SME GEMV */
 | 
			
		||||
        /* .kern_info = */ {
 | 
			
		||||
            /* .get_m_step            = */ kai_get_m_step_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa,
 | 
			
		||||
@@ -187,7 +200,7 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_bf16p2vlx2_bf16p2vlx2_2vlx2vl_sme2_mopa,
 | 
			
		||||
        },
 | 
			
		||||
        /* .lhs_info = */ {
 | 
			
		||||
        /* .gemv_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_pack_bf16p2vlx2_f32_sme,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_pack_bf16p2vlx2_f32_sme,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_pack_bf16p2vlx2_f32_sme,
 | 
			
		||||
@@ -222,6 +235,12 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod,
 | 
			
		||||
        },
 | 
			
		||||
        /* .gemm_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .pack_func             = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
        },
 | 
			
		||||
        /* DOTPROD GEMV */
 | 
			
		||||
        /* .kern_info = */ {
 | 
			
		||||
            /* .get_m_step            = */ kai_get_m_step_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
 | 
			
		||||
@@ -236,7 +255,7 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
 | 
			
		||||
        },
 | 
			
		||||
        /* .lhs_info = */ {
 | 
			
		||||
        /* .gemv_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
@@ -270,6 +289,12 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm,
 | 
			
		||||
        },
 | 
			
		||||
        /* .gemm_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
            /* .pack_func             = */ kai_run_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
        },
 | 
			
		||||
        /* i8mm GEMV */
 | 
			
		||||
        /* .kern_info = */ {
 | 
			
		||||
            /* .get_m_step            = */ kai_get_m_step_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
 | 
			
		||||
@@ -284,7 +309,7 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
 | 
			
		||||
        },
 | 
			
		||||
        /* .lhs_info = */ {
 | 
			
		||||
        /* .gemv_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
@@ -319,6 +344,12 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p4x8_qsi4c32p4x8_16x4_neon_i8mm,
 | 
			
		||||
        },
 | 
			
		||||
        /* .gemm_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
            /* .pack_func             = */ kai_run_lhs_quant_pack_qsi8d32p4x8sb_f32_neon,
 | 
			
		||||
        },
 | 
			
		||||
        /* i8mm GEMV */
 | 
			
		||||
        /* .kern_info = */ {
 | 
			
		||||
            /* .get_m_step            = */ kai_get_m_step_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
 | 
			
		||||
@@ -333,7 +364,7 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p1x8_qsi4c32p4x8_1x4x32_neon_dotprod,
 | 
			
		||||
        },
 | 
			
		||||
        /* .lhs_info = */ {
 | 
			
		||||
        /* .gemv_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
@@ -367,6 +398,12 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p4x4_qsi4c32p4x4_16x4_neon_dotprod,
 | 
			
		||||
        },
 | 
			
		||||
        /* .gemm_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .pack_func             = */ kai_run_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
        },
 | 
			
		||||
        /* DOTPROD GEMV */
 | 
			
		||||
        /* .kern_info = */ {
 | 
			
		||||
            /* .get_m_step            = */ kai_get_m_step_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
 | 
			
		||||
@@ -381,7 +418,7 @@ static ggml_kleidiai_kernels gemm_gemv_kernels[] = {
 | 
			
		||||
            /* .get_dst_size          = */ kai_get_dst_size_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
 | 
			
		||||
            /* .run_kernel            = */ kai_run_matmul_clamp_f32_qsi8d32p1x4_qsi4c32p4x4_1x4_neon_dotprod,
 | 
			
		||||
        },
 | 
			
		||||
        /* .lhs_info = */ {
 | 
			
		||||
        /* .gemv_lhs_info = */ {
 | 
			
		||||
            /* .get_offset            = */ kai_get_lhs_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .get_packed_offset     = */ kai_get_lhs_packed_offset_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
            /* .packed_size           = */ kai_get_lhs_packed_size_lhs_quant_pack_qsi8d32p_f32,
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user