|  |  | @@ -1622,6 +1622,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         //CREATE_FA(GGML_TYPE_IQ2_S, iq2_s) |  |  |  |         //CREATE_FA(GGML_TYPE_IQ2_S, iq2_s) | 
			
		
	
		
		
			
				
					
					|  |  |  |         //CREATE_FA(GGML_TYPE_IQ3_XXS, iq3_xxs) |  |  |  |         //CREATE_FA(GGML_TYPE_IQ3_XXS, iq3_xxs) | 
			
		
	
		
		
			
				
					
					|  |  |  |         //CREATE_FA(GGML_TYPE_IQ3_S, iq3_s) |  |  |  |         //CREATE_FA(GGML_TYPE_IQ3_S, iq3_s) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         //CREATE_FA(GGML_TYPE_IQ4_XS, iq4_xs) | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_FA(GGML_TYPE_IQ4_NL, iq4_nl) |  |  |  |         CREATE_FA(GGML_TYPE_IQ4_NL, iq4_nl) | 
			
		
	
		
		
			
				
					
					|  |  |  | #undef CREATE_FA |  |  |  | #undef CREATE_FA | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1655,6 +1656,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f16,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f16,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f16, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f16, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f16,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f16,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ4_XS].f16acc,  matmul_iq4_xs_f16,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f16,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_f16[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f16,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM2(pipeline_matmul_id_f16, matmul_id_f16, wg_denoms, warptile, vk_mat_mat_id_push_constants, 4) |  |  |  |         CREATE_MM2(pipeline_matmul_id_f16, matmul_id_f16, wg_denoms, warptile, vk_mat_mat_id_push_constants, 4) | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1673,6 +1675,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f16,   , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f16,   , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f16, , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f16, , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f16,   , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f16,   , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc,  matmul_id_iq4_xs_f16,  , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f16,  , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f16,  , mmqid_wg_denoms, warptile_mmqid, vk_mat_mat_id_push_constants, 4) | 
			
		
	
		
		
			
				
					
					|  |  |  | #undef CREATE_MM |  |  |  | #undef CREATE_MM | 
			
		
	
		
		
			
				
					
					|  |  |  | #undef CREATE_MM2 |  |  |  | #undef CREATE_MM2 | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1726,6 +1729,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f16acc,  matmul_iq4_xs_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         } else { |  |  |  |         } else { | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc, matmul_q4_0_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_Q4_0].f16acc, matmul_q4_0_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1744,6 +1748,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f16acc,  matmul_iq4_xs_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         } |  |  |  |         } | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1770,6 +1775,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc,  matmul_id_iq4_xs_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             } else { |  |  |  |             } else { | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc, matmul_id_q4_0_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_Q4_0].f16acc, matmul_id_q4_0_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1788,6 +1794,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc,  matmul_id_iq4_xs_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |                 CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             } |  |  |  |             } | 
			
		
	
		
		
			
				
					
					|  |  |  |         } |  |  |  |         } | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1837,6 +1844,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f16acc,   matmul_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f16acc, matmul_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f16acc,   matmul_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f16acc,  matmul_iq4_xs_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f16acc,  matmul_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         // If there's not enough shared memory for row_ids and the result tile, don't create these pipelines. |  |  |  |         // If there's not enough shared memory for row_ids and the result tile, don't create these pipelines. | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1861,6 +1869,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f16acc,   matmul_id_iq2_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f16acc, matmul_id_iq3_xxs_f32, _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f16acc,   matmul_id_iq3_s_f32,   _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f16acc,  matmul_id_iq4_xs_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f16acc,  matmul_id_iq4_nl_f32,  _f16acc, mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |         } |  |  |  |         } | 
			
		
	
		
		
			
				
					
					|  |  |  | #undef CREATE_MM2 |  |  |  | #undef CREATE_MM2 | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1902,6 +1911,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f32acc,   matmul_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ2_S].f32acc,   matmul_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f32acc, matmul_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_XXS].f32acc, matmul_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f32acc,   matmul_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ3_S].f32acc,   matmul_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_XS].f32acc,  matmul_iq4_xs_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f32acc,  matmul_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); |  |  |  |         CREATE_MM(pipeline_dequant_mul_mat_mat[GGML_TYPE_IQ4_NL].f32acc,  matmul_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_push_constants, 3, ); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         // If there's not enough shared memory for row_ids and the result tile, don't create these pipelines. |  |  |  |         // If there's not enough shared memory for row_ids and the result tile, don't create these pipelines. | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1926,6 +1936,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f32acc,   matmul_id_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ2_S].f32acc,   matmul_id_iq2_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f32acc, matmul_id_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_XXS].f32acc, matmul_id_iq3_xxs_f32, , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f32acc,   matmul_id_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ3_S].f32acc,   matmul_id_iq3_s_f32,   , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_XS].f32acc,  matmul_id_iq4_xs_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f32acc,  matmul_id_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); |  |  |  |             CREATE_MM(pipeline_dequant_mul_mat_mat_id[GGML_TYPE_IQ4_NL].f32acc,  matmul_id_iq4_nl_f32,  , mmq_wg_denoms, warptile_mmq, vk_mat_mat_id_push_constants, 4, _id); | 
			
		
	
		
		
			
				
					
					|  |  |  |         } |  |  |  |         } | 
			
		
	
		
		
			
				
					
					|  |  |  | #undef CREATE_MM |  |  |  | #undef CREATE_MM | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1962,6 +1973,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ2_S][i],   "mul_mat_vec_iq2_s_f32_f32_"+std::to_string(i+1),   mul_mat_vec_iq2_s_f32_f32_len,   mul_mat_vec_iq2_s_f32_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ2_S][i],   "mul_mat_vec_iq2_s_f32_f32_"+std::to_string(i+1),   mul_mat_vec_iq2_s_f32_f32_len,   mul_mat_vec_iq2_s_f32_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ3_XXS][i], "mul_mat_vec_iq3_xxs_f32_f32_"+std::to_string(i+1), mul_mat_vec_iq3_xxs_f32_f32_len, mul_mat_vec_iq3_xxs_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ3_XXS][i], "mul_mat_vec_iq3_xxs_f32_f32_"+std::to_string(i+1), mul_mat_vec_iq3_xxs_f32_f32_len, mul_mat_vec_iq3_xxs_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ3_S][i],   "mul_mat_vec_iq3_s_f32_f32_"+std::to_string(i+1),   mul_mat_vec_iq3_s_f32_f32_len,   mul_mat_vec_iq3_s_f32_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ3_S][i],   "mul_mat_vec_iq3_s_f32_f32_"+std::to_string(i+1),   mul_mat_vec_iq3_s_f32_f32_len,   mul_mat_vec_iq3_s_f32_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ4_XS][i],  "mul_mat_vec_iq4_xs_f32_f32_"+std::to_string(i+1),  mul_mat_vec_iq4_xs_f32_f32_len,  mul_mat_vec_iq4_xs_f32_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ4_NL][i],  "mul_mat_vec_iq4_nl_f32_f32_"+std::to_string(i+1),  mul_mat_vec_iq4_nl_f32_f32_len,  mul_mat_vec_iq4_nl_f32_f32_data,  "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f32_f32[GGML_TYPE_IQ4_NL][i],  "mul_mat_vec_iq4_nl_f32_f32_"+std::to_string(i+1),  mul_mat_vec_iq4_nl_f32_f32_len,  mul_mat_vec_iq4_nl_f32_f32_data,  "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_F32 ][i], "mul_mat_vec_f32_f16_f32_"+std::to_string(i+1),  mul_mat_vec_f32_f16_f32_len,  mul_mat_vec_f32_f16_f32_data,  "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2, i+1}, 1); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_F32 ][i], "mul_mat_vec_f32_f16_f32_"+std::to_string(i+1),  mul_mat_vec_f32_f16_f32_len,  mul_mat_vec_f32_f16_f32_data,  "main", 3, sizeof(vk_mat_vec_push_constants), {2, 1, 1}, {device->subgroup_size, 2, i+1}, 1); | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -1981,6 +1993,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ2_S][i],   "mul_mat_vec_iq2_s_f16_f32_"+std::to_string(i+1),   mul_mat_vec_iq2_s_f16_f32_len,   mul_mat_vec_iq2_s_f16_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ2_S][i],   "mul_mat_vec_iq2_s_f16_f32_"+std::to_string(i+1),   mul_mat_vec_iq2_s_f16_f32_len,   mul_mat_vec_iq2_s_f16_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ3_XXS][i], "mul_mat_vec_iq3_xxs_f16_f32_"+std::to_string(i+1), mul_mat_vec_iq3_xxs_f16_f32_len, mul_mat_vec_iq3_xxs_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ3_XXS][i], "mul_mat_vec_iq3_xxs_f16_f32_"+std::to_string(i+1), mul_mat_vec_iq3_xxs_f16_f32_len, mul_mat_vec_iq3_xxs_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ3_S][i],   "mul_mat_vec_iq3_s_f16_f32_"+std::to_string(i+1),   mul_mat_vec_iq3_s_f16_f32_len,   mul_mat_vec_iq3_s_f16_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ3_S][i],   "mul_mat_vec_iq3_s_f16_f32_"+std::to_string(i+1),   mul_mat_vec_iq3_s_f16_f32_len,   mul_mat_vec_iq3_s_f16_f32_data,   "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ4_XS][i],  "mul_mat_vec_iq4_xs_f16_f32_"+std::to_string(i+1),  mul_mat_vec_iq4_xs_f16_f32_len,  mul_mat_vec_iq4_xs_f16_f32_data, "main", 3, sizeof(vk_mat_vec_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ4_NL][i],  "mul_mat_vec_iq4_nl_f16_f32_"+std::to_string(i+1),  mul_mat_vec_iq4_nl_f16_f32_len,  mul_mat_vec_iq4_nl_f16_f32_data,  "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq, i+1}, 1, true); |  |  |  |         ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_f16_f32[GGML_TYPE_IQ4_NL][i],  "mul_mat_vec_iq4_nl_f16_f32_"+std::to_string(i+1),  mul_mat_vec_iq4_nl_f16_f32_len,  mul_mat_vec_iq4_nl_f16_f32_data,  "main", 3, sizeof(vk_mat_vec_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq, i+1}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |     } |  |  |  |     } | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -2001,6 +2014,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ2_S],   "mul_mat_vec_id_iq2_s_f32",   mul_mat_vec_id_iq2_s_f32_len,   mul_mat_vec_id_iq2_s_f32_data,   "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ2_S],   "mul_mat_vec_id_iq2_s_f32",   mul_mat_vec_id_iq2_s_f32_len,   mul_mat_vec_id_iq2_s_f32_data,   "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ3_XXS], "mul_mat_vec_id_iq3_xxs_f32", mul_mat_vec_id_iq3_xxs_f32_len, mul_mat_vec_id_iq3_xxs_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ3_XXS], "mul_mat_vec_id_iq3_xxs_f32", mul_mat_vec_id_iq3_xxs_f32_len, mul_mat_vec_id_iq3_xxs_f32_data, "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ3_S],   "mul_mat_vec_id_iq3_s_f32",   mul_mat_vec_id_iq3_s_f32_len,   mul_mat_vec_id_iq3_s_f32_data,   "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ3_S],   "mul_mat_vec_id_iq3_s_f32",   mul_mat_vec_id_iq3_s_f32_len,   mul_mat_vec_id_iq3_s_f32_data,   "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ4_XS],  "mul_mat_vec_id_iq4_xs_f32",  mul_mat_vec_id_iq4_xs_f32_len,  mul_mat_vec_id_iq4_xs_f32_data,  "main", 4, sizeof(vk_mat_vec_id_push_constants), {rm_kq, 1, 1}, {subgroup_size_16, rm_kq}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ4_NL],  "mul_mat_vec_id_iq4_nl_f32",  mul_mat_vec_id_iq4_nl_f32_len,  mul_mat_vec_id_iq4_nl_f32_data,  "main", 4, sizeof(vk_mat_vec_id_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq}, 1, true); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant_mul_mat_vec_id_f32[GGML_TYPE_IQ4_NL],  "mul_mat_vec_id_iq4_nl_f32",  mul_mat_vec_id_iq4_nl_f32_len,  mul_mat_vec_id_iq4_nl_f32_data,  "main", 4, sizeof(vk_mat_vec_id_push_constants), {2*rm_stdq, 1, 1}, {subgroup_size_16, 2*rm_stdq}, 1, true); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     // dequant shaders |  |  |  |     // dequant shaders | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -2020,6 +2034,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ2_S],   "dequant_iq2_s",   dequant_iq2_s_len,   dequant_iq2_s_data,   "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ2_S],   "dequant_iq2_s",   dequant_iq2_s_len,   dequant_iq2_s_data,   "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ3_XXS], "dequant_iq3_xxs", dequant_iq3_xxs_len, dequant_iq3_xxs_data, "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ3_XXS], "dequant_iq3_xxs", dequant_iq3_xxs_len, dequant_iq3_xxs_data, "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ3_S],   "dequant_iq3_s",   dequant_iq3_s_len,   dequant_iq3_s_data,   "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ3_S],   "dequant_iq3_s",   dequant_iq3_s_len,   dequant_iq3_s_data,   "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ4_XS],  "dequant_iq4_xs",  dequant_iq4_xs_len,  dequant_iq4_xs_data,  "main", 2, 5 * sizeof(uint32_t), {256 * 32, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ4_NL],  "dequant_iq4_nl",  dequant_iq4_nl_len,  dequant_iq4_nl_data,  "main", 2, 5 * sizeof(uint32_t), {256 * 16, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_dequant[GGML_TYPE_IQ4_NL],  "dequant_iq4_nl",  dequant_iq4_nl_len,  dequant_iq4_nl_data,  "main", 2, 5 * sizeof(uint32_t), {256 * 16, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     // get_rows |  |  |  |     // get_rows | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -2035,6 +2050,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ2_S],   "get_rows_iq2_s",   get_rows_iq2_s_len,   get_rows_iq2_s_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ2_S],   "get_rows_iq2_s",   get_rows_iq2_s_len,   get_rows_iq2_s_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ3_XXS], "get_rows_iq3_xxs", get_rows_iq3_xxs_len, get_rows_iq3_xxs_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ3_XXS], "get_rows_iq3_xxs", get_rows_iq3_xxs_len, get_rows_iq3_xxs_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ3_S],   "get_rows_iq3_s",   get_rows_iq3_s_len,   get_rows_iq3_s_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ3_S],   "get_rows_iq3_s",   get_rows_iq3_s_len,   get_rows_iq3_s_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ4_XS],  "get_rows_iq4_xs",  get_rows_iq4_xs_len,  get_rows_iq4_xs_data,  "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ4_NL],  "get_rows_iq4_nl",  get_rows_iq4_nl_len,  get_rows_iq4_nl_data,  "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows[GGML_TYPE_IQ4_NL],  "get_rows_iq4_nl",  get_rows_iq4_nl_len,  get_rows_iq4_nl_data,  "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_F32 ], "get_rows_f32_f32",  get_rows_f32_f32_len,  get_rows_f32_f32_data,  "main", 3, sizeof(vk_op_binary_push_constants), { 512, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_F32 ], "get_rows_f32_f32",  get_rows_f32_f32_len,  get_rows_f32_f32_data,  "main", 3, sizeof(vk_op_binary_push_constants), { 512, 1, 1}, {}, 1); | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -2049,6 +2065,7 @@ static void ggml_vk_load_shaders(vk_device& device) { | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ2_S],   "get_rows_iq2_s_f32",   get_rows_iq2_s_f32_len,   get_rows_iq2_s_f32_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ2_S],   "get_rows_iq2_s_f32",   get_rows_iq2_s_f32_len,   get_rows_iq2_s_f32_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ3_XXS], "get_rows_iq3_xxs_f32", get_rows_iq3_xxs_f32_len, get_rows_iq3_xxs_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ3_XXS], "get_rows_iq3_xxs_f32", get_rows_iq3_xxs_f32_len, get_rows_iq3_xxs_f32_data, "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ3_S],   "get_rows_iq3_s_f32",   get_rows_iq3_s_f32_len,   get_rows_iq3_s_f32_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ3_S],   "get_rows_iq3_s_f32",   get_rows_iq3_s_f32_len,   get_rows_iq3_s_f32_data,   "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ4_XS],  "get_rows_iq4_xs_f32",  get_rows_iq4_xs_f32_len,  get_rows_iq4_xs_f32_data,  "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ4_NL],  "get_rows_iq4_nl_f32",  get_rows_iq4_nl_f32_len,  get_rows_iq4_nl_f32_data,  "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_get_rows_f32[GGML_TYPE_IQ4_NL],  "get_rows_iq4_nl_f32",  get_rows_iq4_nl_f32_len,  get_rows_iq4_nl_f32_data,  "main", 3, sizeof(vk_op_binary_push_constants), {1024, 1, 1}, {}, 1); | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |  | 
			
		
	
		
		
			
				
					
					|  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_matmul_split_k_reduce, "split_k_reduce", split_k_reduce_len, split_k_reduce_data, "main", 2, 2 * sizeof(uint32_t), {256 * 4, 1, 1}, {}, 1); |  |  |  |     ggml_vk_create_pipeline(device, device->pipeline_matmul_split_k_reduce, "split_k_reduce", split_k_reduce_len, split_k_reduce_data, "main", 2, 2 * sizeof(uint32_t), {256 * 4, 1, 1}, {}, 1); | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -2995,6 +3012,7 @@ static vk_pipeline ggml_vk_get_to_fp16(ggml_backend_vk_context * ctx, ggml_type | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ2_S: |  |  |  |         case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_XXS: |  |  |  |         case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_S: |  |  |  |         case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ4_NL: |  |  |  |         case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |             break; |  |  |  |             break; | 
			
		
	
		
		
			
				
					
					|  |  |  |         default: |  |  |  |         default: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -3048,6 +3066,7 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_pipeline(ggml_backend_vk_conte | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ2_S: |  |  |  |         case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_XXS: |  |  |  |         case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_S: |  |  |  |         case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ4_NL: |  |  |  |         case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |             break; |  |  |  |             break; | 
			
		
	
		
		
			
				
					
					|  |  |  |         default: |  |  |  |         default: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -3084,6 +3103,7 @@ static vk_pipeline ggml_vk_get_dequantize_mul_mat_vec(ggml_backend_vk_context * | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ2_S: |  |  |  |         case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_XXS: |  |  |  |         case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_S: |  |  |  |         case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ4_NL: |  |  |  |         case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |             break; |  |  |  |             break; | 
			
		
	
		
		
			
				
					
					|  |  |  |         default: |  |  |  |         default: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -3132,6 +3152,7 @@ static vk_matmul_pipeline ggml_vk_get_mul_mat_mat_id_pipeline(ggml_backend_vk_co | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ2_S: |  |  |  |         case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_XXS: |  |  |  |         case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_S: |  |  |  |         case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ4_NL: |  |  |  |         case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |             break; |  |  |  |             break; | 
			
		
	
		
		
			
				
					
					|  |  |  |         default: |  |  |  |         default: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -3163,6 +3184,7 @@ static vk_pipeline ggml_vk_get_dequantize_mul_mat_vec_id(ggml_backend_vk_context | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ2_S: |  |  |  |         case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_XXS: |  |  |  |         case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ3_S: |  |  |  |         case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |         case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |         case GGML_TYPE_IQ4_NL: |  |  |  |         case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |             break; |  |  |  |             break; | 
			
		
	
		
		
			
				
					
					|  |  |  |         default: |  |  |  |         default: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -8037,6 +8059,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ2_S: |  |  |  |                     case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ3_XXS: |  |  |  |                     case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ3_S: |  |  |  |                     case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                     case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ4_NL: |  |  |  |                     case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |                         break; |  |  |  |                         break; | 
			
		
	
		
		
			
				
					
					|  |  |  |                     default: |  |  |  |                     default: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -8110,6 +8133,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm | 
			
		
	
		
		
			
				
					
					|  |  |  |                 //case GGML_TYPE_IQ2_S: |  |  |  |                 //case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |                 //case GGML_TYPE_IQ3_XXS: |  |  |  |                 //case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |                 //case GGML_TYPE_IQ3_S: |  |  |  |                 //case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                 //case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |                 case GGML_TYPE_IQ4_NL: |  |  |  |                 case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     break; |  |  |  |                     break; | 
			
		
	
		
		
			
				
					
					|  |  |  |                 default: |  |  |  |                 default: | 
			
		
	
	
		
		
			
				
					
					|  |  | @@ -8132,6 +8156,7 @@ static bool ggml_backend_vk_device_supports_op(ggml_backend_dev_t dev, const ggm | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ2_S: |  |  |  |                     case GGML_TYPE_IQ2_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ3_XXS: |  |  |  |                     case GGML_TYPE_IQ3_XXS: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ3_S: |  |  |  |                     case GGML_TYPE_IQ3_S: | 
			
		
	
		
		
			
				
					
					|  |  |  |  |  |  |  |                     case GGML_TYPE_IQ4_XS: | 
			
		
	
		
		
			
				
					
					|  |  |  |                     case GGML_TYPE_IQ4_NL: |  |  |  |                     case GGML_TYPE_IQ4_NL: | 
			
		
	
		
		
			
				
					
					|  |  |  |                         return true; |  |  |  |                         return true; | 
			
		
	
		
		
			
				
					
					|  |  |  |                     default: |  |  |  |                     default: | 
			
		
	
	
		
		
			
				
					
					|  |  |   |