Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-10-30 08:42:00 +00:00
	cuda : re-add q4_0
@@ -29,9 +29,9 @@ void ggml_cuda_op_mul_mat_q(
     const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst};

     switch (src0->type) {
-//        case GGML_TYPE_Q4_0:
-//            mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
-//            break;
+        case GGML_TYPE_Q4_0:
+            mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
+            break;
 //        case GGML_TYPE_Q4_1:
 //            mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
 //            break;

@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q4_0);
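Note on the second hunk: the new file is one of the per-type sources emitted by generate_cu_files.py, so each quantization type's mul_mat_q kernel can build in its own translation unit. The following is a minimal sketch of that explicit-instantiation pattern, using hypothetical names (sketch_type, mul_mat_case, DECL_CASE) rather than the actual definitions in mmq.cuh:

// Sketch only: hypothetical names, not the real mmq.cuh API.
// ---- sketch_kernels.cuh: the kernel template is defined once in a shared header ----
enum sketch_type { SKETCH_Q4_0, SKETCH_Q4_1 };

template <sketch_type type>
void mul_mat_case(const float * src, float * dst, int n) {
    // a real implementation would launch the type-specific mul_mat_q kernel here
    for (int i = 0; i < n; ++i) {
        dst[i] = src[i];
    }
}

// Emits an explicit instantiation of the template for one quantization type.
#define DECL_CASE(type) template void mul_mat_case<type>(const float *, float *, int)

// ---- sketch-q4_0.cu: each autogenerated file instantiates exactly one type, ----
// ---- so the per-type kernels compile as separate, parallelizable units      ----
DECL_CASE(SKETCH_Q4_0);

Splitting the instantiations this way keeps any single .cu file's compile time and register pressure bounded while the switch in ggml_cuda_op_mul_mat_q only needs the forward declaration from the shared header.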