	cuda : re-add q4_0
@@ -29,9 +29,9 @@ void ggml_cuda_op_mul_mat_q(
     const mmq_args args = {src0_dd_i, src1_ddq_i, dst_dd_i, ne00, row_diff, stride00, src1_padded_row_size, src1_ncols, ne11, nrows_dst};
 
     switch (src0->type) {
-//        case GGML_TYPE_Q4_0:
-//            mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
-//            break;
+        case GGML_TYPE_Q4_0:
+            mul_mat_q_case<GGML_TYPE_Q4_0>(ctx, args, stream);
+            break;
 //        case GGML_TYPE_Q4_1:
 //            mul_mat_q_case<GGML_TYPE_Q4_1>(ctx, args, stream);
 //            break;

@@ -0,0 +1,5 @@
+// This file has been autogenerated by generate_cu_files.py, do not edit manually.
+
+#include "../mmq.cuh"
+
+DECL_MMQ_CASE(GGML_TYPE_Q4_0);
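For context, the re-enabled case in the switch dispatches to mul_mat_q_case<GGML_TYPE_Q4_0>, and the new autogenerated file uses the DECL_MMQ_CASE macro from mmq.cuh to emit that instantiation in its own translation unit. The sketch below illustrates the general pattern (explicit template instantiation per type, one per generated .cu file); the simplified ggml_type enum, the empty parameter list, and the printf body are placeholders for illustration, not the actual llama.cpp definitions.

    #include <cstdio>

    // Simplified stand-in for the real ggml type enum (assumption).
    enum ggml_type { GGML_TYPE_Q4_0, GGML_TYPE_Q4_1 };

    // Shared-header part (mmq.cuh in the real tree): the templated launcher
    // is defined once; the real version launches the type-specialized
    // mul_mat_q kernels and takes the context, args, and stream.
    template <ggml_type T>
    void mul_mat_q_case() {
        std::printf("launch mul_mat_q kernels for type %d\n", (int) T);
    }

    // Macro that each autogenerated per-type .cu file expands exactly once,
    // forcing an explicit instantiation in that translation unit.
    #define DECL_MMQ_CASE(T) template void mul_mat_q_case<T>()

    // What the new autogenerated q4_0 file boils down to.
    DECL_MMQ_CASE(GGML_TYPE_Q4_0);

Splitting the instantiations across generated files like this presumably keeps each compilation unit small and lets the heavy kernel builds run in parallel, which is why the file is produced by generate_cu_files.py rather than written by hand.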