	Fix f16_sycl cpy call from Arc (#5411)
* fix f16_sycl cpy call
* rm old logic
* add fp16 build CI
* use macro
* format fix
This commit is contained in:

Abhilash Majumder, committed by GitHub

parent ff4ff05c5f
commit 6e99f2a04f
.github/workflows/build.yml (vendored): 41 changed lines
@@ -184,6 +184,47 @@ jobs:
           cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx ..
           cmake --build . --config Release -j $(nproc)
 
+  ubuntu-22-cmake-sycl-fp16:
+    runs-on: ubuntu-22.04
+
+    continue-on-error: true
+
+    steps:
+      - uses: actions/checkout@v2
+
+      - name: add oneAPI to apt
+        shell: bash
+        run: |
+          cd /tmp
+          wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB
+          sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main"
+
+      - name: install oneAPI dpcpp compiler
+        shell: bash
+        run: |
+          sudo apt update
+          sudo apt install intel-oneapi-compiler-dpcpp-cpp
+
+      - name: install oneAPI MKL library
+        shell: bash
+        run: |
+          sudo apt install intel-oneapi-mkl-devel
+
+      - name: Clone
+        id: checkout
+        uses: actions/checkout@v3
+
+      - name: Build
+        id: cmake_build
+        run: |
+          source /opt/intel/oneapi/setvars.sh
+          mkdir build
+          cd build
+          cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON ..
+          cmake --build . --config Release -j $(nproc)
+
   # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know
   #       how to debug it.
   #       ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124
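
The new job mirrors the existing SYCL build but adds -DLLAMA_SYCL_F16=ON. A minimal sketch of what such a flag typically toggles, assuming it is forwarded to the compiler as a preprocessor define and, as in the CUDA backend's GGML_CUDA_F16 path, selects the intermediate "dfloat" type (the exact CMake plumbing is not shown in this commit):

// Hedged sketch, not the project's actual code.
// Build with the toolchain the CI installs above: icpx -fsycl -DLLAMA_SYCL_F16 dfloat_probe.cpp
#include <cstdio>
#include <sycl/sycl.hpp>

#ifdef LLAMA_SYCL_F16
using dfloat = sycl::half;   // 2-byte intermediate buffers in the FP16 build
#else
using dfloat = float;        // 4-byte intermediate buffers otherwise
#endif

int main() {
    std::printf("dfloat is %zu bytes\n", sizeof(dfloat));
    return 0;
}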
ggml-sycl.cpp
@@ -12148,7 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
     const int64_t src1_ncols, const int64_t src1_padded_row_size,
     const dpct::queue_ptr &stream) {
 
-    const int64_t ne00 = src0->ne[0];
+    GGML_TENSOR_BINARY_OP_LOCALS
 
     const int64_t row_diff = row_high - row_low;
 
     // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics
@@ -12167,8 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec(
         } else {
             src1_dfloat = src1_dfloat_a.alloc(ne00);
             ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat,
-                                  ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1,
-                                  sizeof(sycl::half), 0, 0, stream);
+                                  ne00, ne00, ne01, ne02, nb00, nb01, nb02,
+                                  nb03, ne10, ne11, ne12, nb10, nb11, nb12,
+                                  nb13, stream);
         }
     }
 #else
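
For context on the "use macro" item in the commit message: GGML_TENSOR_BINARY_OP_LOCALS pulls the per-dimension element counts (ne) and byte strides (nb) of src0 and src1 into locals, which is what makes ne01, nb00, ..., nb13 available to the corrected ggml_cpy_f32_f16_sycl call above. A hedged, stand-alone sketch of the idea, with toy_tensor standing in for ggml_tensor (the names and sample shapes are illustrative, not ggml's real definitions):

#include <cstdint>
#include <cstdio>

// Illustrative stand-in for ggml_tensor, trimmed to the two fields used here.
struct toy_tensor {
    int64_t ne[4]; // number of elements per dimension
    size_t  nb[4]; // stride in bytes per dimension
};

// Roughly what the macro expands to: locals suffixed 0x for src0, 1x for src1.
static void print_copy_geometry(const toy_tensor * src0, const toy_tensor * src1) {
    const int64_t ne00 = src0->ne[0], ne01 = src0->ne[1], ne02 = src0->ne[2];
    const size_t  nb00 = src0->nb[0], nb01 = src0->nb[1], nb02 = src0->nb[2], nb03 = src0->nb[3];
    const int64_t ne10 = src1->ne[0], ne11 = src1->ne[1], ne12 = src1->ne[2];
    const size_t  nb10 = src1->nb[0], nb11 = src1->nb[1], nb12 = src1->nb[2], nb13 = src1->nb[3];

    // The old call hard-coded this geometry ("1, sizeof(float), 0, 0, ...").
    // That only matches a single contiguous row, which is why the copy broke
    // on Arc; the fix forwards the real sizes and strides instead.
    std::printf("src0: %lld x %lld x %lld, strides %zu/%zu/%zu/%zu bytes\n",
                (long long) ne00, (long long) ne01, (long long) ne02,
                nb00, nb01, nb02, nb03);
    std::printf("src1: %lld x %lld x %lld, strides %zu/%zu/%zu/%zu bytes\n",
                (long long) ne10, (long long) ne11, (long long) ne12,
                nb10, nb11, nb12, nb13);
}

int main() {
    toy_tensor src0 = {{4096, 32, 1, 1}, {2, 8192, 262144, 262144}}; // e.g. fp16 weights
    toy_tensor src1 = {{4096,  1, 1, 1}, {4, 16384, 16384, 16384}};  // fp32 activations
    print_copy_geometry(&src0, &src1);
    return 0;
}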