mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-10-31 08:51:55 +00:00 
			
		
		
		
	Fix f16_sycl cpy call from Arc (#5411)
* fix f16_sycl cpy call
* rm old logic
* add fp16 build CI
* use macro
* format fix
This commit is contained in:
		 Abhilash Majumder
					Abhilash Majumder
				
			
				
					committed by
					
						 GitHub
						GitHub
					
				
			
			
				
	
			
			
			 GitHub
						GitHub
					
				
			
						parent
						
							ff4ff05c5f
						
					
				
				
					commit
					6e99f2a04f
				
			
							
								
								
									
										41
									
								
								.github/workflows/build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										41
									
								
								.github/workflows/build.yml
									
									
									
									
										vendored
									
									
								
							| @@ -184,6 +184,47 @@ jobs: | ||||
|           cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx .. | ||||
|           cmake --build . --config Release -j $(nproc) | ||||
|  | ||||
|   ubuntu-22-cmake-sycl-fp16: | ||||
|     runs-on: ubuntu-22.04 | ||||
|  | ||||
|     continue-on-error: true | ||||
|  | ||||
|     steps: | ||||
|       - uses: actions/checkout@v2 | ||||
|  | ||||
|       - name: add oneAPI to apt | ||||
|         shell: bash | ||||
|         run: | | ||||
|           cd /tmp | ||||
|           wget https://apt.repos.intel.com/intel-gpg-keys/GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | ||||
|           sudo apt-key add GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | ||||
|           rm GPG-PUB-KEY-INTEL-SW-PRODUCTS.PUB | ||||
|           sudo add-apt-repository "deb https://apt.repos.intel.com/oneapi all main" | ||||
|  | ||||
|       - name: install oneAPI dpcpp compiler | ||||
|         shell: bash | ||||
|         run: | | ||||
|           sudo apt update | ||||
|           sudo apt install intel-oneapi-compiler-dpcpp-cpp | ||||
|  | ||||
|       - name: install oneAPI MKL library | ||||
|         shell: bash | ||||
|         run: | | ||||
|           sudo apt install intel-oneapi-mkl-devel | ||||
|  | ||||
|       - name: Clone | ||||
|         id: checkout | ||||
|         uses: actions/checkout@v3 | ||||
|  | ||||
|       - name: Build | ||||
|         id: cmake_build | ||||
|         run: | | ||||
|           source /opt/intel/oneapi/setvars.sh | ||||
|           mkdir build | ||||
|           cd build | ||||
|           cmake -DLLAMA_SYCL=ON -DCMAKE_C_COMPILER=icx -DCMAKE_CXX_COMPILER=icpx -DLLAMA_SYCL_F16=ON .. | ||||
|           cmake --build . --config Release -j $(nproc) | ||||
|  | ||||
|   # TODO: build with LLAMA_NO_METAL because test-backend-ops fail on "Apple Paravirtual device" and I don't know | ||||
|   #       how to debug it. | ||||
|   #       ref: https://github.com/ggerganov/llama.cpp/actions/runs/7131777249/job/19420981052#step:5:1124 | ||||
|   | ||||
| @@ -12148,7 +12148,8 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( | ||||
|     const int64_t src1_ncols, const int64_t src1_padded_row_size, | ||||
|     const dpct::queue_ptr &stream) { | ||||
|  | ||||
| -    const int64_t ne00 = src0->ne[0]; | ||||
| +    GGML_TENSOR_BINARY_OP_LOCALS | ||||
|  | ||||
|     const int64_t row_diff = row_high - row_low; | ||||
|  | ||||
|     // on some GPUs it is faster to convert src1 to half and to use half precision intrinsics | ||||
| @@ -12167,8 +12168,9 @@ inline void ggml_sycl_op_dequantize_mul_mat_vec( | ||||
|         } else { | ||||
|             src1_dfloat = src1_dfloat_a.alloc(ne00); | ||||
|             ggml_cpy_f32_f16_sycl((const char *)src1_ddf_i, (char *)src1_dfloat, | ||||
| -                                  ne00, ne00, 1, sizeof(float), 0, 0, ne00, 1, | ||||
| -                                  sizeof(sycl::half), 0, 0, stream); | ||||
| +                                  ne00, ne00, ne01, ne02, nb00, nb01, nb02, | ||||
| +                                  nb03, ne10, ne11, ne12, nb10, nb11, nb12, | ||||
| +                                  nb13, stream); | ||||
|         } | ||||
|     } | ||||
| #else | ||||
|   | ||||
		Reference in New Issue
	
	Block a user