	Windows nvcc workaround (#1753)
Fix gibberish output on Windows when using CUDA
@@ -1512,6 +1512,14 @@ static void ggml_cuda_op(const ggml_tensor * src0, const ggml_tensor * src1, ggm
                         i01_high = row_high % ne01;
                     }
                 }
+
+                // There is possibly a bug in the Windows nvcc compiler regarding instruction reordering or optimizing out local variables.
+                // Removing the first assert or changing the order of the arguments causes the second assert to fail.
+                // Removing both asserts results in i01_high becoming 0 which in turn results in garbage output.
+                // The root cause seems to be a problem with i0_offset_high becoming 0 when it should always be >0 (for single GPU).
+                GGML_ASSERT(i01_low == 0 || g_device_count > 1);
+                GGML_ASSERT(i01_high == ne01 || g_device_count > 1);
+
                 const int64_t i01_diff = i01_high - i01_low;
                 if (i01_diff == 0) {
                     continue;
@@ -1727,6 +1735,7 @@ void ggml_cuda_load_data(const char * fname, struct ggml_tensor * tensor, const
             row_low -= row_low % GGML_CUDA_DMMV_Y;
             row_high = id == g_device_count - 1 ? nrows : nrows*g_tensor_split[id + 1];
             row_high -= row_high % GGML_CUDA_DMMV_Y;
+            GGML_ASSERT(nrows % GGML_CUDA_DMMV_Y == 0);
         } else {
             GGML_ASSERT(false);
         }
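As context for the workaround, here is a minimal standalone sketch of the pattern the patch relies on; it is not the actual llama.cpp code, and the MY_ASSERT macro, variable names, and example values are illustrative only. Checking the computed row bounds both documents the single-GPU invariant (the bounds must span the whole tensor) and keeps the intermediate values observable, which appears to be what defeats the suspected nvcc mis-optimization described in the diff comments above.

// Minimal sketch (not the llama.cpp implementation): an assert-style check that
// reads an intermediate value doubles as a de-facto optimization barrier, since
// the compiler has to keep the value live to evaluate the condition.
#include <cstdio>
#include <cstdlib>

// Hypothetical stand-in for GGML_ASSERT: print the failing condition and abort.
#define MY_ASSERT(x)                                                \
    do {                                                            \
        if (!(x)) {                                                 \
            fprintf(stderr, "assert failed: %s (%s:%d)\n",          \
                    #x, __FILE__, __LINE__);                        \
            abort();                                                \
        }                                                           \
    } while (0)

int main() {
    const int device_count = 1;    // single-GPU case from the patch comments
    const int ne01         = 4096; // number of rows (example value)

    // Per-device row bounds, analogous to i01_low / i01_high in the diff.
    const int i01_low  = 0;
    const int i01_high = ne01;

    // For a single GPU the bounds must cover the whole tensor. Checking them
    // documents the invariant and forces the values to remain observable.
    MY_ASSERT(i01_low == 0 || device_count > 1);
    MY_ASSERT(i01_high == ne01 || device_count > 1);

    printf("processing rows [%d, %d)\n", i01_low, i01_high);
    return 0;
}

On a correct compiler both asserts are no-ops; the commit comments indicate that leaving them in place also appears to stop the Windows nvcc build from producing wrong bounds and, with them, garbage output.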