mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	fix: Correctly size the shared memory buffer and assert expected size relationships
Branch: GraniteFourPerf Signed-off-by: Gabe Goodhart <ghart@us.ibm.com>
This commit is contained in:
		@@ -3017,7 +3017,15 @@ static bool ggml_metal_encode_node(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
                if (ne30 == 1) {
 | 
					                if (ne30 == 1) {
 | 
				
			||||||
                    // Mamba-2
 | 
					                    // Mamba-2
 | 
				
			||||||
                    [encoder setThreadgroupMemoryLength:32*sizeof(float) atIndex:0]; // SIMD size
 | 
					
 | 
				
			||||||
 | 
					                    // One shared memory bucket for each simd group in the threadgroup
 | 
				
			||||||
 | 
					                    const int64_t shmem_size = d_state / 32;
 | 
				
			||||||
 | 
					                    GGML_ASSERT(shmem_size * 32 == d_state);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    // One thread per element in d_state
 | 
				
			||||||
 | 
					                    GGML_ASSERT(d_state <= (int64_t)pipeline.maxTotalThreadsPerThreadgroup);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					                    [encoder setThreadgroupMemoryLength:(shmem_size)*sizeof(float) atIndex:0];
 | 
				
			||||||
                    [encoder dispatchThreadgroups:MTLSizeMake(d_inner, n_head, n_seqs) threadsPerThreadgroup:MTLSizeMake(d_state, 1, 1)];
 | 
					                    [encoder dispatchThreadgroups:MTLSizeMake(d_inner, n_head, n_seqs) threadsPerThreadgroup:MTLSizeMake(d_state, 1, 1)];
 | 
				
			||||||
                } else {
 | 
					                } else {
 | 
				
			||||||
                    GGML_ASSERT(d_inner == 1);
 | 
					                    GGML_ASSERT(d_inner == 1);
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user