mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	Update src/llama-model.cpp
Co-authored-by: Sigbjørn Skjæret <sigbjorn.skjaeret@scala.com>
This commit is contained in:
		@@ -14868,7 +14868,7 @@ struct llm_build_falcon_h1 : public llm_graph_context {
 | 
				
			|||||||
            // TODO: skip computing output earlier for unused tokens
 | 
					            // TODO: skip computing output earlier for unused tokens
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            y = ggml_add(ctx0, y, ggml_mul(ctx0, x, model.layers[il].ssm_d));
 | 
					            y = ggml_add(ctx0, y, ggml_mul(ctx0, x, model.layers[il].ssm_d));
 | 
				
			||||||
            y = ggml_mul(ctx0, y, ggml_silu(ctx0, ggml_cont(ctx0, z)));
 | 
					            y = ggml_swiglu_split(ctx0, ggml_cont(ctx0, z), y);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
            // grouped RMS norm
 | 
					            // grouped RMS norm
 | 
				
			||||||
            if (model.layers[il].ssm_norm) {
 | 
					            if (model.layers[il].ssm_norm) {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user