mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	llama : fix embd when offloading non-repeating layers (#1891)
This commit is contained in:
		@@ -1658,7 +1658,7 @@ static bool llama_eval_internal(
 | 
				
			|||||||
 | 
					
 | 
				
			||||||
        // cur = cur*norm(broadcasted)
 | 
					        // cur = cur*norm(broadcasted)
 | 
				
			||||||
        cur = ggml_mul(ctx0, cur, model.norm);
 | 
					        cur = ggml_mul(ctx0, cur, model.norm);
 | 
				
			||||||
        offload_func_nr(cur);
 | 
					        // offload_func_nr(cur); // TODO CPU + GPU mirrored backend
 | 
				
			||||||
        ggml_set_name(cur, "result_norm");
 | 
					        ggml_set_name(cur, "result_norm");
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        embeddings = cur;
 | 
					        embeddings = cur;
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user