	server : use 4 slots + unified KV by default
@@ -4432,6 +4432,13 @@ int main(int argc, char ** argv) {
         return 1;
     }
 
+    if (params.n_parallel == 1 && params.kv_unified == false) {
+        LOG_WRN("%s: setting n_parallel = 4 and kv_unified = true\n", __func__);
+
+        params.n_parallel = 4;
+        params.kv_unified = true;
+    }
+
     common_init();
 
     // struct that contains llama context and inference
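In effect, when the server is started without an explicit slot count (n_parallel left at its default of 1) and without a unified KV cache, it now warns and switches to 4 slots backed by a single shared KV cache. Below is a minimal standalone sketch of that defaulting logic; the simplified server_params struct is an assumption made for illustration (in llama.cpp these fields live in the common params struct), while the field names and the condition mirror the diff:

#include <cstdio>

// Simplified stand-in for the real parameter struct (assumption for
// illustration only; not llama.cpp's actual type).
struct server_params {
    int  n_parallel = 1;     // number of server slots
    bool kv_unified = false; // do all slots share one KV cache buffer?
};

int main() {
    server_params params;

    // Mirror of the new default: if the user asked for neither a specific
    // slot count nor a unified cache, use 4 slots + unified KV.
    if (params.n_parallel == 1 && params.kv_unified == false) {
        fprintf(stderr, "setting n_parallel = 4 and kv_unified = true\n");
        params.n_parallel = 4;
        params.kv_unified = true;
    }

    printf("slots: %d, unified KV: %s\n",
           params.n_parallel, params.kv_unified ? "yes" : "no");
    return 0;
}

Note that, given the condition above, an explicit request for a single slot is only honored together with a unified KV cache: passing just --parallel 1 on the command line would still be bumped to 4 slots, while --parallel 1 --kv-unified keeps one slot. (These flag names match llama.cpp's common command-line options as I understand them; check the server's --help output for the authoritative list.)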