llama : minor fixes to speed up llama model loading (#11448)
* impl::load: change the bpe_ranks map to an unordered map, reducing the time spent in impl::load by about 30%

* llama_model_loader::init_mapping: replace new llama_mmap with std::make_unique<llama_mmap> for cleaner code, roughly halving the time spent in init_mappings

* Update src/llama-vocab.cpp

---------

Co-authored-by: lexasub <empty@empty.ru>
Co-authored-by: Diego Devesa <slarengh@gmail.com>
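The second bullet swaps a raw new handed to a unique_ptr for std::make_unique. A minimal, self-contained sketch of that pattern follows; the llama_mmap stand-in and its constructor argument here are placeholders, not the real llama.cpp signature.

#include <cstdio>
#include <memory>

// Stand-in for llama_mmap; the real class lives in llama.cpp and
// takes different constructor arguments.
struct llama_mmap {
    explicit llama_mmap(const char * path) { std::printf("mapping %s\n", path); }
};

int main() {
    // Before: an explicit new passed to the unique_ptr constructor.
    std::unique_ptr<llama_mmap> m_old(new llama_mmap("model.gguf"));

    // After: std::make_unique constructs the object and its owner in one
    // expression; no naked new, and the type is spelled only once.
    auto m_new = std::make_unique<llama_mmap>("model.gguf");
}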
		@@ -1245,8 +1245,13 @@ struct llama_vocab::impl {
     std::vector<llama_token> cache_special_tokens;
     std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
 
-    std::map<std::pair<std::string, std::string>, int> bpe_ranks;
+    struct pair_hash {
+        size_t operator()(const std::pair<std::string, std::string> & p) const {
+            return std::hash<std::string>{}(p.first) ^  //create some hash for pair
+                   (std::hash<std::string>{}(p.second) << 1);
+        }
+    };
+    std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> bpe_ranks;
 
     // set of all tokens that cause "end of generation"
     std::set<llama_token> special_eog_ids;
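For context, here is a self-contained sketch of how the new unordered_map with a custom pair hash serves BPE rank lookups. Only the hashing scheme mirrors the diff above; the token pairs and ranks are invented examples. Note the design choice in the hash: shifting the second string's hash before XOR-ing keeps (a, b) and (b, a) from always colliding.

#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>

// Same hashing scheme as the diff above: combine the two string hashes,
// shifting the second so that swapped pairs do not hash identically.
struct pair_hash {
    size_t operator()(const std::pair<std::string, std::string> & p) const {
        return std::hash<std::string>{}(p.first) ^
               (std::hash<std::string>{}(p.second) << 1);
    }
};

int main() {
    std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> bpe_ranks;

    // Invented merge ranks; real ranks come from the model's tokenizer data.
    bpe_ranks[{"t", "h"}]  = 0;
    bpe_ranks[{"th", "e"}] = 1;

    // Lookups are now average O(1) instead of the O(log n) of std::map,
    // which is what speeds up impl::load on vocabularies with many merges.
    auto it = bpe_ranks.find({"t", "h"});
    if (it != bpe_ranks.end()) {
        std::printf("rank(t,h) = %d\n", it->second);
    }
}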