llama : minor fixes to speed up llama model loading (#11448)
* impl::load: change the bpe_ranks map to an unordered_map, reducing impl::load time by about 30%

* llama_model_loader::init_mappings: replace new llama_mmap with std::make_unique<llama_mmap> for cleaner code; this also roughly halves the time spent in init_mappings

* Update src/llama-vocab.cpp

---------

Co-authored-by: lexasub <empty@empty.ru>
Co-authored-by: Diego Devesa <slarengh@gmail.com>
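For context on the first bullet: the commit swaps an ordered std::map keyed by string pairs for an unordered_map with a custom hash, turning O(log n) string-pair comparisons into average O(1) hashed lookups on the tokenizer's hot path. Below is a minimal standalone micro-benchmark sketch of that comparison; it is illustrative only, not llama.cpp code, and the synthetic merge table and the probe helper are made up for this example.

#include <chrono>
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

using pair_key = std::pair<std::string, std::string>;

// same hash-combining scheme as the pair_hash introduced in the diff below
struct pair_hash {
    size_t operator()(const pair_key & p) const {
        return std::hash<std::string>{}(p.first) ^ (std::hash<std::string>{}(p.second) << 1);
    }
};

// time how long it takes to look every key up once
template <typename Map>
void probe(const char * label, const Map & m, const std::vector<pair_key> & keys) {
    auto t0 = std::chrono::steady_clock::now();
    long long hits = 0;
    for (const auto & k : keys) {
        hits += (long long) m.count(k);   // the same lookup a BPE merge loop performs per pair
    }
    auto t1 = std::chrono::steady_clock::now();
    auto us = std::chrono::duration_cast<std::chrono::microseconds>(t1 - t0).count();
    printf("%s: %lld hits in %lld us\n", label, hits, (long long) us);
}

int main() {
    std::map<pair_key, int> ordered;                       // old container
    std::unordered_map<pair_key, int, pair_hash> hashed;   // new container
    std::vector<pair_key> keys;
    for (int i = 0; i < 50000; ++i) {                      // synthetic merge table
        pair_key k{std::to_string(i), std::to_string(i + 1)};
        ordered[k] = i;
        hashed[k]  = i;
        keys.push_back(k);
    }
    probe("std::map          ", ordered, keys);   // O(log n) string-pair comparisons each
    probe("std::unordered_map", hashed,  keys);   // average O(1) hash + equality check each
    return 0;
}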
src/llama-model-loader.cpp

@@ -819,7 +819,7 @@ void llama_model_loader::init_mappings(bool prefetch, llama_mlocks * mlock_mmaps
         for (const auto & file : files) {
             auto * reg = ggml_backend_dev_backend_reg(ggml_backend_dev_by_type(GGML_BACKEND_DEVICE_TYPE_CPU));
             auto * is_numa_fn = (decltype(ggml_is_numa) *) ggml_backend_reg_get_proc_address(reg, "ggml_backend_cpu_is_numa");
-            std::unique_ptr<llama_mmap> mapping(new llama_mmap(file.get(), prefetch ? -1 : 0, is_numa_fn()));
+            std::unique_ptr<llama_mmap> mapping = std::make_unique<llama_mmap>(file.get(), prefetch ? -1 : 0, is_numa_fn());
             mmaps_used.emplace_back(mapping->size(), 0);
             if (mlock_mmaps) {
                 std::unique_ptr<llama_mlock> mlock_mmap(new llama_mlock());
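The hunk above is a pure refactor of the smart-pointer construction. A minimal sketch of the idiom follows, using a hypothetical file_view type as a stand-in for llama_mmap (none of these names are llama.cpp's own):

#include <memory>

struct file_view {                        // hypothetical stand-in for llama_mmap
    file_view(int fd, int prefetch_bytes, bool numa) {
        (void) fd; (void) prefetch_bytes; (void) numa;
    }
};

void init_mapping(int fd, bool prefetch, bool numa) {
    // before: a naked new inside the unique_ptr constructor
    std::unique_ptr<file_view> before(new file_view(fd, prefetch ? -1 : 0, numa));

    // after: make_unique avoids the explicit new and names the type once
    auto after = std::make_unique<file_view>(fd, prefetch ? -1 : 0, numa);
}

int main() {
    init_mapping(0, true, false);
    return 0;
}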
src/llama-vocab.cpp

@@ -1245,8 +1245,13 @@ struct llama_vocab::impl {
 
     std::vector<llama_token> cache_special_tokens;
     std::vector<std::string> cache_token_to_piece; // llama_token_to_piece(special = true);
-
-    std::map<std::pair<std::string, std::string>, int> bpe_ranks;
+    struct pair_hash {
+        size_t operator()(const std::pair<std::string, std::string> & p) const {
+            return std::hash<std::string>{}(p.first) ^  // combine the two string hashes into one pair hash
+                   (std::hash<std::string>{}(p.second) << 1);
+        }
+    };
+    std::unordered_map<std::pair<std::string, std::string>, int, pair_hash> bpe_ranks;
 
     // set of all tokens that cause "end of generation"
     std::set<llama_token> special_eog_ids;
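To see why this container sits on a hot path, here is a hedged sketch of the kind of merge-rank scan a BPE tokenizer performs over every adjacent symbol pair. It is simplified and illustrative (best_pair and rank_map are names invented for this example, not the actual llama.cpp tokenizer loop):

#include <climits>
#include <cstddef>
#include <cstdio>
#include <string>
#include <unordered_map>
#include <utility>
#include <vector>

// same pair_hash as in the diff above
struct pair_hash {
    size_t operator()(const std::pair<std::string, std::string> & p) const {
        return std::hash<std::string>{}(p.first) ^ (std::hash<std::string>{}(p.second) << 1);
    }
};

using rank_map = std::unordered_map<std::pair<std::string, std::string>, int, pair_hash>;

// return the index of the adjacent symbol pair with the lowest (best) merge rank,
// or -1 when no adjacent pair appears in the merge table; a BPE tokenizer runs a
// scan like this once per merge, so the per-pair lookup cost dominates
int best_pair(const std::vector<std::string> & symbols, const rank_map & bpe_ranks) {
    int best = -1;
    int best_rank = INT_MAX;
    for (size_t i = 0; i + 1 < symbols.size(); ++i) {
        auto it = bpe_ranks.find({symbols[i], symbols[i + 1]});   // average O(1) with unordered_map
        if (it != bpe_ranks.end() && it->second < best_rank) {
            best_rank = it->second;
            best      = (int) i;
        }
    }
    return best;
}

int main() {
    rank_map ranks = {{{"t", "he"}, 0}, {{"i", "n"}, 1}};   // toy merge table
    std::vector<std::string> symbols = {"t", "he", "i", "n"};
    printf("best pair index: %d\n", best_pair(symbols, ranks));   // prints 0
    return 0;
}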