mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	cmpnct_gpt2bpe.hpp : fixes
This commit is contained in:
@@ -445,17 +445,13 @@ struct gpt2bpe_vocab {
     std::vector<std::pair<std::string, std::string>> bpe_merges;
     std::map<std::string, int> special_tokens;
 
-    id special_bos_id = 0;
-    id special_eos_id = 0;
-    id special_unk_id = 0;
-    id special_sep_id = 0;
-    id special_pad_id = 0;
+    id special_bos_id = -1;
+    id special_eos_id = -1;
+    id special_unk_id = -1;
+    id special_sep_id = -1;
+    id special_pad_id = -1;
 
-    bool special_have_bos = false;
-    bool special_have_eos = false;
-    bool special_have_unk = false;
-    bool special_have_sep = false;
-    bool special_have_pad = false;
+    id linefeed_id = -1;
 
     std::unordered_map<token, id> token_to_id;
     std::unordered_map<id, token> id_to_token;
@@ -1002,7 +998,7 @@ static std::vector<gpt2bpe_vocab::id> gpt2bpe_tokenize(const gpt2bpe_vocab & voc
         return output;
     }
 
-    if (bos && vocab.special_have_bos) {
+    if (bos && vocab.special_bos_id != -1) {
         output.push_back(vocab.special_bos_id);
     }
 
		Reference in New Issue
	
	Block a user