mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-03 09:22:01 +00:00 
			
		
		
		
	* Add BPE pre-tokenization for Command-R/R+. * Bump transformers convert requirement. * command-r : add individual digits regex --------- Co-authored-by: Georgi Gerganov <ggerganov@gmail.com>
		
			
				
	
	
		
			107 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			107 lines
		
	
	
		
			1.8 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
ied 4 ½ months
 | 
						||
__ggml_vocab_test__
 | 
						||
Führer
 | 
						||
__ggml_vocab_test__
 | 
						||
 | 
						||
__ggml_vocab_test__
 | 
						||
 
 | 
						||
__ggml_vocab_test__
 | 
						||
  
 | 
						||
__ggml_vocab_test__
 | 
						||
   
 | 
						||
__ggml_vocab_test__
 | 
						||
	
 | 
						||
__ggml_vocab_test__
 | 
						||
 | 
						||
 | 
						||
__ggml_vocab_test__
 | 
						||
 | 
						||
 | 
						||
 | 
						||
__ggml_vocab_test__
 | 
						||
 | 
						||
 | 
						||
 | 
						||
 | 
						||
__ggml_vocab_test__
 | 
						||
	
 | 
						||
 | 
						||
__ggml_vocab_test__
 | 
						||
Hello world
 | 
						||
__ggml_vocab_test__
 | 
						||
 Hello world
 | 
						||
__ggml_vocab_test__
 | 
						||
Hello World
 | 
						||
__ggml_vocab_test__
 | 
						||
 Hello World
 | 
						||
__ggml_vocab_test__
 | 
						||
 Hello World!
 | 
						||
__ggml_vocab_test__
 | 
						||
Hello, world!
 | 
						||
__ggml_vocab_test__
 | 
						||
 Hello, world!
 | 
						||
__ggml_vocab_test__
 | 
						||
 this is 🦙.cpp
 | 
						||
__ggml_vocab_test__
 | 
						||
w048 7tuijk dsdfhu
 | 
						||
__ggml_vocab_test__
 | 
						||
нещо на Български
 | 
						||
__ggml_vocab_test__
 | 
						||
កាន់តែពិសេសអាចខលចេញ
 | 
						||
__ggml_vocab_test__
 | 
						||
🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ (only emoji that has its own token)
 | 
						||
__ggml_vocab_test__
 | 
						||
Hello
 | 
						||
__ggml_vocab_test__
 | 
						||
 Hello
 | 
						||
__ggml_vocab_test__
 | 
						||
  Hello
 | 
						||
__ggml_vocab_test__
 | 
						||
   Hello
 | 
						||
__ggml_vocab_test__
 | 
						||
    Hello
 | 
						||
__ggml_vocab_test__
 | 
						||
    Hello
 | 
						||
    Hello
 | 
						||
__ggml_vocab_test__
 | 
						||
 (
 | 
						||
__ggml_vocab_test__
 | 
						||
 | 
						||
 =
 | 
						||
__ggml_vocab_test__
 | 
						||
' era
 | 
						||
__ggml_vocab_test__
 | 
						||
Hello, y'all! How are you 😁 ?我想在apple工作1314151天~
 | 
						||
__ggml_vocab_test__
 | 
						||
3
 | 
						||
__ggml_vocab_test__
 | 
						||
33
 | 
						||
__ggml_vocab_test__
 | 
						||
333
 | 
						||
__ggml_vocab_test__
 | 
						||
3333
 | 
						||
__ggml_vocab_test__
 | 
						||
33333
 | 
						||
__ggml_vocab_test__
 | 
						||
333333
 | 
						||
__ggml_vocab_test__
 | 
						||
3333333
 | 
						||
__ggml_vocab_test__
 | 
						||
33333333
 | 
						||
__ggml_vocab_test__
 | 
						||
333333333
 | 
						||
__ggml_vocab_test__
 | 
						||
 | 
						||
 
 | 
						||
 | 
						||
 
 | 
						||
 | 
						||
 | 
						||
 	 		 	
 | 
						||
  
 | 
						||
   
 | 
						||
    
 | 
						||
     
 | 
						||
🚀 (normal) 😶🌫️ (multiple emojis concatenated) ✅ 🦙🦙 3 33 333 3333 33333 333333 3333333 33333333 3.3 3..3 3...3 កាន់តែពិសេសអាច😁 ?我想在apple工作1314151天~ ------======= нещо на Български ''''''```````""""......!!!!!!?????? I've been 'told he's there, 'RE you sure? 'M not sure I'll make it, 'D you like some tea? We'Ve a'lL
 | 
						||
__ggml_vocab_test__
 |