mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	llama : suppress conversion from 'size_t' to 'int' (#9046)
* llama : suppress conversion from 'size_t' to 'int'
This commit updates the tokenize loops of the SPM and BPE tokenizers
(llm_tokenizer_spm and llm_tokenizer_bpe) to remove the following
warnings, which are generated on Windows when building with MSVC:
```console
src\llama-vocab.cpp(211,1): warning C4267: 'argument':
    conversion from 'size_t' to 'int', possible loss of data
src\llama-vocab.cpp(517,1): warning C4267: 'argument':
    conversion from 'size_t' to 'int', possible loss of data
```
This is done by casting the size_t returned from symbols.size() to int.
I believe this is safe, as it seems unlikely that symbols, which stores
an entry for each UTF-8 character, would ever hold more than INT_MAX
entries.
The motivation for this change is to reduce the number of warnings that
are currently generated when building on Windows.
* squash! llama : suppress conversion from 'size_t' to 'int'
Move cast into for loop.
			
			
This commit is contained in:
		@@ -221,7 +221,7 @@ struct llm_tokenizer_spm_session {
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // seed the work queue with all possible 2-character tokens.
 | 
			
		||||
        for (size_t i = 1; i < symbols.size(); ++i) {
 | 
			
		||||
        for (int i = 1; i < (int) symbols.size(); ++i) {
 | 
			
		||||
            try_add_bigram(i - 1, i);
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
@@ -563,7 +563,7 @@ struct llm_tokenizer_bpe_session {
 | 
			
		||||
                index++;
 | 
			
		||||
                symbols.emplace_back(sym);
 | 
			
		||||
            }
 | 
			
		||||
            for (size_t i = 1; i < symbols.size(); ++i) {
 | 
			
		||||
            for (int i = 1; i < (int) symbols.size(); ++i) {
 | 
			
		||||
                add_new_bigram(i - 1, i);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user