Mirror of https://github.com/ggml-org/llama.cpp.git, synced 2025-11-03 09:22:01 +00:00.
			
		
		
		
	@@ -3385,38 +3385,6 @@ struct server_context {
 | 
			
		||||
            llama_set_embeddings(ctx, slot_batched->need_embd());
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        // pad the batch so that batch.n_tokens >= n_slots
 | 
			
		||||
        // TODO: temporary workaround for https://github.com/ggml-org/llama.cpp/issues/13689
 | 
			
		||||
        if (slot_batched->need_embd()) {
 | 
			
		||||
            const int n_slots = slots.size();
 | 
			
		||||
 | 
			
		||||
            if (batch.n_tokens < n_slots) {
 | 
			
		||||
                std::set<llama_seq_id> seq_ids;
 | 
			
		||||
                for (int j = 0; j < batch.n_tokens; ++j) {
 | 
			
		||||
                    seq_ids.insert(batch.seq_id[j][0]);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                // find unused sequence id
 | 
			
		||||
                llama_seq_id seq_id = -1;
 | 
			
		||||
                for (int i = 0; i < n_slots; ++i) {
 | 
			
		||||
                    if (seq_ids.find(i) == seq_ids.end()) {
 | 
			
		||||
                        seq_id = i;
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                const int n_add = n_slots - batch.n_tokens;
 | 
			
		||||
 | 
			
		||||
                SRV_WRN("adding %d dummy tokens to the batch, seq_id = %d\n", n_add, seq_id);
 | 
			
		||||
 | 
			
		||||
                for (int j = 0; j < n_add; ++j) {
 | 
			
		||||
                    common_batch_add(batch, 0, j, { seq_id }, true);
 | 
			
		||||
                }
 | 
			
		||||
 | 
			
		||||
                slots[seq_id].cache_tokens.clear();
 | 
			
		||||
                llama_memory_seq_rm(llama_get_memory(ctx), seq_id, -1, -1);
 | 
			
		||||
            }
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        int32_t i_next = 0;
 | 
			
		||||
 | 
			
		||||
        // process the created batch of tokens
 | 
			
		||||
 
 | 
			
		||||
[page footer: "Reference in New Issue" | "Block a user"]