mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	llama : avoid useless copies in dummy session writer
This commit is contained in:
		@@ -17346,6 +17346,14 @@ struct llama_data_write {
 | 
				
			|||||||
    virtual size_t get_size_written() = 0;
 | 
					    virtual size_t get_size_written() = 0;
 | 
				
			||||||
    virtual ~llama_data_write() = default;
 | 
					    virtual ~llama_data_write() = default;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    std::vector<uint8_t> temp_buffer;
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    virtual void * get_tensor_data(const struct ggml_tensor * tensor, size_t offset, size_t size) {
 | 
				
			||||||
 | 
					        temp_buffer.resize(size);
 | 
				
			||||||
 | 
					        ggml_backend_tensor_get(tensor, temp_buffer.data(), offset, size);
 | 
				
			||||||
 | 
					        return temp_buffer.data();
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    void write_string(const std::string & str) {
 | 
					    void write_string(const std::string & str) {
 | 
				
			||||||
        uint32_t str_size = str.size();
 | 
					        uint32_t str_size = str.size();
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -17465,9 +17473,9 @@ struct llama_data_write {
 | 
				
			|||||||
            // Read each range of cells of k_size length each into tmp_buf and write out
 | 
					            // Read each range of cells of k_size length each into tmp_buf and write out
 | 
				
			||||||
            for (const auto & range : cell_ranges) {
 | 
					            for (const auto & range : cell_ranges) {
 | 
				
			||||||
                const size_t range_size = range.second - range.first;
 | 
					                const size_t range_size = range.second - range.first;
 | 
				
			||||||
                tmp_buf.resize(range_size * k_size_row);
 | 
					                const size_t buf_size = range_size * k_size_row;
 | 
				
			||||||
                ggml_backend_tensor_get(kv_self.k_l[il], tmp_buf.data(), range.first * k_size_row, range_size * k_size_row);
 | 
					                const void * data = get_tensor_data(kv_self.k_l[il], range.first * k_size_row, buf_size);
 | 
				
			||||||
                write(tmp_buf.data(), tmp_buf.size());
 | 
					                write(data, buf_size);
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
@@ -17486,9 +17494,9 @@ struct llama_data_write {
 | 
				
			|||||||
                // Read each range of cells of v_size length each into tmp_buf and write out
 | 
					                // Read each range of cells of v_size length each into tmp_buf and write out
 | 
				
			||||||
                for (const auto & range : cell_ranges) {
 | 
					                for (const auto & range : cell_ranges) {
 | 
				
			||||||
                    const size_t range_size = range.second - range.first;
 | 
					                    const size_t range_size = range.second - range.first;
 | 
				
			||||||
                    tmp_buf.resize(range_size * v_size_row);
 | 
					                    const size_t buf_size = range_size * v_size_row;
 | 
				
			||||||
                    ggml_backend_tensor_get(kv_self.v_l[il], tmp_buf.data(), range.first * v_size_row, range_size * v_size_row);
 | 
					                    const void * data = get_tensor_data(kv_self.v_l[il], range.first * v_size_row, buf_size);
 | 
				
			||||||
                    write(tmp_buf.data(), tmp_buf.size());
 | 
					                    write(data, buf_size);
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
        } else {
 | 
					        } else {
 | 
				
			||||||
@@ -17514,9 +17522,9 @@ struct llama_data_write {
 | 
				
			|||||||
                    for (const auto & range : cell_ranges) {
 | 
					                    for (const auto & range : cell_ranges) {
 | 
				
			||||||
                        const size_t range_size = range.second - range.first;
 | 
					                        const size_t range_size = range.second - range.first;
 | 
				
			||||||
                        const size_t src_offset = (range.first + j * kv_size) * v_size_el;
 | 
					                        const size_t src_offset = (range.first + j * kv_size) * v_size_el;
 | 
				
			||||||
                        tmp_buf.resize(range_size * v_size_el);
 | 
					                        const size_t buf_size = range_size * v_size_el;
 | 
				
			||||||
                        ggml_backend_tensor_get(kv_self.v_l[il], tmp_buf.data(), src_offset, tmp_buf.size());
 | 
					                        const void * data = get_tensor_data(kv_self.v_l[il], src_offset, buf_size);
 | 
				
			||||||
                        write(tmp_buf.data(), tmp_buf.size());
 | 
					                        write(data, buf_size);
 | 
				
			||||||
                    }
 | 
					                    }
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
@@ -17881,6 +17889,10 @@ struct llama_data_write_dummy : llama_data_write {
 | 
				
			|||||||
        size_written += size;
 | 
					        size_written += size;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					    void * get_tensor_data(const struct ggml_tensor * /* tensor */, size_t /* offset */, size_t /* size */) override {
 | 
				
			||||||
 | 
					        return nullptr;
 | 
				
			||||||
 | 
					    }
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    size_t get_size_written() override {
 | 
					    size_t get_size_written() override {
 | 
				
			||||||
        return size_written;
 | 
					        return size_written;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user