mirror of
				https://github.com/ggml-org/llama.cpp.git
				synced 2025-11-04 09:32:00 +00:00 
			
		
		
		
	rpc : add rpc_msg_set_tensor_hash_req (#13353)
* rpc : add rpc_msg_set_tensor_hash_req Use a dedicated struct for the request of RPC_CMD_SET_TENSOR_HASH which makes the code cleaner. * fix
This commit is contained in:
		
				
					committed by
					
						
						GitHub
					
				
			
			
				
	
			
			
			
						parent
						
							02115dcd9a
						
					
				
				
					commit
					b486ba05bf
				
			@@ -151,6 +151,12 @@ struct rpc_msg_buffer_clear_req {
 | 
				
			|||||||
    uint8_t value;
 | 
					    uint8_t value;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					struct rpc_msg_set_tensor_hash_req {
 | 
				
			||||||
 | 
					    rpc_tensor tensor;
 | 
				
			||||||
 | 
					    uint64_t offset;
 | 
				
			||||||
 | 
					    uint64_t hash;
 | 
				
			||||||
 | 
					};
 | 
				
			||||||
 | 
					
 | 
				
			||||||
struct rpc_msg_set_tensor_hash_rsp {
 | 
					struct rpc_msg_set_tensor_hash_rsp {
 | 
				
			||||||
    uint8_t result;
 | 
					    uint8_t result;
 | 
				
			||||||
};
 | 
					};
 | 
				
			||||||
@@ -548,15 +554,12 @@ static void ggml_backend_rpc_buffer_set_tensor(ggml_backend_buffer_t buffer, ggm
 | 
				
			|||||||
    ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
 | 
					    ggml_backend_rpc_buffer_context * ctx = (ggml_backend_rpc_buffer_context *)buffer->context;
 | 
				
			||||||
    rpc_tensor rpc_tensor = serialize_tensor(tensor);
 | 
					    rpc_tensor rpc_tensor = serialize_tensor(tensor);
 | 
				
			||||||
    if (size > HASH_THRESHOLD) {
 | 
					    if (size > HASH_THRESHOLD) {
 | 
				
			||||||
        // input serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes)
 | 
					        rpc_msg_set_tensor_hash_req request;
 | 
				
			||||||
        size_t input_size = sizeof(rpc_tensor) + sizeof(uint64_t) + sizeof(uint64_t);
 | 
					        request.tensor = rpc_tensor;
 | 
				
			||||||
        std::vector<uint8_t> input(input_size, 0);
 | 
					        request.offset = offset;
 | 
				
			||||||
        uint64_t hash = fnv_hash((const uint8_t*)data, size);
 | 
					        request.hash = fnv_hash((const uint8_t*)data, size);
 | 
				
			||||||
        memcpy(input.data(), &rpc_tensor, sizeof(rpc_tensor));
 | 
					 | 
				
			||||||
        memcpy(input.data() + sizeof(rpc_tensor), &offset, sizeof(offset));
 | 
					 | 
				
			||||||
        memcpy(input.data() + sizeof(rpc_tensor) + sizeof(offset), &hash, sizeof(hash));
 | 
					 | 
				
			||||||
        rpc_msg_set_tensor_hash_rsp response;
 | 
					        rpc_msg_set_tensor_hash_rsp response;
 | 
				
			||||||
        bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, input.data(), input.size(), &response, sizeof(response));
 | 
					        bool status = send_rpc_cmd(ctx->sock, RPC_CMD_SET_TENSOR_HASH, &request, sizeof(request), &response, sizeof(response));
 | 
				
			||||||
        GGML_ASSERT(status);
 | 
					        GGML_ASSERT(status);
 | 
				
			||||||
        if (response.result) {
 | 
					        if (response.result) {
 | 
				
			||||||
            // the server has the same data, no need to send it
 | 
					            // the server has the same data, no need to send it
 | 
				
			||||||
@@ -864,7 +867,7 @@ public:
 | 
				
			|||||||
    bool free_buffer(const rpc_msg_free_buffer_req & request);
 | 
					    bool free_buffer(const rpc_msg_free_buffer_req & request);
 | 
				
			||||||
    bool buffer_clear(const rpc_msg_buffer_clear_req & request);
 | 
					    bool buffer_clear(const rpc_msg_buffer_clear_req & request);
 | 
				
			||||||
    bool set_tensor(const std::vector<uint8_t> & input);
 | 
					    bool set_tensor(const std::vector<uint8_t> & input);
 | 
				
			||||||
    bool set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response);
 | 
					    bool set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response);
 | 
				
			||||||
    bool get_tensor(const rpc_msg_get_tensor_req & request, std::vector<uint8_t> & response);
 | 
					    bool get_tensor(const rpc_msg_get_tensor_req & request, std::vector<uint8_t> & response);
 | 
				
			||||||
    bool copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
 | 
					    bool copy_tensor(const rpc_msg_copy_tensor_req & request, rpc_msg_copy_tensor_rsp & response);
 | 
				
			||||||
    bool graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response);
 | 
					    bool graph_compute(const std::vector<uint8_t> & input, rpc_msg_graph_compute_rsp & response);
 | 
				
			||||||
@@ -1101,18 +1104,10 @@ bool rpc_server::get_cached_file(uint64_t hash, std::vector<uint8_t> & data) {
 | 
				
			|||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set_tensor_hash_rsp & response)
 | 
					bool rpc_server::set_tensor_hash(const rpc_msg_set_tensor_hash_req & request, rpc_msg_set_tensor_hash_rsp & response)
 | 
				
			||||||
{
 | 
					{
 | 
				
			||||||
    // serialization format: | rpc_tensor | offset (8 bytes) | hash (8 bytes) |
 | 
					 | 
				
			||||||
    if (input.size() != sizeof(rpc_tensor) + 16) {
 | 
					 | 
				
			||||||
        return false;
 | 
					 | 
				
			||||||
    }
 | 
					 | 
				
			||||||
    const rpc_tensor * in_tensor = (const rpc_tensor *)input.data();
 | 
					 | 
				
			||||||
    uint64_t offset;
 | 
					 | 
				
			||||||
    memcpy(&offset, input.data() + sizeof(rpc_tensor), sizeof(offset));
 | 
					 | 
				
			||||||
    const uint64_t * hash = (const uint64_t *)(input.data() + sizeof(rpc_tensor) + sizeof(offset));
 | 
					 | 
				
			||||||
    std::vector<uint8_t> cached_file;
 | 
					    std::vector<uint8_t> cached_file;
 | 
				
			||||||
    if (!get_cached_file(*hash, cached_file)) {
 | 
					    if (!get_cached_file(request.hash, cached_file)) {
 | 
				
			||||||
        response.result = 0;
 | 
					        response.result = 0;
 | 
				
			||||||
        return true;
 | 
					        return true;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
@@ -1125,25 +1120,28 @@ bool rpc_server::set_tensor_hash(const std::vector<uint8_t> & input, rpc_msg_set
 | 
				
			|||||||
    ggml_context_ptr ctx_ptr { ggml_init(params) };
 | 
					    ggml_context_ptr ctx_ptr { ggml_init(params) };
 | 
				
			||||||
    GGML_ASSERT(ctx_ptr != nullptr);
 | 
					    GGML_ASSERT(ctx_ptr != nullptr);
 | 
				
			||||||
    ggml_context * ctx = ctx_ptr.get();
 | 
					    ggml_context * ctx = ctx_ptr.get();
 | 
				
			||||||
    ggml_tensor * tensor = deserialize_tensor(ctx, in_tensor);
 | 
					    ggml_tensor * tensor = deserialize_tensor(ctx, &request.tensor);
 | 
				
			||||||
    if (tensor == nullptr) {
 | 
					    if (tensor == nullptr) {
 | 
				
			||||||
        GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
 | 
					        GGML_LOG_ERROR("[%s] error deserializing tensor\n", __func__);
 | 
				
			||||||
        return false;
 | 
					        return false;
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n", __func__, (void*)tensor->buffer, tensor->data, offset, size, *hash);
 | 
					    GGML_PRINT_DEBUG("[%s] buffer: %p, data: %p, offset: %" PRIu64 ", size: %zu, hash: %" PRIx64 "\n",
 | 
				
			||||||
 | 
					        __func__, (void*)tensor->buffer, tensor->data, request.offset, size, request.hash);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    // sanitize tensor->data
 | 
					    // sanitize tensor->data
 | 
				
			||||||
    {
 | 
					    {
 | 
				
			||||||
        const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
 | 
					        const size_t p0 = (size_t) ggml_backend_buffer_get_base(tensor->buffer);
 | 
				
			||||||
        const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
 | 
					        const size_t p1 = p0 + ggml_backend_buffer_get_size(tensor->buffer);
 | 
				
			||||||
 | 
					
 | 
				
			||||||
        if (in_tensor->data + offset < p0 || in_tensor->data + offset >= p1 || size > (p1 - in_tensor->data - offset)) {
 | 
					        if (request.tensor.data + request.offset < p0
 | 
				
			||||||
 | 
					         || request.tensor.data + request.offset >= p1
 | 
				
			||||||
 | 
					         || size > (p1 - request.tensor.data - request.offset)) {
 | 
				
			||||||
            GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
 | 
					            GGML_LOG_ERROR("[%s] tensor data region (data=0x%" PRIx64 ", offset=%" PRIu64 ", size=%zu, hash=0x%" PRIx64 ") out of buffer bounds [0x%zx, 0x%zx)\n",
 | 
				
			||||||
                           __func__, in_tensor->data, offset, size, *hash, p0, p1);
 | 
					                           __func__, request.tensor.data, request.offset, size, request.hash, p0, p1);
 | 
				
			||||||
            return false;
 | 
					            return false;
 | 
				
			||||||
        }
 | 
					        }
 | 
				
			||||||
    }
 | 
					    }
 | 
				
			||||||
    ggml_backend_tensor_set(tensor, cached_file.data(), offset, size);
 | 
					    ggml_backend_tensor_set(tensor, cached_file.data(), request.offset, size);
 | 
				
			||||||
    response.result = 1;
 | 
					    response.result = 1;
 | 
				
			||||||
    return true;
 | 
					    return true;
 | 
				
			||||||
}
 | 
					}
 | 
				
			||||||
@@ -1503,12 +1501,12 @@ static void rpc_serve_client(ggml_backend_t backend, const char * cache_dir,
 | 
				
			|||||||
                break;
 | 
					                break;
 | 
				
			||||||
            }
 | 
					            }
 | 
				
			||||||
            case RPC_CMD_SET_TENSOR_HASH: {
 | 
					            case RPC_CMD_SET_TENSOR_HASH: {
 | 
				
			||||||
                std::vector<uint8_t> input;
 | 
					                rpc_msg_set_tensor_hash_req request;
 | 
				
			||||||
                if (!recv_msg(sockfd, input)) {
 | 
					                if (!recv_msg(sockfd, &request, sizeof(request))) {
 | 
				
			||||||
                    return;
 | 
					                    return;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                rpc_msg_set_tensor_hash_rsp response;
 | 
					                rpc_msg_set_tensor_hash_rsp response;
 | 
				
			||||||
                if (!server.set_tensor_hash(input, response)) {
 | 
					                if (!server.set_tensor_hash(request, response)) {
 | 
				
			||||||
                    return;
 | 
					                    return;
 | 
				
			||||||
                }
 | 
					                }
 | 
				
			||||||
                if (!send_msg(sockfd, &response, sizeof(response))) {
 | 
					                if (!send_msg(sockfd, &response, sizeof(response))) {
 | 
				
			||||||
 
 | 
				
			|||||||
		Reference in New Issue
	
	Block a user