heavy work in progress
This commit is contained in:
91
devlog/2025-05-21-Rethink-routing.md
Normal file
91
devlog/2025-05-21-Rethink-routing.md
Normal file
@ -0,0 +1,91 @@
|
||||
# Rethinking the Routing Memory Pool
|
||||
Date: 2025-05-21
|
||||
|
||||
## Goals and Expectations
|
||||
To finish the RX and TX queues.
|
||||
|
||||
## Results
|
||||
Nope. I'm halfway through the TX queue and I'm gonna rework the
|
||||
entire thing.
|
||||
|
||||
## Thought Train
|
||||
Separating the TX queue to be per-interface is amazing. But making it
|
||||
a multi-headed queue is a disaster. In this case, it doesn't simplify
|
||||
the logic, while taking away one of the benefits of a shared memory
|
||||
pool.
|
||||
|
||||
Allow me to walk through this: If we have a symmetric design, where
|
||||
all interfaces send and receive at the same speed, synced, then this
|
||||
would not have been a problem. But in the real world, the interfaces
|
||||
won't guarantee that. Which means for the multi-headed queue, I'd
|
||||
have to implement a separate queue tracking which packets are
|
||||
complete - one of the reasons why I chose to separate the queues in
|
||||
the first place. It meant tracking, per interface, which packets are
|
||||
complete, which is as complex as a shared memory pool. And in the
|
||||
shared memory pool case, it would've handled bursts better. So, why
|
||||
not just implement the shared memory pool and let each interface keep
|
||||
track of the complete packets, and let the central routing logic
|
||||
handle a bi-directional multi-headed queue where each interface gets a
|
||||
read and write pointer?
|
||||
|
||||
## Reworking details
|
||||
|
||||
### Rework the central logic
|
||||
The `hub` would now keep track of the packet queues:
|
||||
|
||||
1. When there's an incoming byte from an interface, throw it in the
|
||||
appropriate place. If it's a new packet, get a new spot for the
|
||||
packet or drop it, if it's part of an existing packet (in memory),
|
||||
append it to that. Benefit: no more buffering packets inside of
|
||||
the interfaces.
|
||||
2. When a packet is complete (i.e. reached the pre-agreed length),
|
||||
parse its header and figure out where to send it, and send a
|
||||
message to the interface telling it where the packet is. No more
|
||||
buffering for the header in the interfaces to tell the hub where to
|
||||
send the packet.
|
||||
|
||||
In addition to that, let the command `000000` always be a command to
|
||||
the header, with the packet length, and the rest of the packet is all
|
||||
just info for the hub (who knows why the hub needs at least 63 bytes
|
||||
of data for a command). This means that there's no need for an
|
||||
`rx_cmd` section, just let the hub store the entire packet and parse
|
||||
it later.
|
||||
|
||||
#### IMPORTANT NOTE
|
||||
There may be the need for reserved memory for to-the-hub commands,
|
||||
otherwise when the packet queue is full, the hub would drop the
|
||||
packet.
|
||||
|
||||
#### More notes
|
||||
The hub will now contain *ALL* the logic for congestion control. If
|
||||
it's full, toggle a bit to let the interfaces know and start sending
|
||||
out messages.
|
||||
|
||||
### Rework the interfaces
|
||||
The interfaces will contain less logic. They only know the following
|
||||
things:
|
||||
|
||||
1. Upon receiving a byte, if the hub has space, send it to the hub,
|
||||
otherwise send back a message telling the device to stop congesting
|
||||
the fabric.
|
||||
2. When the hub tells it that another packet for that interface is
|
||||
ready, start sending it if it's not sending anything else, or add
|
||||
it to the to-send queue. **NOTE:** Congestion messages are
|
||||
top-priority; they are always the first packets to check for.
|
||||
|
||||
## Potential problem
|
||||
If one flow is congesting the fabric, then the entire network would be
|
||||
congested. However, there are CC methods and we can always have an
|
||||
upper bound for the TX to-send queues.
|
||||
|
||||
## Reflections
|
||||
Good planning is still the way. Plan as you go. See the trade-offs.
|
||||
Also, try things out: trying makes both the plan and the project better.
|
||||
|
||||
It's good that I was able to catch this before I implemented the entire
|
||||
thing. And what I already completed isn't in vain - the logic is
|
||||
still there, what I learned from doing it is still there, they've just
|
||||
been repurposed to something else, something more elegant.
|
||||
|
||||
## Next steps
|
||||
Put the reworking into action.
|
@ -5,7 +5,7 @@ module hub (
|
||||
input logic [3:0] rx_cmd_valid,
|
||||
input logic [31:0] rx_byte,
|
||||
input logic [3:0] rx_valid,
|
||||
input logic [7:0] rx2tx_dest, // rx byte's destination
|
||||
input logic [31:0] rx2tx_dest, // rx byte's destination
|
||||
input logic [3:0] tx_ready, // if tx_byte is ready to be read
|
||||
output logic [3:0] rx_ready, // if rx_byte is ready to be read
|
||||
output logic [7:0] tx_src, // tell the tx where the stream is coming from
|
||||
@ -49,7 +49,7 @@ module hub (
|
||||
for (int i = 0; i < 4; i++) begin
|
||||
if (rx_valid[i]) begin
|
||||
if (!in_buffer[i]) begin
|
||||
service_buffer[i].dest <= get_dest(rx2tx_dest, i[1:0]);
|
||||
service_buffer[i].dest <= get_hop(rx2tx_dest, i[1:0]);
|
||||
service_buffer[i].payload <= get_byte(rx_byte, i[1:0]);
|
||||
in_buffer[i] <= 1;
|
||||
end
|
||||
@ -73,12 +73,24 @@ module hub (
|
||||
|
||||
endmodule // hub
|
||||
|
||||
// Pick one 2-bit destination field out of a packed 8-bit map.
//
//   dest_map : four 2-bit fields, field 0 in bits [1:0], field 3 in bits [7:6]
//   idx      : index of the field to return
//
// Returns dest_map[2*idx +: 2].
function automatic logic [1:0] get_dest(input logic [7:0] dest_map,
                                        input logic [1:0] idx);
   // View the flat map as four 2-bit lanes so the lane can be indexed directly.
   logic [3:0][1:0] lanes;

   lanes = dest_map;
   return lanes[idx];
endfunction // get_dest
|
||||
|
||||
// Pick one byte out of a packed 32-bit word.
//
//   byte_arr : four bytes, byte 0 in bits [7:0], byte 3 in bits [31:24]
//   idx      : index of the byte to return
//
// Returns byte_arr[8*idx +: 8].
function automatic logic [7:0] get_byte(input logic [31:0] byte_arr,
                                        input logic [1:0]  idx);
   // View the flat word as four byte lanes so the lane can be indexed directly.
   logic [3:0][7:0] lanes;

   lanes = byte_arr;
   return lanes[idx];
endfunction // get_byte
|
||||
|
||||
// NOTE: addr 0 is always mapped to the fabric itself and caught before this
//
// Translate one interface's 8-bit destination address into a 2-bit hop.
//
//   dest_map : four 8-bit addresses, entry 0 in bits [7:0], entry 3 in [31:24]
//   idx      : index of the entry to translate
//
// Addresses 1..4 map to hops 0..3; every other address yields hop 0.
function automatic logic [1:0] get_hop(input logic [31:0] dest_map,
                                       input logic [1:0]  idx);
   logic [3:0][7:0] addrs;
   logic [1:0]      hop;

   addrs = dest_map;

   case (addrs[idx])
     8'd1:    hop = 2'd0;
     8'd2:    hop = 2'd1;
     8'd3:    hop = 2'd2;
     8'd4:    hop = 2'd3;
     default: hop = 2'd0;
   endcase // case (addrs[idx])

   return hop;
endfunction // get_hop
|
||||
|
@ -10,11 +10,12 @@ module spi_interface(
|
||||
input logic tx_valid,
|
||||
input logic [7:0] tx_byte,
|
||||
input logic [1:0] tx_src,
|
||||
input logic [1:0] packet_size,
|
||||
output logic miso,
|
||||
output logic tx_ready,
|
||||
output logic rx_valid,
|
||||
output logic [7:0] rx_byte,
|
||||
output logic [1:0] rx_dest,
|
||||
output logic [7:0] rx_dest,
|
||||
output logic [7:0] rx_cmd,
|
||||
output logic rx_cmd_valid);
|
||||
|
||||
@ -34,20 +35,20 @@ module spi_interface(
|
||||
int bit_cnt = 0;
|
||||
logic [7:0] rx_shift;
|
||||
logic [7:0] tx_shift = 8'b00101010;
|
||||
logic [7:0] tx_buff = '0;
|
||||
logic [7:0] rx_buff = '0;
|
||||
logic byte_ready = 0;
|
||||
|
||||
always_ff @ (posedge sclk_rising_edge or posedge rst) begin
|
||||
if (rst) begin
|
||||
rx_shift <= '0;
|
||||
tx_buff <= '0;
|
||||
rx_buff <= '0;
|
||||
bit_cnt <= '0;
|
||||
byte_ready <= 0;
|
||||
end
|
||||
else begin
|
||||
if (cs) begin
|
||||
rx_shift <= 0;
|
||||
tx_buff <= 0;
|
||||
rx_buff <= 0;
|
||||
bit_cnt <= 0;
|
||||
end else begin
|
||||
rx_shift <= {rx_shift[6:0], mosi};
|
||||
@ -55,7 +56,7 @@ module spi_interface(
|
||||
|
||||
if (bit_cnt == 7) begin
|
||||
bit_cnt <= 0;
|
||||
tx_buff <= {rx_shift[6:0], mosi};
|
||||
rx_buff <= {rx_shift[6:0], mosi};
|
||||
byte_ready <= 1;
|
||||
end else
|
||||
byte_ready <= 0;
|
||||
@ -64,7 +65,7 @@ module spi_interface(
|
||||
|
||||
$display("[%0d] current rx_shift: %b", $time, rx_shift);
|
||||
$display("[%0d] current bit_cnt: %0d", $time, bit_cnt);
|
||||
$display("[%0d] current tx_buff: %b", $time, tx_buff);
|
||||
$display("[%0d] current rx_buff: %b", $time, rx_buff);
|
||||
end // always_ff @ (posedge sclk)
|
||||
|
||||
always_ff @ (posedge sclk_falling_edge) begin
|
||||
@ -76,7 +77,7 @@ module spi_interface(
|
||||
tx_shift <= 0;
|
||||
end else begin
|
||||
if (bit_cnt == 0) begin
|
||||
tx_shift <= tx_buff[7:0];
|
||||
tx_shift <= rx_buff[7:0];
|
||||
end else begin
|
||||
tx_shift <= {tx_shift[6:0], 1'b0};
|
||||
end
|
||||
@ -89,25 +90,25 @@ module spi_interface(
|
||||
|
||||
assign miso = tx_shift[7];
|
||||
|
||||
shortint packet_size = 64;
|
||||
|
||||
// RX and TX logic
|
||||
logic [9:0] rx_queue_head = 0;
|
||||
logic [9:0] rx_queue_tail = 0;
|
||||
logic [10:0] rx_size = 0;
|
||||
logic rx_queue_write = 0;
|
||||
logic [7:0] rx_read;
|
||||
logic packet_in;
|
||||
logic [7:0] dest_read;
|
||||
logic packet_sending;
|
||||
logic rx_queue_empty;
|
||||
assign rx_size = (rx_queue_tail + 11'd1024 - rx_queue_head) & 11'h3FF;
|
||||
assign rx_queue_empty = ~(|rx_size);
|
||||
|
||||
bram_1024B rx_queue (.sys_clk(sys_clk),
|
||||
rx_queue_bram rx_queue (.sys_clk(sys_clk),
|
||||
.write_enable(rx_queue_write),
|
||||
.read_addr(rx_queue_head),
|
||||
.write_addr(rx_queue_tail),
|
||||
.write_data(tx_buff),
|
||||
.read_data(rx_read));
|
||||
.write_data(rx_buff),
|
||||
.read_data(rx_read),
|
||||
.read_dest(dest_read));
|
||||
|
||||
always_ff @ (posedge sys_clk) begin
|
||||
if (rst) begin
|
||||
@ -115,17 +116,36 @@ module spi_interface(
|
||||
rx_queue_tail <= '0;
|
||||
rx_queue_write <= '0;
|
||||
rx_read <= '0;
|
||||
packet_in <= 0;
|
||||
packet_sending <= 0;
|
||||
end else begin
|
||||
if (byte_ready)
|
||||
rx_queue_write <= 1;
|
||||
else
|
||||
if (rx_queue_write) begin
|
||||
rx_queue_write <= 0;
|
||||
if (!packet_in && rx_size > 2) begin
|
||||
// CONSULT internal routing table for directions
|
||||
rx_queue_tail <= rx_queue_tail + 1;
|
||||
end
|
||||
if (!packet_sending) begin
|
||||
if (rx_size > 2 && rx_ready) begin
|
||||
rx_byte <= rx_read;
|
||||
rx_dest <= dest_read;
|
||||
rx_valid <= 1;
|
||||
end else
|
||||
rx_valid <= 0;
|
||||
end else begin
|
||||
if (is_packet_complete(rx_queue_head, packet_size))
|
||||
packet_sending <= 0;
|
||||
else if (rx_size > 0) begin
|
||||
rx_byte <= rx_read;
|
||||
rx_dest <= dest_read;
|
||||
rx_valid <= 1;
|
||||
end
|
||||
end
|
||||
end
|
||||
end // always_ff @ (posedge sys_clk)
|
||||
|
||||
logic [13:0] tx_queue_head;
|
||||
logic [13:0] tx_queue_tail;
|
||||
|
||||
|
||||
endmodule // spi_interface
|
||||
|
||||
@ -166,23 +186,58 @@ module async_get_clk_edges(
|
||||
`endif // !`ifdef SYNC_2FF
|
||||
endmodule // async_get_clk_edges
|
||||
|
||||
// Receive-queue memory: 1024 entries of 8 bits with one synchronous write
// port and two registered read outputs.
//
//   sys_clk      : clock; all accesses happen on its rising edge
//   write_enable : when high, write_data is stored at write_addr
//   read_addr    : address of the entry returned on read_data
//   read_data    : mem[read_addr], registered (updates on the clock edge)
//   read_dest    : the entry following read_addr, registered; the address
//                  wraps from 1023 back to 0
//
// Reads use the memory contents from before the edge, so a write and a read
// of the same address in one cycle return the old value.
module rx_queue_bram (
                      input logic        sys_clk,
                      input logic        write_enable,
                      input logic [9:0]  read_addr,
                      input logic [9:0]  write_addr,
                      input logic [7:0]  write_data,
                      output logic [7:0] read_data,
                      output logic [7:0] read_dest);
   timeunit 1ns;
   timeprecision 1ps;

   logic [7:0] mem [1023:0];

   // Keep the "+1" at 10 bits. A bare `read_addr + 1` is evaluated at 32 bits,
   // so read_addr == 1023 would index entry 1024, which does not exist.
   logic [9:0] next_addr;
   assign next_addr = read_addr + 10'd1;

   always_ff @ (posedge sys_clk) begin
      if (write_enable)
        mem[write_addr] <= write_data;
      read_data <= mem[read_addr];
      read_dest <= mem[next_addr];
   end

endmodule // rx_queue_bram
|
||||
|
||||
// Transmit-queue memory: 16384 entries of 8 bits with one synchronous write
// port and one registered read output.
//
//   sys_clk      : clock; all accesses happen on its rising edge
//   write_enable : when high, write_data is stored at write_addr
//   read_addr    : address of the entry returned on read_data
//   read_data    : mem[read_addr], registered (updates on the clock edge)
//
// A read uses the memory contents from before the edge, so a write and a read
// of the same address in one cycle return the old value.
module tx_queue_bram(input logic         sys_clk,
                     input logic         write_enable,
                     input logic [13:0]  read_addr,
                     input logic [13:0]  write_addr,
                     input logic [7:0]   write_data,
                     output logic [7:0]  read_data);
   timeunit 1ns;
   timeprecision 1ps;

   // Depth must cover every value of the 14-bit address ports (0..16383).
   // The earlier bound of 16 * 1023 left the top 15 addresses unbacked.
   logic [7:0] mem [16 * 1024 - 1:0];

   always_ff @ (posedge sys_clk) begin
      if (write_enable)
        mem[write_addr] <= write_data;
      read_data <= mem[read_addr];
   end

endmodule // tx_queue_bram
|
||||
|
||||
// Report whether a queue pointer sits on the last byte of a packet.
//
//   head        : 10-bit queue pointer
//   packet_size : size selector
//                   2'b00 -> low 6 bits of head checked
//                   2'b01 -> low 7 bits of head checked
//                   2'b10 -> low 8 bits of head checked
//                   2'b11 -> all 10 bits of head checked
//
// Returns 1 when all checked bits are 1, 0 otherwise.
//
// NOTE(review): this assumes packets are 64/128/256/1024 bytes and aligned to
// their size in the queue -- confirm against the queue allocation logic. The
// earlier form, &(head & 'd64), reduced a value with at most one bit set, so
// it was always 0 and the first three sizes never reported completion.
function automatic logic is_packet_complete(input logic [9:0] head,
                                            input logic [1:0] packet_size);
   case (packet_size)
     2'b00:
       return &head[5:0];
     2'b01:
       return &head[6:0];
     2'b10:
       return &head[7:0];
     2'b11:
       return &head;
   endcase // case (packet_size)
endfunction // is_packet_complete
|
||||
|
Reference in New Issue
Block a user