heavy work in progress

This commit is contained in:
2025-05-21 21:04:57 -04:00
parent 1f7c47a1fb
commit dac3140829
3 changed files with 191 additions and 33 deletions

View File

@ -0,0 +1,91 @@
# Rethinking the Routing Memory Pool
Date: 2025-05-21
## Goals and Expectations
To finish the RX and TX queues.
## Results
Nope. I'm half way through the TX queue and I'm gonna rework the
entire thing.
## Thought Train
Separating the TX queue to be per-interface is amazing. But making it
a multi-headed queue is a disaster. In this case, it doesn't simplify
the logic, while taking away one of the benefits of a shared memory
pool.
Allow me to walk through this: If we have a symmetric design, where
all interfaces send and receive at the same speed, synced, then this
would not have been a problem. But in the real world, the interfaces
won't guarantee that. Which means for the multi-headed queue, I'd
have to implement a separate queue tracking which packets are
complete - one of the reasons why I chose to separate the queues in
the first place. It meant tracking, per interface, which packets are
complete, which is as complex as a shared memory pool. And in the
shared memory pool case, it would've handled bursts better. So, why
not just implement the shared memory pool, let each interface keep
track of its complete packets, and let the central routing logic
handle a bi-directional multi-headed queue where each interface gets a
read and write pointer?
## Reworking details
### Rework the central logic
The `hub` would now keep track of the packet queues:
1. When there's an incoming byte from an interface, throw it in the
appropriate place. If it's a new packet, get a new spot for the
packet or drop it; if it's part of an existing packet (in memory),
append it to that packet. Benefit: no more buffering packets inside
of the interfaces.
2. When a packet is complete (i.e. reached the pre-agreed length),
parse its header and figure out where to send it, and send a
message to the interface telling it where the packet is. No more
buffering for the header in the interfaces to tell the hub where to
send the packet.
In addition to that, let the command `000000` always be a command to
the header, with the packet length, and the rest of the packet is all
just info for the hub (who knows why the hub needs at least 63 bytes
of data for a command). This means that there's no need for an
`rx_cmd` section, just let the hub store the entire packet and parse
it later.
#### IMPORTANT NOTE
There may be the need for reserved memory for to-the-hub commands,
otherwise when the packet queue is full, the hub would drop the
packet.
#### More notes
The hub will now contain *ALL* the logic for congestion control. If
it's full, toggle a bit to let the interfaces know and start sending
out messages.
### Rework the interfaces
The interfaces will contain less logic. Each one only needs to know
the following things:
1. Upon receiving a byte, if the hub has space, send it to the hub,
otherwise send back a message telling the device to stop congesting
the fabric.
2. When the hub tells it that another packet for that interface is
ready, start sending it if it's not sending anything else, or add
it to the to-send queue. **NOTE:** Congestion messages are
top-priority; they are always the first packets to check for.
## Potential problem
If one flow is congesting the fabric, then the entire network would be
congested. However, there are CC methods and we can always have an
upper bound for the TX to-send queues.
## Reflections
Good planning is still the way. Plan as you go. See the trade-offs.
Also, try things out: trying makes both the plan and the project better.
It's good that I was able to catch this before I implemented the entire
thing. And what I already completed isn't in vain - the logic is
still there, what I learned from doing it is still there; they've just
been repurposed into something else, something more elegant.
## Next steps
Put the reworking into action.

View File

@ -5,7 +5,7 @@ module hub (
input logic [3:0] rx_cmd_valid, input logic [3:0] rx_cmd_valid,
input logic [31:0] rx_byte, input logic [31:0] rx_byte,
input logic [3:0] rx_valid, input logic [3:0] rx_valid,
input logic [7:0] rx2tx_dest, // rx byte's destination input logic [31:0] rx2tx_dest, // rx byte's destination
input logic [3:0] tx_ready, // if tx_byte is ready to be read input logic [3:0] tx_ready, // if tx_byte is ready to be read
output logic [3:0] rx_ready, // if rx_byte is ready to be read output logic [3:0] rx_ready, // if rx_byte is ready to be read
output logic [7:0] tx_src, // tell the tx where the stream is comming from output logic [7:0] tx_src, // tell the tx where the stream is comming from
@ -49,7 +49,7 @@ module hub (
for (int i = 0; i < 4; i++) begin for (int i = 0; i < 4; i++) begin
if (rx_valid[i]) begin if (rx_valid[i]) begin
if (!in_buffer[i]) begin if (!in_buffer[i]) begin
service_buffer[i].dest <= get_dest(rx2tx_dest, i[1:0]); service_buffer[i].dest <= get_hop(rx2tx_dest, i[1:0]);
service_buffer[i].payload <= get_byte(rx_byte, i[1:0]); service_buffer[i].payload <= get_byte(rx_byte, i[1:0]);
in_buffer[i] <= 1; in_buffer[i] <= 1;
end end
@ -73,12 +73,24 @@ module hub (
endmodule // hub endmodule // hub
// Extracts one 2-bit field from a packed map of four 2-bit fields.
//   dest_map - four 2-bit entries packed LSB-first (entry 0 in bits [1:0])
//   idx      - which entry to return (0..3)
// Returns dest_map[2*idx +: 2].
function automatic logic [1:0] get_dest(input logic [7:0] dest_map,
                                        input logic [1:0] idx);
   logic [2:0] field_lsb;

   // Each entry is 2 bits wide, so entry idx starts at bit 2*idx.
   field_lsb = {idx, 1'b0};
   return dest_map[field_lsb +: 2];
endfunction // get_dest
// Extracts one byte from a packed array of four bytes.
//   byte_arr - four bytes packed LSB-first (byte 0 in bits [7:0])
//   idx      - which byte to return (0..3)
// Returns byte_arr[8*idx +: 8].
function automatic logic [7:0] get_byte(input logic [31:0] byte_arr,
                                        input logic [1:0] idx);
   logic [4:0] byte_lsb;

   // Each entry is 8 bits wide, so byte idx starts at bit 8*idx.
   byte_lsb = {idx, 3'b000};
   return byte_arr[byte_lsb +: 8];
endfunction // get_byte
// NOTE: addr 0 is always mapped to the fabric itself and caught before this
//
// Translates the destination address of one interface into a 2-bit hop.
//   dest_map - four 8-bit destination addresses packed LSB-first
//   idx      - which interface's address to look up (0..3)
// Addresses 1..4 map to hops 0..3; every other address yields hop 0.
function automatic logic [1:0] get_hop(input logic [31:0] dest_map,
                                       input logic [1:0] idx);
   logic [7:0] addr;
   logic [1:0] hop;

   addr = dest_map[{idx, 3'b000} +: 8];

   case (addr)
     8'd1:    hop = 2'b00;
     8'd2:    hop = 2'b01;
     8'd3:    hop = 2'b10;
     8'd4:    hop = 2'b11;
     default: hop = 2'b00;
   endcase // case (addr)

   return hop;
endfunction // get_hop

View File

@ -10,11 +10,12 @@ module spi_interface(
input logic tx_valid, input logic tx_valid,
input logic [7:0] tx_byte, input logic [7:0] tx_byte,
input logic [1:0] tx_src, input logic [1:0] tx_src,
input logic [1:0] packet_size,
output logic miso, output logic miso,
output logic tx_ready, output logic tx_ready,
output logic rx_valid, output logic rx_valid,
output logic [7:0] rx_byte, output logic [7:0] rx_byte,
output logic [1:0] rx_dest, output logic [7:0] rx_dest,
output logic [7:0] rx_cmd, output logic [7:0] rx_cmd,
output logic rx_cmd_valid); output logic rx_cmd_valid);
@ -34,20 +35,20 @@ module spi_interface(
int bit_cnt = 0; int bit_cnt = 0;
logic [7:0] rx_shift; logic [7:0] rx_shift;
logic [7:0] tx_shift = 8'b00101010; logic [7:0] tx_shift = 8'b00101010;
logic [7:0] tx_buff = '0; logic [7:0] rx_buff = '0;
logic byte_ready = 0; logic byte_ready = 0;
always_ff @ (posedge sclk_rising_edge or posedge rst) begin always_ff @ (posedge sclk_rising_edge or posedge rst) begin
if (rst) begin if (rst) begin
rx_shift <= '0; rx_shift <= '0;
tx_buff <= '0; rx_buff <= '0;
bit_cnt <= '0; bit_cnt <= '0;
byte_ready <= 0; byte_ready <= 0;
end end
else begin else begin
if (cs) begin if (cs) begin
rx_shift <= 0; rx_shift <= 0;
tx_buff <= 0; rx_buff <= 0;
bit_cnt <= 0; bit_cnt <= 0;
end else begin end else begin
rx_shift <= {rx_shift[6:0], mosi}; rx_shift <= {rx_shift[6:0], mosi};
@ -55,7 +56,7 @@ module spi_interface(
if (bit_cnt == 7) begin if (bit_cnt == 7) begin
bit_cnt <= 0; bit_cnt <= 0;
tx_buff <= {rx_shift[6:0], mosi}; rx_buff <= {rx_shift[6:0], mosi};
byte_ready <= 1; byte_ready <= 1;
end else end else
byte_ready <= 0; byte_ready <= 0;
@ -64,7 +65,7 @@ module spi_interface(
$display("[%0d] current rx_shift: %b", $time, rx_shift); $display("[%0d] current rx_shift: %b", $time, rx_shift);
$display("[%0d] current bit_cnt: %0d", $time, bit_cnt); $display("[%0d] current bit_cnt: %0d", $time, bit_cnt);
$display("[%0d] current tx_buff: %b", $time, tx_buff); $display("[%0d] current rx_buff: %b", $time, rx_buff);
end // always_ff @ (posedge sclk) end // always_ff @ (posedge sclk)
always_ff @ (posedge sclk_falling_edge) begin always_ff @ (posedge sclk_falling_edge) begin
@ -76,7 +77,7 @@ module spi_interface(
tx_shift <= 0; tx_shift <= 0;
end else begin end else begin
if (bit_cnt == 0) begin if (bit_cnt == 0) begin
tx_shift <= tx_buff[7:0]; tx_shift <= rx_buff[7:0];
end else begin end else begin
tx_shift <= {tx_shift[6:0], 1'b0}; tx_shift <= {tx_shift[6:0], 1'b0};
end end
@ -89,25 +90,25 @@ module spi_interface(
assign miso = tx_shift[7]; assign miso = tx_shift[7];
shortint packet_size = 64;
// RX and TX logic // RX and TX logic
logic [9:0] rx_queue_head = 0; logic [9:0] rx_queue_head = 0;
logic [9:0] rx_queue_tail = 0; logic [9:0] rx_queue_tail = 0;
logic [10:0] rx_size = 0; logic [10:0] rx_size = 0;
logic rx_queue_write = 0; logic rx_queue_write = 0;
logic [7:0] rx_read; logic [7:0] rx_read;
logic packet_in; logic [7:0] dest_read;
logic packet_sending;
logic rx_queue_empty; logic rx_queue_empty;
assign rx_size = (rx_queue_tail + 11'd1024 - rx_queue_head) & 11'h3FF; assign rx_size = (rx_queue_tail + 11'd1024 - rx_queue_head) & 11'h3FF;
assign rx_queue_empty = ~(|rx_size); assign rx_queue_empty = ~(|rx_size);
bram_1024B rx_queue (.sys_clk(sys_clk), rx_queue_bram rx_queue (.sys_clk(sys_clk),
.write_enable(rx_queue_write), .write_enable(rx_queue_write),
.read_addr(rx_queue_head), .read_addr(rx_queue_head),
.write_addr(rx_queue_tail), .write_addr(rx_queue_tail),
.write_data(tx_buff), .write_data(rx_buff),
.read_data(rx_read)); .read_data(rx_read),
.read_dest(dest_read));
always_ff @ (posedge sys_clk) begin always_ff @ (posedge sys_clk) begin
if (rst) begin if (rst) begin
@ -115,17 +116,36 @@ module spi_interface(
rx_queue_tail <= '0; rx_queue_tail <= '0;
rx_queue_write <= '0; rx_queue_write <= '0;
rx_read <= '0; rx_read <= '0;
packet_in <= 0; packet_sending <= 0;
end else begin end else begin
if (byte_ready) if (byte_ready)
rx_queue_write <= 1; rx_queue_write <= 1;
else if (rx_queue_write) begin
rx_queue_write <= 0; rx_queue_write <= 0;
if (!packet_in && rx_size > 2) begin rx_queue_tail <= rx_queue_tail + 1;
// CONSULT internal routing table for directions end
if (!packet_sending) begin
if (rx_size > 2 && rx_ready) begin
rx_byte <= rx_read;
rx_dest <= dest_read;
rx_valid <= 1;
end else
rx_valid <= 0;
end else begin
if (is_packet_complete(rx_queue_head, packet_size))
packet_sending <= 0;
else if (rx_size > 0) begin
rx_byte <= rx_read;
rx_dest <= dest_read;
rx_valid <= 1;
end end
end end
end end
end // always_ff @ (posedge sys_clk)
logic [13:0] tx_queue_head;
logic [13:0] tx_queue_tail;
endmodule // spi_interface endmodule // spi_interface
@ -166,23 +186,58 @@ module async_get_clk_edges(
`endif // !`ifdef SYNC_2FF `endif // !`ifdef SYNC_2FF
endmodule // async_get_clk_edges endmodule // async_get_clk_edges
// 1024 x 8-bit queue memory with one synchronous write port and two
// registered read ports.
//
// Ports:
//   sys_clk      - clock for the write and both reads
//   write_enable - when high, write_data is stored at write_addr on the
//                  rising clock edge
//   read_addr    - address of the byte returned on read_data
//   write_addr   - address written when write_enable is high
//   write_data   - byte to store
//   read_data    - mem[read_addr], registered
//   read_dest    - mem[read_addr + 1], registered; the address wraps from
//                  1023 back to 0
//                  NOTE(review): presumably the destination byte that follows
//                  the byte at the queue head - confirm against the packet
//                  layout.
module rx_queue_bram (
                      input logic        sys_clk,
                      input logic        write_enable,
                      input logic [9:0]  read_addr,
                      input logic [9:0]  write_addr,
                      input logic [7:0]  write_data,
                      output logic [7:0] read_data,
                      output logic [7:0] read_dest);
   timeunit 1ns;
   timeprecision 1ps;

   logic [7:0] mem [1023:0];

   // The increment is done at 10 bits so it wraps inside the memory.
   // A bare `read_addr + 1` is evaluated at integer width, which turns
   // address 1023 into index 1024 (out of range, reads X).
   logic [9:0] next_read_addr;
   assign next_read_addr = read_addr + 10'd1;

   always_ff @ (posedge sys_clk) begin
      if (write_enable)
        mem[write_addr] <= write_data;
      read_data <= mem[read_addr];
      read_dest <= mem[next_read_addr];
   end
endmodule // rx_queue_bram
// 16 KiB (16384 x 8-bit) queue memory with one synchronous write port and
// one registered read port.
//
// Ports:
//   sys_clk      - clock for the write and the read
//   write_enable - when high, write_data is stored at write_addr on the
//                  rising clock edge
//   read_addr    - address of the byte returned on read_data
//   write_addr   - address written when write_enable is high
//   write_data   - byte to store
//   read_data    - mem[read_addr], registered
module tx_queue_bram(input logic        sys_clk,
                     input logic        write_enable,
                     input logic [13:0] read_addr,
                     input logic [13:0] write_addr,
                     input logic [7:0]  write_data,
                     output logic [7:0] read_data);
   timeunit 1ns;
   timeprecision 1ps;

   // Depth matches the 14-bit address ports: 16 * 1024 = 16384 entries, so
   // every address the ports can express lands inside the array.
   logic [7:0] mem [16 * 1024 - 1:0];

   always_ff @ (posedge sys_clk) begin
      if (write_enable)
        mem[write_addr] <= write_data;
      read_data <= mem[read_addr];
   end
endmodule // tx_queue_bram
// Reports whether the queue read pointer sits on the last byte of a packet.
//
//   head        - current read address into the 1024-byte RX queue
//   packet_size - packet length selector:
//                   2'b00 -> 64 bytes   (head[5:0] all ones)
//                   2'b01 -> 128 bytes  (head[6:0] all ones)
//                   2'b10 -> 256 bytes  (head[7:0] all ones)
//                   2'b11 -> the whole queue (all ten bits of head are ones)
// Returns 1 when head is the final address of a packet-sized slot.
//
// NOTE(review): assumes packets are stored aligned to packet-size boundaries
// in the queue - confirm against the write side.
//
// The reduction is taken over the low address bits only. AND-reducing
// `head & 'd64` (a value with at most one bit set) can never be 1, so the
// 64/128/256-byte cases would never report completion.
function automatic logic is_packet_complete(input logic [9:0] head,
                                            input logic [1:0] packet_size);
   case (packet_size)
     2'b00:
       return &head[5:0];
     2'b01:
       return &head[6:0];
     2'b10:
       return &head[7:0];
     2'b11:
       return &head;
     default:
       // Only reachable when packet_size contains X/Z; keep the result unknown.
       return 1'bx;
   endcase // case (packet_size)
endfunction // is_packet_complete