"...composable_kernel_rocm.git" did not exist on "8a659a2e4c52936986884632f13cb58091a54cbf"
Commit c15f2eab authored by Antoine Kaufmann's avatar Antoine Kaufmann
Browse files

sims/net/menshen: add

parent 3be11ceb
...@@ -13,6 +13,8 @@ sims/nic/i40e_bm/i40e_bm ...@@ -13,6 +13,8 @@ sims/nic/i40e_bm/i40e_bm
sims/net/wire/net_wire sims/net/wire/net_wire
sims/net/tap/net_tap sims/net/tap/net_tap
sims/net/switch/net_switch sims/net/switch/net_switch
sims/net/menshen/menshen_hw
sims/net/menshen/obj_dir
sims/tofino/tofino sims/tofino/tofino
doc/doxygen doc/doxygen
doc/_build doc/_build
......
module blk_mem_gen_0 #(
parameter ADDR_BITS = 5,
parameter DATA_BITS = 32
)
(
input [(ADDR_BITS - 1):0] addra,
input clka,
input [(DATA_BITS - 1):0] dina,
input ena,
input wea,
input [(ADDR_BITS - 1):0] addrb,
input clkb,
output [(DATA_BITS - 1):0] doutb,
input enb
);
ram_blk #(
.ADDR_BITS(ADDR_BITS),
.DATA_BITS(DATA_BITS)
) ram (
.addra(addra),
.clka(clka),
.dina(dina),
.ena(ena),
.wea(wea),
.addrb(addrb),
.clkb(clkb),
.doutb(doutb),
.enb(enb)
);
endmodule
\ No newline at end of file
module blk_mem_gen_1 #(
parameter ADDR_BITS = 4,
parameter DATA_BITS = 625
)
(
input [(ADDR_BITS - 1):0] addra,
input clka,
input [(DATA_BITS - 1):0] dina,
input ena,
input wea,
input [(ADDR_BITS - 1):0] addrb,
input clkb,
output [(DATA_BITS - 1):0] doutb,
input enb
);
ram_blk #(
.ADDR_BITS(ADDR_BITS),
.DATA_BITS(DATA_BITS)
) ram (
.addra(addra),
.clka(clka),
.dina(dina),
.ena(ena),
.wea(wea),
.addrb(addrb),
.clkb(clkb),
.doutb(doutb),
.enb(enb)
);
endmodule
\ No newline at end of file
module blk_mem_gen_2 #(
parameter ADDR_BITS = 5,
parameter DATA_BITS = 38
)
(
input [(ADDR_BITS - 1):0] addra,
input clka,
input [(DATA_BITS - 1):0] dina,
input ena,
input wea,
input [(ADDR_BITS - 1):0] addrb,
input clkb,
output [(DATA_BITS - 1):0] doutb,
input enb
);
ram_blk #(
.ADDR_BITS(ADDR_BITS),
.DATA_BITS(DATA_BITS)
) ram (
.addra(addra),
.clka(clka),
.dina(dina),
.ena(ena),
.wea(wea),
.addrb(addrb),
.clkb(clkb),
.doutb(doutb),
.enb(enb)
);
endmodule
\ No newline at end of file
module blk_mem_gen_3 #(
parameter ADDR_BITS = 5,
parameter DATA_BITS = 193
)
(
input [(ADDR_BITS - 1):0] addra,
input clka,
input [(DATA_BITS - 1):0] dina,
input ena,
input wea,
input [(ADDR_BITS - 1):0] addrb,
input clkb,
output [(DATA_BITS - 1):0] doutb,
input enb
);
ram_blk #(
.ADDR_BITS(ADDR_BITS),
.DATA_BITS(DATA_BITS)
) ram (
.addra(addra),
.clka(clka),
.dina(dina),
.ena(ena),
.wea(wea),
.addrb(addrb),
.clkb(clkb),
.doutb(doutb),
.enb(enb)
);
endmodule
\ No newline at end of file
module cam_simple #(
parameter DATA_WIDTH = 64,
parameter ADDR_WIDTH = 5,
parameter SLICE_WIDTH = 9
)
(
input wire clk,
input wire rst,
input wire [ADDR_WIDTH-1:0] write_addr,
input wire [DATA_WIDTH-1:0] write_data,
input wire write_delete,
input wire write_enable,
output wire write_busy,
input wire [DATA_WIDTH-1:0] compare_data,
output wire [2**ADDR_WIDTH-1:0] match_many,
output wire [2**ADDR_WIDTH-1:0] match_single,
output wire [ADDR_WIDTH-1:0] match_addr,
output wire match
);
localparam RAM_DEPTH = 2**ADDR_WIDTH;
reg [RAM_DEPTH - 1 : 0]match_many_raw;
reg valids[RAM_DEPTH];
reg [DATA_WIDTH-1:0]keys[RAM_DEPTH];
// reads
integer k;
always @(posedge clk) begin
for (k = 0; k < RAM_DEPTH; k = k + 1) begin
match_many_raw[k] <= valids[k] && (keys[k] == compare_data);
end
end
// writes
integer i;
always @(posedge clk) begin
if (rst) begin
for (i = 0; i < RAM_DEPTH; i = i + 1) begin
valids[i] <= 0;
keys[i] <= 0;
end
end else if (write_enable) begin
if (write_delete) begin
valids[write_addr] <= 0;
end else begin
keys[write_addr] <= write_data;
valids[write_addr] <= 1;
end
end
end
priority_encoder #(
.WIDTH(RAM_DEPTH),
.LSB_PRIORITY("HIGH")
)
priority_encoder_inst (
.input_unencoded(match_many_raw),
.output_valid(match),
.output_encoded(match_addr),
.output_unencoded(match_single)
);
endmodule
module cam_top #(
parameter C_DEPTH = 16,
parameter ADDR_BITS = 4,
parameter C_WIDTH = 205,
parameter C_MEM_INIT = 0
) (
input CLK,
input RST,
input [(C_WIDTH-1):0] CMP_DIN,
input CMP_DATA_MASK,
output BUSY,
output MATCH,
output [(ADDR_BITS - 1):0] MATCH_ADDR,
input WE,
input [(ADDR_BITS - 1):0] WR_ADDR,
input DATA_MASK,
input [(C_WIDTH-1):0] DIN,
input EN
);
cam_simple #(
.DATA_WIDTH(C_WIDTH),
.ADDR_WIDTH(ADDR_BITS),
.SLICE_WIDTH(9)
) cam (
.clk(CLK),
.rst(RST),
.write_addr(WR_ADDR),
.write_data(DIN),
.write_delete(0),
.write_enable(WE),
.write_busy(BUSY),
.compare_data(CMP_DIN),
.match_many(),
.match_single(),
.match_addr(MATCH_ADDR),
.match(MATCH)
);
endmodule
///////////////////////////////////////////////////////////////////////////////
// $Id: small_fifo.v 1998 2007-07-21 01:22:57Z grg $
//
// Module: fallthrough_small_fifo.v
// Project: utils
// Description: small fifo with fallthrough i.e. data valid when rd is high
//
// Change history:
// 7/20/07 -- Set nearly full to 2^MAX_DEPTH_BITS - 1 by default so that it
// goes high a clock cycle early.
// 2/11/09 -- jnaous: Rewrote to make much more efficient.
///////////////////////////////////////////////////////////////////////////////
`timescale 1ns/1ps
module fallthrough_small_fifo
#(parameter WIDTH = 72,
parameter MAX_DEPTH_BITS = 3,
parameter PROG_FULL_THRESHOLD = 2**MAX_DEPTH_BITS - 1)
(
input [WIDTH-1:0] din, // Data in
input wr_en, // Write enable
input rd_en, // Read the next word
output [WIDTH-1:0] dout, // Data out
output full,
output nearly_full,
output prog_full,
output reg empty,
input reset,
input clk
);
reg fifo_rd_en, empty_nxt;
small_fifo
#(.WIDTH (WIDTH),
.MAX_DEPTH_BITS (MAX_DEPTH_BITS),
.PROG_FULL_THRESHOLD (PROG_FULL_THRESHOLD))
fifo
(.din (din),
.wr_en (wr_en),
.rd_en (fifo_rd_en),
.dout (dout),
.full (full),
.nearly_full (nearly_full),
.prog_full (prog_full),
.empty (fifo_empty),
.reset (reset),
.clk (clk)
);
always @(*) begin
empty_nxt = empty;
fifo_rd_en = 0;
case (empty)
1'b1: begin
if(!fifo_empty) begin
fifo_rd_en = 1;
empty_nxt = 0;
end
end
1'b0: begin
if(rd_en) begin
if(fifo_empty) begin
empty_nxt = 1;
end
else begin
fifo_rd_en = 1;
end
end
end
endcase // case(empty)
end // always @ (*)
always @(posedge clk) begin
if(reset) begin
empty <= 1'b1;
end
else begin
empty <= empty_nxt;
end
end
// synthesis translate_off
always @(posedge clk)
begin
if (wr_en && full) begin
$display("%t ERROR: Attempt to write to full FIFO: %m", $time);
end
if (rd_en && empty) begin
$display("%t ERROR: Attempt to read an empty FIFO: %m", $time);
end
end // always @ (posedge clk)
// synthesis translate_on
endmodule // fallthrough_small_fifo_v2
/* vim:set shiftwidth=3 softtabstop=3 expandtab: */
module page_tbl_16w_32d #(
parameter ADDR_BITS = 5,
parameter DATA_BITS = 16
)
(
input [(ADDR_BITS - 1):0] addra,
input clka,
input [(DATA_BITS - 1):0] dina,
input ena,
input wea,
input [(ADDR_BITS - 1):0] addrb,
input clkb,
output [(DATA_BITS - 1):0] doutb,
input enb
);
ram_blk #(
.ADDR_BITS(ADDR_BITS),
.DATA_BITS(DATA_BITS)
) ram (
.addra(addra),
.clka(clka),
.dina(dina),
.ena(ena),
.wea(wea),
.addrb(addrb),
.clkb(clkb),
.doutb(doutb),
.enb(enb)
);
endmodule
\ No newline at end of file
module parse_act_ram_ip #(
parameter ADDR_BITS = 5,
parameter DATA_BITS = 160
)
(
input [(ADDR_BITS - 1):0] addra,
input clka,
input [(DATA_BITS - 1):0] dina,
input ena,
input wea,
input [(ADDR_BITS - 1):0] addrb,
input clkb,
output [(DATA_BITS - 1):0] doutb,
input enb
);
ram_blk #(
.ADDR_BITS(ADDR_BITS),
.DATA_BITS(DATA_BITS)
) ram (
.addra(addra),
.clka(clka),
.dina(dina),
.ena(ena),
.wea(wea),
.addrb(addrb),
.clkb(clkb),
.doutb(doutb),
.enb(enb)
);
endmodule
\ No newline at end of file
/*
Copyright (c) 2014-2018 Alex Forencich
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Language: Verilog 2001
`timescale 1ns / 1ps
/*
* Priority encoder module
*/
module priority_encoder #
(
parameter WIDTH = 4,
// LSB priority: "LOW", "HIGH"
parameter LSB_PRIORITY = "LOW"
)
(
input wire [WIDTH-1:0] input_unencoded,
output wire output_valid,
output wire [$clog2(WIDTH)-1:0] output_encoded,
output wire [WIDTH-1:0] output_unencoded
);
// power-of-two width
parameter W1 = 2**$clog2(WIDTH);
parameter W2 = W1/2;
generate
if (WIDTH == 1) begin
// one input
assign output_valid = input_unencoded;
assign output_encoded = 0;
end else if (WIDTH == 2) begin
// two inputs - just an OR gate
assign output_valid = |input_unencoded;
if (LSB_PRIORITY == "LOW") begin
assign output_encoded = input_unencoded[1];
end else begin
assign output_encoded = ~input_unencoded[0];
end
end else begin
// more than two inputs - split into two parts and recurse
// also pad input to correct power-of-two width
wire [$clog2(W2)-1:0] out1, out2;
wire valid1, valid2;
priority_encoder #(
.WIDTH(W2),
.LSB_PRIORITY(LSB_PRIORITY)
)
priority_encoder_inst1 (
.input_unencoded(input_unencoded[W2-1:0]),
.output_valid(valid1),
.output_encoded(out1)
);
priority_encoder #(
.WIDTH(W2),
.LSB_PRIORITY(LSB_PRIORITY)
)
priority_encoder_inst2 (
.input_unencoded({{W1-WIDTH{1'b0}}, input_unencoded[WIDTH-1:W2]}),
.output_valid(valid2),
.output_encoded(out2)
);
// multiplexer to select part
assign output_valid = valid1 | valid2;
if (LSB_PRIORITY == "LOW") begin
assign output_encoded = valid2 ? {1'b1, out2} : {1'b0, out1};
end else begin
assign output_encoded = valid1 ? {1'b0, out1} : {1'b1, out2};
end
end
endgenerate
// unencoded output
assign output_unencoded = 1 << output_encoded;
endmodule
module ram_blk #(
parameter ADDR_BITS = 5,
parameter DATA_BITS = 32
)
(
input [(ADDR_BITS - 1):0] addra,
input clka,
input [(DATA_BITS - 1):0] dina,
input ena,
input wea,
input [(ADDR_BITS - 1):0] addrb,
input clkb,
output [(DATA_BITS - 1):0] doutb,
input enb
);
reg [(DATA_BITS - 1):0] ram [(ADDR_BITS - 1):0];
//reg [(ADDR_BITS - 1):0] read_addrb;
reg [(DATA_BITS - 1):0] doutb_r;
always @(posedge clka) begin
if (ena) begin
if (wea)
ram[addra] <= dina;
//read_addra <= addra;
end
end
/*always @(posedge clkb) begin
if (enb)
doutb_r <= ram[addrb];
end
assign doutb = doutb_r;
*/
//assign douta = ram[read_addra];
assign doutb = ram[addrb];
//
endmodule
\ No newline at end of file
/*
Copyright (c) 2016 Alex Forencich
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in
all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
*/
// Language: Verilog 2001
`timescale 1ns / 1ps
/*
* Generic dual-port RAM
*/
module ram_dp #
(
parameter DATA_WIDTH = 32,
parameter ADDR_WIDTH = 10
)
(
// port A
input wire a_clk,
input wire a_we,
input wire [ADDR_WIDTH-1:0] a_addr,
input wire [DATA_WIDTH-1:0] a_din,
output wire [DATA_WIDTH-1:0] a_dout,
// port B
input wire b_clk,
input wire b_we,
input wire [ADDR_WIDTH-1:0] b_addr,
input wire [DATA_WIDTH-1:0] b_din,
output wire [DATA_WIDTH-1:0] b_dout
);
reg [DATA_WIDTH-1:0] a_dout_reg = {DATA_WIDTH{1'b0}};
reg [DATA_WIDTH-1:0] b_dout_reg = {DATA_WIDTH{1'b0}};
// (* RAM_STYLE="BLOCK" *)
reg [DATA_WIDTH-1:0] mem[(2**ADDR_WIDTH)-1:0];
assign a_dout = a_dout_reg;
assign b_dout = b_dout_reg;
integer i, j;
initial begin
// two nested loops for smaller number of iterations per loop
// workaround for synthesizer complaints about large loop counts
for (i = 0; i < 2**ADDR_WIDTH; i = i + 2**(ADDR_WIDTH/2)) begin
for (j = i; j < i + 2**(ADDR_WIDTH/2); j = j + 1) begin
mem[j] = 0;
end
end
end
// port A
always @(posedge a_clk) begin
a_dout_reg <= mem[a_addr];
if (a_we) begin
mem[a_addr] <= a_din;
a_dout_reg <= a_din;
end
end
// port B
always @(posedge b_clk) begin
b_dout_reg <= mem[b_addr];
if (b_we) begin
mem[b_addr] <= b_din;
b_dout_reg <= b_din;
end
end
endmodule
///////////////////////////////////////////////////////////////////////////////
// $Id: small_fifo.v 4761 2008-12-27 01:11:00Z jnaous $
//
// Module: small_fifo.v
// Project: UNET
// Description: small fifo with no fallthrough i.e. data valid after rd is high
//
// Change history:
// 7/20/07 -- Set nearly full to 2^MAX_DEPTH_BITS - 1 by default so that it
// goes high a clock cycle early.
// 11/2/09 -- Modified to have both prog threshold and almost full
///////////////////////////////////////////////////////////////////////////////
`timescale 1ns/1ps
module small_fifo
#(parameter WIDTH = 72,
parameter MAX_DEPTH_BITS = 3,
parameter PROG_FULL_THRESHOLD = 2**MAX_DEPTH_BITS - 1
)
(
input [WIDTH-1:0] din, // Data in
input wr_en, // Write enable
input rd_en, // Read the next word
output reg [WIDTH-1:0] dout, // Data out
output full,
output nearly_full,
output prog_full,
output empty,
input reset,
input clk
);
parameter MAX_DEPTH = 2 ** MAX_DEPTH_BITS;
reg [WIDTH-1:0] queue [MAX_DEPTH - 1 : 0];
reg [MAX_DEPTH_BITS - 1 : 0] rd_ptr;
reg [MAX_DEPTH_BITS - 1 : 0] wr_ptr;
reg [MAX_DEPTH_BITS : 0] depth;
// Sample the data
always @(posedge clk)
begin
if (wr_en)
queue[wr_ptr] <= din;
if (rd_en)
dout <=
// synthesis translate_off
#1
// synthesis translate_on
queue[rd_ptr];
end
always @(posedge clk)
begin
if (reset) begin
rd_ptr <= 'h0;
wr_ptr <= 'h0;
depth <= 'h0;
end
else begin
if (wr_en) wr_ptr <= wr_ptr + 'h1;
if (rd_en) rd_ptr <= rd_ptr + 'h1;
if (wr_en & ~rd_en) depth <=
// synthesis translate_off
#1
// synthesis translate_on
depth + 'h1;
else if (~wr_en & rd_en) depth <=
// synthesis translate_off
#1
// synthesis translate_on
depth - 'h1;
end
end
//assign dout = queue[rd_ptr];
assign full = depth == MAX_DEPTH;
assign prog_full = (depth >= PROG_FULL_THRESHOLD);
assign nearly_full = depth >= MAX_DEPTH-1;
assign empty = depth == 'h0;
// synthesis translate_off
always @(posedge clk)
begin
if (wr_en && depth == MAX_DEPTH && !rd_en)
$display($time, " ERROR: Attempt to write to full FIFO: %m");
if (rd_en && depth == 'h0)
$display($time, " ERROR: Attempt to read an empty FIFO: %m");
end
// synthesis translate_on
endmodule // small_fifo
/* vim:set shiftwidth=3 softtabstop=3 expandtab: */
#include <verilated.h>
#include <verilated_fst_c.h>
#include <iostream>
#include <signal.h>
#include <vector>
#include "sims/net/menshen/obj_dir/Vrmt_wrapper.h"
#include "sims/net/menshen/ports.h"
#define MAX_PKT_SIZE 2048
//#define ETH_DEBUG
//#define TRACE_ENABLED
class EthernetTx;
class EthernetRx;
std::vector<Port *> ports;
int synchronized = 0;
uint64_t sync_period = (500 * 1000ULL); // 500ns
uint64_t eth_latency = (500 * 1000ULL); // 500ns
int sync_mode = SIMBRICKS_PROTO_SYNC_SIMBRICKS;
static uint64_t clock_period = 4 * 1000ULL; // 4ns -> 250MHz
uint64_t main_time = 0;
int exiting = 0;
EthernetTx *txMAC;
EthernetRx *rxMAC;
static void sigint_handler(int dummy) {
exiting = 1;
}
static void sigusr1_handler(int dummy) {
fprintf(stderr, "main_time = %lu\n", main_time);
}
double sc_time_stamp() {
return main_time;
}
static void reset_inputs(Vrmt_wrapper *top) {
top->clk = 0;
top->aresetn = 0;
top->vlan_drop_flags = 0;
memset(top->s_axis_tdata, 0, sizeof(top->s_axis_tdata));
top->s_axis_tkeep = 0;
memset(top->s_axis_tuser, 0, sizeof(top->s_axis_tuser));
top->s_axis_tvalid = 0;
top->s_axis_tlast = 0;
top->m_axis_tready = 0;
}
static void dump_if(Vrmt_wrapper *top) {
std::cout << "Dumping Interfaces:" << std::endl;
std::cout << " clk = " << top->clk << std::endl;
std::cout << " aresetn = " << top->aresetn << std::endl;
std::cout << " ctrl_token = " << top->ctrl_token << std::endl;
std::cout << " vlan_drop_flags = " << top->vlan_drop_flags << std::endl;
std::cout << std::endl;
std::cout << " s_axis_tdata = { ";
for (size_t i = 0;
i < sizeof(top->s_axis_tdata) / sizeof(top->s_axis_tdata[0]);
i++) {
std::cout << top->s_axis_tdata[i] << " ";
}
std::cout << "}" << std::endl;
std::cout << " s_axis_tkeep = " << top->s_axis_tkeep << std::endl;
std::cout << " s_axis_tuser = { ";
for (size_t i = 0;
i < sizeof(top->s_axis_tuser) / sizeof(top->s_axis_tuser[0]);
i++) {
std::cout << top->s_axis_tuser[i] << " ";
}
std::cout << "}" << std::endl;
std::cout << " s_axis_tvalid = " << top->s_axis_tvalid << std::endl;
std::cout << " s_axis_tready = " << top->s_axis_tready << std::endl;
std::cout << " s_axis_tlast = " << top->s_axis_tlast << std::endl;
std::cout << std::endl;
std::cout << " m_axis_tdata = { ";
for (size_t i = 0;
i < sizeof(top->m_axis_tdata) / sizeof(top->m_axis_tdata[0]);
i++) {
std::cout << top->m_axis_tdata[i] << " ";
}
std::cout << "}" << std::endl;
std::cout << " m_axis_tkeep = " << top->m_axis_tkeep << std::endl;
std::cout << " m_axis_tuser = { ";
for (size_t i = 0;
i < sizeof(top->m_axis_tuser) / sizeof(top->m_axis_tuser[0]);
i++) {
std::cout << top->m_axis_tuser[i] << " ";
}
std::cout << "}" << std::endl;
std::cout << " m_axis_tvalid = " << top->m_axis_tvalid << std::endl;
std::cout << " m_axis_tready = " << top->m_axis_tready << std::endl;
std::cout << " m_axis_tlast = " << top->m_axis_tlast << std::endl;
}
class EthernetTx {
protected:
Vrmt_wrapper &top;
uint8_t packet_buf[MAX_PKT_SIZE];
size_t packet_len;
public:
explicit EthernetTx(Vrmt_wrapper &top_) : top(top_), packet_len(0) {
}
void packet_done(uint16_t port_id) {
if (port_id >= ports.size()) {
#ifdef ETH_DEBUG
std::cerr << "EthernetTx: invalid port set (" << port_id << "), setting to 0" << std::endl;
#endif
port_id = 0;
}
ports[port_id]->TxPacket(packet_buf, packet_len, main_time);
#ifdef ETH_DEBUG
std::cerr << main_time << " EthernetTx: packet len=" << std::hex
<< packet_len << " port=" << port_id << " ";
for (size_t i = 0; i < packet_len; i++) {
std::cerr << (unsigned)packet_buf[i] << " ";
}
std::cerr << std::endl;
#endif
}
void step() {
top.m_axis_tready = 1;
if (top.m_axis_tvalid) {
/* iterate over all bytes on the bus */
uint8_t *txbus = (uint8_t *) &top.m_axis_tdata;
for (size_t i = 0; i < sizeof(top.m_axis_tdata); i++) {
if ((top.m_axis_tkeep & (1ULL << i)) != 0) {
assert(packet_len < 2048);
packet_buf[packet_len++] = txbus[i];
}
}
if (top.m_axis_tlast) {
packet_done((top.m_axis_tuser[0] >> 24) & 0xff);
packet_len = 0;
}
}
}
};
class EthernetRx {
protected:
Vrmt_wrapper &top;
static const size_t FIFO_SIZE = 32;
uint16_t fifo_ports[FIFO_SIZE];
uint8_t fifo_bufs[FIFO_SIZE][MAX_PKT_SIZE];
size_t fifo_lens[FIFO_SIZE];
size_t fifo_pos_rd;
size_t fifo_pos_wr;
size_t packet_off;
public:
explicit EthernetRx(Vrmt_wrapper &top_)
: top(top_), fifo_pos_rd(0), fifo_pos_wr(0), packet_off(0) {
for (size_t i = 0; i < FIFO_SIZE; i++)
fifo_lens[i] = 0;
}
void packet_received(const void *data, size_t len, uint16_t port) {
if (fifo_lens[fifo_pos_wr] != 0) {
#ifdef ETH_DEBUG
std::cerr << "EthernetRx: dropping packet" << std::endl;
#endif
return;
}
memcpy(fifo_bufs[fifo_pos_wr], data, len);
fifo_lens[fifo_pos_wr] = len;
fifo_ports[fifo_pos_wr] = port;
#ifdef ETH_DEBUG
std::cout << main_time << " rx into " << fifo_pos_wr << std::endl;
std::cerr << main_time << " EthernetRx: packet len=" << std::hex << len
<< " ";
for (size_t i = 0; i < len; i++) {
std::cerr << (unsigned)fifo_bufs[fifo_pos_wr][i] << " ";
}
std::cerr << std::endl;
#endif
fifo_pos_wr = (fifo_pos_wr + 1) % FIFO_SIZE;
}
void step() {
if (fifo_lens[fifo_pos_rd] != 0) {
// we have data to send
if (packet_off != 0 && !top.s_axis_tready) {
// no ready signal, can't advance
#ifdef ETH_DEBUG
std::cerr << "eth rx: no ready " << fifo_pos_rd << " " << packet_off << std::endl;
#endif
} else if (packet_off == fifo_lens[fifo_pos_rd]) {
// done with packet
#ifdef ETH_DEBUG
std::cerr << main_time << " EthernetRx: finished packet" << std::endl;
#endif
top.s_axis_tvalid = 0;
top.s_axis_tlast = 0;
top.s_axis_tuser[0] = 0;
packet_off = 0;
fifo_lens[fifo_pos_rd] = 0;
fifo_pos_rd = (fifo_pos_rd + 1) % FIFO_SIZE;
} else {
// put out more packet data
#ifdef ETH_DEBUG
std::cerr << main_time << " EthernetRx: push flit " << packet_off
<< std::endl;
if (packet_off == 0)
std::cout << "rx from " << fifo_pos_rd << std::endl;
#endif
top.s_axis_tkeep = 0;
uint8_t *rdata = (uint8_t *) &top.s_axis_tdata;
size_t i;
/*if (packet_off == 0) {
dump_if(&top);
}*/
if (packet_off == 0)
top.s_axis_tuser[0] = fifo_lens[fifo_pos_rd] |
(((uint64_t) fifo_ports[fifo_pos_rd]) << 16) |
(((uint64_t) fifo_ports[fifo_pos_rd]) << 24);
else
top.s_axis_tuser[0] = 0;
for (i = 0; i < sizeof(top.s_axis_tdata) &&
packet_off < fifo_lens[fifo_pos_rd]; i++) {
rdata[i] = fifo_bufs[fifo_pos_rd][packet_off];
top.s_axis_tkeep |= (1ULL << i);
packet_off++;
}
top.s_axis_tvalid = 1;
top.s_axis_tlast = (packet_off == fifo_lens[fifo_pos_rd]);
}
// trace->dump(main_time);
} else {
// no data
top.s_axis_tuser[0] = 0;
top.s_axis_tvalid = 0;
top.s_axis_tlast = 0;
}
}
};
static void poll_ports() {
uint16_t p_id = 0;
for (auto port : ports) {
while (!exiting) {
const void *data;
size_t len;
enum Port::RxPollState ps = port->RxPacket(data, len, main_time);
if (ps == Port::kRxPollFail)
break;
if (ps == Port::kRxPollSuccess)
rxMAC->packet_received(data, len, p_id);
port->RxDone();
if (!synchronized)
break;
}
p_id++;
}
}
int main(int argc, char *argv[]) {
signal(SIGINT, sigint_handler);
signal(SIGUSR1, sigusr1_handler);
char *vargs[2] = {argv[0], NULL};
Verilated::commandArgs(1, vargs);
#ifdef TRACE_ENABLED
Verilated::traceEverOn(true);
#endif
Vrmt_wrapper *top = new Vrmt_wrapper;
/* execute reset */
reset_inputs(top);
top->aresetn = 0;
for (int i = 0; i < 16; i++) {
top->eval();
top->clk = !top->clk;
}
top->aresetn = 1;
dump_if(top);
if (argc <= 1) {
std::cerr << "no ports" << std::endl;
return EXIT_FAILURE;
}
for (int i = 1; i < argc; i++) {
NetPort *np = new NetPort();
if (!np->Connect(argv[i], synchronized)) {
std::cerr << "connecting to port " << argv[i] << " failed" << std::endl;
return EXIT_FAILURE;
}
ports.push_back(np);
}
txMAC = new EthernetTx(*top);
rxMAC = new EthernetRx(*top);
#ifdef TRACE_ENABLED
VerilatedFstC *trace = nullptr;
trace = new VerilatedFstC;
top->trace(trace, 99);
trace->open("debug.fst");
#endif
while (!exiting) {
// Sync all interfaces
for (auto port : ports)
port->Sync(main_time);
for (auto port : ports)
port->AdvanceEpoch(main_time);
poll_ports();
/* falling edge */
top->clk = !top->clk;
main_time += clock_period / 2;
top->eval();
#ifdef TRACE_ENABLED
trace->dump(main_time);
#endif
txMAC->step();
rxMAC->step();
/* rising edge */
top->clk = !top->clk;
main_time += clock_period / 2;
top->eval();
#ifdef TRACE_ENABLED
trace->dump(main_time);
#endif
}
#ifdef TRACE_ENABLED
trace->close();
#endif
dump_if(top);
return 0;
}
#ifndef NET_MENSHEN_PORTS_H_
#define NET_MENSHEN_PORTS_H_
#include <stdint.h>
#include <simbricks/proto/base.h>
#include <simbricks/proto/network.h>
extern "C" {
#include <simbricks/netif/netif.h>
}
extern uint64_t sync_period;
extern uint64_t eth_latency;
extern int sync_mode;
/** Abstract base switch port */
class Port {
public:
enum RxPollState {
kRxPollSuccess = 0,
kRxPollFail = 1,
kRxPollSync = 2,
};
virtual ~Port() = default;
virtual bool Connect(const char *path, int sync) = 0;
virtual bool IsSync() = 0;
virtual void Sync(uint64_t cur_ts) = 0;
virtual void AdvanceEpoch(uint64_t cur_ts) = 0;
virtual uint64_t NextTimestamp() = 0;
virtual enum RxPollState RxPacket(
const void *& data, size_t &len, uint64_t cur_ts) = 0;
virtual void RxDone() = 0;
virtual bool TxPacket(const void *data, size_t len, uint64_t cur_ts) = 0;
};
/** Normal network switch port (conneting to a NIC) */
class NetPort : public Port {
protected:
struct SimbricksNetIf netif_;
volatile union SimbricksProtoNetD2N *rx_;
int sync_;
public:
NetPort() : rx_(nullptr), sync_(0) {
memset(&netif_, 0, sizeof(netif_));
}
NetPort(const NetPort &other) : netif_(other.netif_), rx_(other.rx_),
sync_(other.sync_) {}
virtual bool Connect(const char *path, int sync) override {
sync_ = sync;
return SimbricksNetIfInit(&netif_, path, &sync_) == 0;
}
virtual bool IsSync() override {
return sync_;
}
virtual void Sync(uint64_t cur_ts) override {
while (SimbricksNetIfN2DSync(&netif_, cur_ts, eth_latency, sync_period,
sync_mode));
}
virtual void AdvanceEpoch(uint64_t cur_ts) override {
SimbricksNetIfAdvanceEpoch(cur_ts, sync_period, sync_mode);
}
virtual uint64_t NextTimestamp() override {
return SimbricksNetIfD2NTimestamp(&netif_);
}
virtual enum RxPollState RxPacket(
const void *& data, size_t &len, uint64_t cur_ts) override {
assert(rx_ == nullptr);
rx_ = SimbricksNetIfD2NPoll(&netif_, cur_ts);
if (!rx_)
return kRxPollFail;
uint8_t type = rx_->dummy.own_type & SIMBRICKS_PROTO_NET_D2N_MSG_MASK;
if (type == SIMBRICKS_PROTO_NET_D2N_MSG_SEND) {
data = (const void *)rx_->send.data;
len = rx_->send.len;
return kRxPollSuccess;
} else if (type == SIMBRICKS_PROTO_NET_D2N_MSG_SYNC) {
return kRxPollSync;
} else {
fprintf(stderr, "switch_pkt: unsupported type=%u\n", type);
abort();
}
}
virtual void RxDone() override {
assert(rx_ != nullptr);
SimbricksNetIfD2NDone(&netif_, rx_);
rx_ = nullptr;
}
virtual bool TxPacket(
const void *data, size_t len, uint64_t cur_ts) override {
volatile union SimbricksProtoNetN2D *msg_to =
SimbricksNetIfN2DAlloc(&netif_, cur_ts, eth_latency);
if (!msg_to && !sync_) {
return false;
} else if (!msg_to && sync_) {
while (!msg_to)
msg_to = SimbricksNetIfN2DAlloc(&netif_, cur_ts, eth_latency);
}
volatile struct SimbricksProtoNetN2DRecv *rx;
rx = &msg_to->recv;
rx->len = len;
rx->port = 0;
memcpy((void *)rx->data, data, len);
// WMB();
rx->own_type =
SIMBRICKS_PROTO_NET_N2D_MSG_RECV | SIMBRICKS_PROTO_NET_N2D_OWN_DEV;
return true;
}
};
#endif // NET_MENSHEN_PORTS_H_
\ No newline at end of file
This diff is collapsed.
`timescale 1ns / 1ps
module alu_1 #(
parameter STAGE_ID = 0,
parameter ACTION_LEN = 25,
parameter DATA_WIDTH = 48 //data width of the ALU
)
(
input clk,
input rst_n,
//input from sub_action
input [ACTION_LEN-1:0] action_in,
input action_valid,
input [DATA_WIDTH-1:0] operand_1_in,
input [DATA_WIDTH-1:0] operand_2_in,
//output to form PHV
output reg [DATA_WIDTH-1:0] container_out,
output reg container_out_valid
);
/*
4 operations to support:
1,2. add/sub: 0001/0010
extract 2 operands from pkt header, add(sub) and write back.
3,4. addi/subi: 0011/0100
extract op1 from pkt header, op2 from action, add(sub) and write back.
*/
localparam IDLE_S=3'd0,
WAIT1_S=3'd1,
WAIT2_S=3'd2,
WAIT3_S=3'd3,
OUTPUT_S=3'd4;
reg [2:0] state, state_next;
reg [DATA_WIDTH-1:0] container_out_r;
reg container_out_valid_next;
always @(*) begin
state_next = state;
container_out_r = container_out;
container_out_valid_next = 0;
case (state)
IDLE_S: begin
if (action_valid) begin
state_next = OUTPUT_S;
case(action_in[24:21])
4'b0001, 4'b1001: begin
container_out_r = operand_1_in + operand_2_in;
end
4'b0010, 4'b1010: begin
container_out_r = operand_1_in - operand_2_in;
end
4'b1110: begin
container_out_r = operand_2_in;
end
//if its an empty (default) action
default: begin
container_out_r = operand_1_in;
end
endcase
end
end
WAIT1_S: begin
// empty cycle
state_next = WAIT2_S;
end
WAIT2_S: begin
state_next = WAIT3_S;
end
WAIT3_S: begin
state_next = OUTPUT_S;
end
OUTPUT_S: begin
container_out_valid_next = 1;
state_next = IDLE_S;
end
endcase
end
always @(posedge clk or negedge rst_n) begin
if (~rst_n) begin
container_out <= 0;
container_out_valid <= 0;
state <= IDLE_S;
end
else begin
state <= state_next;
container_out_valid <= container_out_valid_next;
container_out <= container_out_r;
end
end
endmodule
`timescale 1ns / 1ps
module alu_2 #(
parameter STAGE_ID = 0,
parameter ACTION_LEN = 25,
parameter DATA_WIDTH = 32, //data width of the ALU
parameter ACTION_ID = 3,
parameter C_S_AXIS_DATA_WIDTH = 512,
parameter C_S_AXIS_TUSER_WIDTH = 128
)
(
input clk,
input rst_n,
//input from sub_action
input [ACTION_LEN-1:0] action_in,
input action_valid,
input [DATA_WIDTH-1:0] operand_1_in,
input [DATA_WIDTH-1:0] operand_2_in,
input [DATA_WIDTH-1:0] operand_3_in,
output reg ready_out,
input [15:0] page_tbl_out,
input page_tbl_out_valid,
//output to form PHV
output [DATA_WIDTH-1:0] container_out_w,
output reg container_out_valid,
input ready_in
);
reg [3:0] action_type, action_type_next;
//regs for RAM access
reg store_en, store_en_next;
reg [4:0] store_addr, store_addr_next;
wire [31:0] store_din_w;
reg [31:0] store_din, store_din_next;
wire [31:0] load_data;
wire [4:0] load_addr;
// reg [4:0] load_addr, load_addr_next;
reg [2:0] alu_state, alu_state_next;
reg [DATA_WIDTH-1:0] container_out, container_out_next;
reg container_out_valid_next;
//regs/wires for isolation
wire [7:0] base_addr;
wire [7:0] addr_len;
assign {addr_len, base_addr} = page_tbl_out;
reg overflow, overflow_next;
reg ready_out_next;
/********intermediate variables declared here********/
//support tenant isolation
// assign load_addr = store_addr[4:0] + base_addr;
assign load_addr = operand_2_in[4:0] + base_addr;
assign store_din_w = (action_type==4'b1000)?store_din:
((action_type==4'b0111)?(load_data+1):0);
assign container_out_w = (action_type==4'b1011)?load_data:
(action_type==4'b0111)?(load_data+1):
container_out;
/*
7 operations to support:
1,2. add/sub: 0001/0010
extract 2 operands from pkt header, add(sub) and write back.
3,4. addi/subi: 1001/1010
extract op1 from pkt header, op2 from action, add(sub) and write back.
5: load: 0101
load data from RAM, write to pkt header according to addr in action.
6. store: 0110
read data from pkt header, write to ram according to addr in action.
7. loadd: 0111
load data from RAM, increment by 1 write it to container, and write it
back to the RAM.
8. set: 1110
set to an immediate value
*/
localparam IDLE_S = 3'd0,
EMPTY1_S = 3'd1,
OB_ADDR_S = 3'd2,
EMPTY2_S = 3'd3,
OUTPUT_S = 3'd4,
HALT_S = 3'd5;
always @(*) begin
alu_state_next = alu_state;
action_type_next = action_type;
container_out_next = container_out;
store_addr_next = store_addr;
store_din_next = store_din;
store_en_next = 0;
// load_addr_next = load_addr;
overflow_next = overflow;
container_out_valid_next = 0;
ready_out_next = ready_out;
case (alu_state)
IDLE_S: begin
if (action_valid) begin
action_type_next = action_in[24:21];
overflow_next = 0;
alu_state_next = EMPTY1_S;
ready_out_next = 1'b0;
case(action_in[24:21])
//add/addi ops
4'b0001, 4'b1001: begin
container_out_next = operand_1_in + operand_2_in;
end
//sub/subi ops
4'b0010, 4'b1010: begin
container_out_next = operand_1_in - operand_2_in;
end
//store op (interact with RAM)
4'b1000: begin
container_out_next = operand_3_in;
//store_en_r = 1;
store_addr_next = operand_2_in[4:0];
store_din_next = operand_1_in;
end
// load op (interact with RAM)
4'b1011: begin
container_out_next = operand_3_in;
end
// loadd op
4'b0111: begin
// do nothing now
//checkme
container_out_next = operand_3_in;
store_addr_next = operand_2_in[4:0];
end
// set operation
4'b1110: begin
container_out_next = operand_2_in;
end
//cannot go back to IDLE since this
//might be a legal action.
default: begin
container_out_next = operand_3_in;
end
endcase
//ok, if its `load` op, needs to check overflow.
if(action_in[24:21] == 4'b1011 || action_in[24:21] == 4'b0111 || action_in[24:21] == 4'b1000) begin
if(operand_2_in[4:0] > addr_len) begin
overflow_next = 1'b1;
end
else begin
overflow_next = 1'b0;
//its the right time to write for `store`
if(action_in[24:21] == 4'b1000 || action_in[24:21] == 4'b0111) begin
store_addr_next = base_addr + operand_2_in[4:0];
//store_din_r = operand_1_in;
//store_en_next = 1'b1;
end
end
end
//
// load_addr_next = operand_2_in[4:0] + base_addr;
alu_state_next = EMPTY2_S;
end
end
EMPTY2_S: begin
//wait for the result of RAM
if (ready_in) begin
alu_state_next = IDLE_S;
container_out_valid_next = 1;
ready_out_next = 1;
// action_type
if ((action_type==4'b1000 || action_type==4'b0111) &&
overflow==0) begin
store_en_next = 1'b1;
end
end
else begin
alu_state_next = HALT_S;
end
end
HALT_S: begin
if (ready_in) begin
alu_state_next = IDLE_S;
container_out_valid_next = 1;
ready_out_next = 1;
// action_type
if ((action_type==4'b1000 || action_type==4'b0111) &&
overflow==0) begin
store_en_next = 1'b1;
end
end
end
endcase
end
always @(posedge clk) begin
if (~rst_n) begin
alu_state <= IDLE_S;
action_type <= 0;
container_out <= 0;
container_out_valid <= 0;
store_en <= 0;
store_addr <= 0;
store_din <= 0;
// load_addr <= 0;
overflow <= 0;
ready_out <= 1'b1;
end
else begin
alu_state <= alu_state_next;
action_type <= action_type_next;
container_out <= container_out_next;
container_out_valid <= container_out_valid_next;
store_en <= store_en_next;
store_addr <= store_addr_next;
store_din <= store_din_next;
// load_addr <= load_addr_next;
overflow <= overflow_next;
ready_out <= ready_out_next;
end
end
blk_mem_gen_0
data_ram_32w_32d
(
//store-related
.addra(store_addr),
.clka(clk),
.dina(store_din_w),
.ena(1'b1),
.wea(store_en),
//load-related
.addrb(load_addr),
.clkb(clk),
.doutb(load_data),
.enb(1'b1)
);
endmodule
`timescale 1ns / 1ps
module alu_3 #(
parameter STAGE_ID = 0,
parameter ACTION_LEN = 25,
parameter META_LEN = 256
)(
input clk,
input rst_n,
//the input data shall be metadata & com_ins
input [META_LEN-1:0] comp_meta_data_in,
input comp_meta_data_valid_in,
input [ACTION_LEN-1:0] action_in,
input action_valid_in,
//output is the modified metadata plus comp_ins
output reg [META_LEN-1:0] comp_meta_data_out,
output reg comp_meta_data_valid_out
);
//need delay for one cycle before the result pushed out
/*
action format:
[24:20]: opcode;
[19:12]: dst_port;
[11]: discard_flag;
[10:5]: next_table_id;
[4:0]: reserverd_bit;
*/
/*
metadata fields that are related:
TODO: next table id is not supported yet.
[255:250]: next_table_id;
[249:129]: reservered for other use;
[128]: discard_field;
[127:0]: copied from NetFPGA's md;
*/
localparam IDLE_S=3'd0,
WAIT1_S=3'd1,
WAIT2_S=3'd2,
WAIT3_S=3'd3,
OUTPUT_S=3'd4;
reg [2:0] state, state_next;
reg [META_LEN-1:0] comp_meta_data_out_r;
reg comp_meta_data_valid_next;
always @(*) begin
state_next = state;
comp_meta_data_out_r = comp_meta_data_out;
comp_meta_data_valid_next = 0;
case (state)
IDLE_S: begin
if (action_valid_in) begin
state_next = OUTPUT_S;
case(action_in[24:21])
4'b1100: begin // dst_port
comp_meta_data_out_r[255:32] = {action_in[10:5],comp_meta_data_in[249:32]};
comp_meta_data_out_r[31:24] = action_in[20:13];
comp_meta_data_out_r[23:0] = comp_meta_data_in[23:0];
end
4'b1101: begin // discard
comp_meta_data_out_r[255:129] = {action_in[10:5],comp_meta_data_in[249:129]};
comp_meta_data_out_r[128] = action_in[12];
comp_meta_data_out_r[127:0] = comp_meta_data_in[127:0];
end
default: begin
comp_meta_data_out_r = comp_meta_data_in;
end
endcase
end
end
WAIT1_S: begin
// empty cycle
state_next = WAIT2_S;
end
WAIT2_S: begin
state_next = WAIT3_S;
end
WAIT3_S: begin
state_next = OUTPUT_S;
end
OUTPUT_S: begin
comp_meta_data_valid_next = 1;
state_next = IDLE_S;
end
endcase
end
always @(posedge clk) begin
if (~rst_n) begin
comp_meta_data_out <= 0;
comp_meta_data_valid_out <= 0;
state <= IDLE_S;
end
else begin
state <= state_next;
comp_meta_data_out <= comp_meta_data_out_r;
comp_meta_data_valid_out <= comp_meta_data_valid_next;
end
end
endmodule
Markdown is supported
0% or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment