Skip to content

Commit

Permalink
Add user field between memory and caches (openhwgroup#857)
Browse files Browse the repository at this point in the history
* wt_dcache_wbuffer.sv: fix assert

Signed-off-by: Jean-Roch Coulon <[email protected]>

* Many files: Add user between memories and cva6

Signed-off-by: Jean-Roch Coulon <[email protected]>

* Update std_nbdcache.sv

Make wb cache work

* Update setup.sh

Co-authored-by: Guillaume Chauvon <[email protected]>
  • Loading branch information
JeanRochCoulon and Gchauvon authored Apr 20, 2022
1 parent 0b61544 commit 56f8c9f
Show file tree
Hide file tree
Showing 25 changed files with 269 additions and 41 deletions.
2 changes: 1 addition & 1 deletion ci/setup.sh
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ export CPLUS_INCLUDE_PATH=$RISCV/include
echo 'deb http://download.opensuse.org/repositories/home:/phiwag:/edatools/xUbuntu_20.04/ /' | sudo tee /etc/apt/sources.list.d/home:phiwag:edatools.list
curl -fsSL https://download.opensuse.org/repositories/home:phiwag:edatools/xUbuntu_20.04/Release.key | gpg --dearmor | sudo tee /etc/apt/trusted.gpg.d/home_phiwag_edatools.gpg > /dev/null
sudo apt update
sudo apt install verilator-4.100 device-tree-compiler
sudo apt install verilator-4.110 device-tree-compiler

ci/make-tmp.sh
sudo mkdir -p $RISCV && sudo chmod 777 $RISCV
Expand Down
45 changes: 45 additions & 0 deletions common/local/util/sram.sv
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@

module sram #(
parameter DATA_WIDTH = 64,
parameter USER_WIDTH = 1,
parameter USER_EN = 0,
parameter NUM_WORDS = 1024,
parameter OUT_REGS = 0, // enables output registers in FPGA macro (read lat = 2)
parameter DROMAJO_RAM = 0
Expand All @@ -29,26 +31,34 @@ module sram #(
input logic req_i,
input logic we_i,
input logic [$clog2(NUM_WORDS)-1:0] addr_i,
input logic [USER_WIDTH-1:0] wuser_i,
input logic [DATA_WIDTH-1:0] wdata_i,
input logic [(DATA_WIDTH+7)/8-1:0] be_i,
output logic [USER_WIDTH-1:0] ruser_o,
output logic [DATA_WIDTH-1:0] rdata_o
);

localparam DATA_WIDTH_ALIGNED = ((DATA_WIDTH+63)/64)*64;
localparam USER_WIDTH_ALIGNED = DATA_WIDTH_ALIGNED; // To be fine tuned to reduce memory size
localparam BE_WIDTH_ALIGNED = (((DATA_WIDTH+7)/8+7)/8)*8;

logic [DATA_WIDTH_ALIGNED-1:0] wdata_aligned;
logic [USER_WIDTH_ALIGNED-1:0] wuser_aligned;
logic [BE_WIDTH_ALIGNED-1:0] be_aligned;
logic [DATA_WIDTH_ALIGNED-1:0] rdata_aligned;
logic [USER_WIDTH_ALIGNED-1:0] ruser_aligned;

// Align the write-side inputs to the padded widths used by the inferrable
// 64-bit RAM macros below, and trim the macro read outputs back down to the
// widths of the module ports.
//
// NOTE(review): the user path is padded to USER_WIDTH_ALIGNED, which is
// currently tied to DATA_WIDTH_ALIGNED. This assumes
// USER_WIDTH <= DATA_WIDTH_ALIGNED -- confirm for every instantiation.
always_comb begin : p_align
  // Default all padding bits to zero before the live bits are overlaid.
  wdata_aligned = '0;
  wuser_aligned = '0;
  be_aligned    = '0;

  wdata_aligned[DATA_WIDTH-1:0] = wdata_i;
  wuser_aligned[USER_WIDTH-1:0] = wuser_i;
  // be_i is (DATA_WIDTH+7)/8 bits wide. Select exactly that many bits instead
  // of the whole padded vector so the assignment widths match; the upper
  // padding bits keep the zero default assigned above.
  be_aligned[(DATA_WIDTH+7)/8-1:0] = be_i;

  // Drop the padding on the read side.
  rdata_o = rdata_aligned[DATA_WIDTH-1:0];
  ruser_o = ruser_aligned[USER_WIDTH-1:0];
end

for (genvar k = 0; k<(DATA_WIDTH+63)/64; k++) begin : gen_cut
Expand All @@ -67,6 +77,22 @@ end
.Addr_DI ( addr_i ),
.RdData_DO ( rdata_aligned[k*64 +: 64] )
);
// Optional user-bit RAM for 64-bit slice k, instantiated only when USER_EN is
// non-zero. It is driven by the same clk_i / rst_ni / req_i / we_i / addr_i
// inputs and the same byte-enable slice be_aligned[k*8 +: 8] as used for the
// data path, but stores wuser_aligned and returns ruser_aligned instead.
if (USER_EN) begin : gen_dromajo_user
dromajo_ram #(
.ADDR_WIDTH($clog2(NUM_WORDS)),
.DATA_DEPTH(NUM_WORDS),
.OUT_REGS (0)
) i_ram_user (
.Clk_CI ( clk_i ),
.Rst_RBI ( rst_ni ),
.CSel_SI ( req_i ),
.WrEn_SI ( we_i ),
// byte enables are taken from the data path (be_i), not a separate user mask
.BEn_SI ( be_aligned[k*8 +: 8] ),
.WrData_DI ( wuser_aligned[k*64 +: 64] ),
.Addr_DI ( addr_i ),
.RdData_DO ( ruser_aligned[k*64 +: 64] )
);
end
end else begin : gen_mem
// unused byte-enable segments (8bits) are culled by the tool
SyncSpRamBeNx64 #(
Expand All @@ -86,6 +112,25 @@ end
.Addr_DI ( addr_i ),
.RdData_DO ( rdata_aligned[k*64 +: 64] )
);
// Optional user-bit RAM for 64-bit slice k, instantiated only when USER_EN is
// non-zero. It is driven by the same clk_i / rst_ni / req_i / we_i / addr_i
// inputs and the same byte-enable slice be_aligned[k*8 +: 8] as used for the
// data path, but stores wuser_aligned and returns ruser_aligned instead.
if (USER_EN) begin : gen_mem_user
SyncSpRamBeNx64 #(
.ADDR_WIDTH($clog2(NUM_WORDS)),
.DATA_DEPTH(NUM_WORDS),
.OUT_REGS (0),
// SIM_INIT selects how the memory is initialized; adjust as needed.
// 0: no init, 1: zero init, 2: random init, 3: deadbeef init
.SIM_INIT (1)
) i_ram_user (
.Clk_CI ( clk_i ),
.Rst_RBI ( rst_ni ),
.CSel_SI ( req_i ),
.WrEn_SI ( we_i ),
// byte enables are taken from the data path (be_i), not a separate user mask
.BEn_SI ( be_aligned[k*8 +: 8] ),
.WrData_DI ( wuser_aligned[k*64 +: 64] ),
.Addr_DI ( addr_i ),
.RdData_DO ( ruser_aligned[k*64 +: 64] )
);
end
end
end
endmodule : sram
5 changes: 5 additions & 0 deletions core/axi_shim.sv
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@


module axi_shim #(
parameter int unsigned AxiUserWidth = 64, // width of the AXI user signals in bits
parameter int unsigned AxiNumWords = 4, // data width in dwords, this is also the maximum burst length, must be >=2
parameter int unsigned AxiIdWidth = 4 // stick to the spec
) (
Expand All @@ -39,13 +40,15 @@ module axi_shim #(
output logic rd_last_o,
output logic rd_valid_o,
output logic [63:0] rd_data_o,
output logic [AxiUserWidth-1:0] rd_user_o,
output logic [AxiIdWidth-1:0] rd_id_o,
output logic rd_exokay_o, // indicates whether exclusive tx succeeded
// write channel
input logic wr_req_i,
output logic wr_gnt_o,
input logic [63:0] wr_addr_i,
input logic [AxiNumWords-1:0][63:0] wr_data_i,
input logic [AxiNumWords-1:0][AxiUserWidth-1:0] wr_user_i,
input logic [AxiNumWords-1:0][7:0] wr_be_i,
input logic [$clog2(AxiNumWords)-1:0] wr_blen_i, // axi convention: LEN-1
input logic [1:0] wr_size_i,
Expand Down Expand Up @@ -91,6 +94,7 @@ module axi_shim #(
assign axi_req_o.aw.atop = wr_atop_i;
// data
assign axi_req_o.w.data = wr_data_i[wr_cnt_q];
assign axi_req_o.w.user = wr_user_i[wr_cnt_q];
assign axi_req_o.w.strb = wr_be_i[wr_cnt_q];
assign axi_req_o.w.last = wr_cnt_done;

Expand Down Expand Up @@ -252,6 +256,7 @@ module axi_shim #(
// return path
assign axi_req_o.r_ready = rd_rdy_i;
assign rd_data_o = axi_resp_i.r.data;
assign rd_user_o = axi_resp_i.r.user;
assign rd_last_o = axi_resp_i.r.last;
assign rd_valid_o = axi_resp_i.r_valid;
assign rd_id_o = axi_resp_i.r.id;
Expand Down
20 changes: 18 additions & 2 deletions core/cache_subsystem/cva6_icache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -81,7 +81,9 @@ module cva6_icache import ariane_pkg::*; import wt_cache_pkg::*; #(
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_d, cl_tag_q; // this is the cache tag
logic [ICACHE_TAG_WIDTH-1:0] cl_tag_rdata [ICACHE_SET_ASSOC-1:0]; // these are the tags coming from the tagmem
logic [ICACHE_LINE_WIDTH-1:0] cl_rdata [ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the cache
logic [ICACHE_USER_LINE_WIDTH-1:0] cl_ruser[ICACHE_SET_ASSOC-1:0]; // these are the cachelines coming from the user cache
logic [ICACHE_SET_ASSOC-1:0][FETCH_WIDTH-1:0]cl_sel; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0][FETCH_USER_WIDTH-1:0] cl_user; // selected word from each cacheline
logic [ICACHE_SET_ASSOC-1:0] vld_req; // bit enable for valid regs
logic vld_we; // valid bits write enable
logic [ICACHE_SET_ASSOC-1:0] vld_wdata; // valid bits to write
Expand Down Expand Up @@ -389,6 +391,7 @@ end else begin : gen_piton_offset
// Per-way tag compare and word select.
// - cl_hit[i]  : way i hits when its stored tag equals cl_tag_d and its valid bit is set.
// - cl_sel[i]  : FETCH_WIDTH bits of way i's cacheline, starting at bit {cl_offset_q,3'b0}
//                (i.e. cl_offset_q scaled by 8).
// - cl_user[i] : FETCH_USER_WIDTH bits of way i's user line, starting at the same bit offset.
// NOTE(review): the user slice reuses the data bit offset; this presumably requires the
// user line to be laid out at the same bit positions as the data line -- confirm against
// ICACHE_USER_LINE_WIDTH / FETCH_USER_WIDTH.
for (genvar i=0;i<ICACHE_SET_ASSOC;i++) begin : gen_tag_cmpsel
assign cl_hit[i] = (cl_tag_rdata[i] == cl_tag_d) & vld_rdata[i];
assign cl_sel[i] = cl_rdata[i][{cl_offset_q,3'b0} +: FETCH_WIDTH];
assign cl_user[i] = cl_ruser[i][{cl_offset_q,3'b0} +: FETCH_USER_WIDTH];
end


Expand All @@ -400,8 +403,15 @@ end else begin : gen_piton_offset
.empty_o ( )
);

assign dreq_o.data = (cmp_en_q) ? cl_sel[hit_idx] :
mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH];
// Fetch response mux: data and user bits always come from the same source.
always_comb begin
  // Default: forward the addressed word straight from the memory return
  // (used whenever the tag comparison is not enabled).
  dreq_o.data = mem_rtrn_i.data[{cl_offset_q,3'b0} +: FETCH_WIDTH];
  dreq_o.user = mem_rtrn_i.user[{cl_offset_q,3'b0} +: FETCH_USER_WIDTH];

  // Tag comparison enabled: serve the word selected from the hitting way.
  if (cmp_en_q) begin
    dreq_o.data = cl_sel[hit_idx];
    dreq_o.user = cl_user[hit_idx];
  end
end

///////////////////////////////////////////////////////
// memory arrays and regs
Expand All @@ -424,8 +434,10 @@ end else begin : gen_piton_offset
.addr_i ( vld_addr ),
// we can always use the saved tag here since it takes a
// couple of cycle until we write to the cache upon a miss
.wuser_i ( '0 ),
.wdata_i ( {vld_wdata[i], cl_tag_q} ),
.be_i ( '1 ),
.ruser_o ( ),
.rdata_o ( cl_tag_valid_rdata[i] )
);

Expand All @@ -434,16 +446,20 @@ end else begin : gen_piton_offset

// Data RAM
sram #(
.USER_WIDTH ( ICACHE_USER_LINE_WIDTH ),
.DATA_WIDTH ( ICACHE_LINE_WIDTH ),
.USER_EN ( ariane_pkg::FETCH_USER_EN ),
.NUM_WORDS ( ICACHE_NUM_WORDS )
) data_sram (
.clk_i ( clk_i ),
.rst_ni ( rst_ni ),
.req_i ( cl_req[i] ),
.we_i ( cl_we ),
.addr_i ( cl_index ),
.wuser_i ( mem_rtrn_i.user ),
.wdata_i ( mem_rtrn_i.data ),
.be_i ( '1 ),
.ruser_o ( cl_ruser[i] ),
.rdata_o ( cl_rdata[i] )
);
end
Expand Down
3 changes: 3 additions & 0 deletions core/cache_subsystem/cva6_icache_axi_wrapper.sv
Original file line number Diff line number Diff line change
Expand Up @@ -118,6 +118,7 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
// AXI shim
// --------
axi_shim #(
.AxiUserWidth ( AXI_USER_WIDTH ),
.AxiNumWords ( AxiNumWords ),
.AxiIdWidth ( $size(axi_resp_i.r.id) )
) i_axi_shim (
Expand All @@ -134,12 +135,14 @@ module cva6_icache_axi_wrapper import ariane_pkg::*; import wt_cache_pkg::*; #(
.rd_last_o ( axi_rd_last ),
.rd_valid_o ( axi_rd_valid ),
.rd_data_o ( axi_rd_data ),
.rd_user_o ( ),
.rd_id_o ( axi_rd_id_out ),
.rd_exokay_o ( axi_rd_exokay ),
.wr_req_i ( '0 ),
.wr_gnt_o ( ),
.wr_addr_i ( '0 ),
.wr_data_i ( '0 ),
.wr_user_i ( '0 ),
.wr_be_i ( '0 ),
.wr_blen_i ( '0 ),
.wr_size_i ( '0 ),
Expand Down
7 changes: 7 additions & 0 deletions core/cache_subsystem/std_nbdcache.sv
Original file line number Diff line number Diff line change
Expand Up @@ -172,8 +172,10 @@ import std_cache_pkg::*;
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wuser_i ( '0 ),
.wdata_i ( wdata_ram.data ),
.be_i ( be_ram.data ),
.ruser_o ( ),
.rdata_o ( rdata_ram[i].data ),
.*
);
Expand All @@ -186,8 +188,10 @@ import std_cache_pkg::*;
.rst_ni ( rst_ni ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wuser_i ( '0 ),
.wdata_i ( wdata_ram.tag ),
.be_i ( be_ram.tag ),
.ruser_o ( ),
.rdata_o ( rdata_ram[i].tag ),
.*
);
Expand All @@ -211,6 +215,7 @@ import std_cache_pkg::*;
end

sram #(
.USER_WIDTH ( 1 ),
.DATA_WIDTH ( 4*DCACHE_DIRTY_WIDTH ),
.NUM_WORDS ( DCACHE_NUM_WORDS )
) valid_dirty_sram (
Expand All @@ -219,8 +224,10 @@ import std_cache_pkg::*;
.req_i ( |req_ram ),
.we_i ( we_ram ),
.addr_i ( addr_ram[DCACHE_INDEX_WIDTH-1:DCACHE_BYTE_OFFSET] ),
.wuser_i ( '0 ),
.wdata_i ( dirty_wdata ),
.be_i ( be_ram.vldrty ),
.ruser_o ( ),
.rdata_o ( dirty_rdata )
);

Expand Down
Loading

0 comments on commit 56f8c9f

Please sign in to comment.