diff --git a/bhv/cv32e41p_apu_tracer.sv b/bhv/cv32e41p_apu_tracer.sv index dee4ef5..026678a 100644 --- a/bhv/cv32e41p_apu_tracer.sv +++ b/bhv/cv32e41p_apu_tracer.sv @@ -52,7 +52,7 @@ module cv32e41p_apu_tracer ( // open/close output file for writing initial begin - wait(rst_n == 1'b1); + wait (rst_n == 1'b1); $sformat(fn, "apu_trace_core_%h.log", hart_id_i); $display("[APU_TRACER] Output filename is: %s", fn); apu_trace = $fopen(fn, "w"); diff --git a/bhv/cv32e41p_instr_trace.svh b/bhv/cv32e41p_instr_trace.svh index 165cd3e..e6203e1 100644 --- a/bhv/cv32e41p_instr_trace.svh +++ b/bhv/cv32e41p_instr_trace.svh @@ -48,20 +48,20 @@ typedef struct { } mem_acc_t; class instr_trace_t; - time simtime; - int cycles; - logic [31:0] pc; - logic [31:0] instr; - bit compressed; - bit wb_bypass; - bit misaligned; - bit retire; - bit ebreak; - string str; - reg_t regs_read[$]; - reg_t regs_write[$]; - mem_acc_t mem_access[$]; - logic retired; + time simtime; + int cycles; + logic [31:0] pc; + logic [31:0] instr; + bit compressed; + bit wb_bypass; + bit misaligned; + bit retire; + bit ebreak; + string str; + reg_t regs_read [$]; + reg_t regs_write [$]; + mem_acc_t mem_access [$]; + logic retired; function new(); str = ""; @@ -359,9 +359,8 @@ class instr_trace_t; regs_read.push_back('{rs1, rs1_value, 0}); regs_read.push_back('{rs2, rs2_value, 0}); regs_write.push_back('{rd, 'x, 0}); - str = $sformatf( - "%-16s x%0d, x%0d, x%0d, 0x%0d", mnemonic, rd, rs1, rs2, $unsigned(imm_s3_type[4:0]) - ); + str = $sformatf("%-16s x%0d, x%0d, x%0d, 0x%0d", mnemonic, rd, rs1, rs2, + $unsigned(imm_s3_type[4:0])); end endfunction // printAddNInstr @@ -389,9 +388,8 @@ class instr_trace_t; regs_read.push_back('{rs2, rs2_value, 0}); regs_read.push_back('{rs4, rs3_value, 0}); regs_write.push_back('{rd, 'x, 0}); - str = $sformatf( - "%-16s f%0d, f%0d, f%0d, f%0d", mnemonic, rd - 32, rs1 - 32, rs2 - 32, rs4 - 32 - ); + str = $sformatf("%-16s f%0d, f%0d, f%0d, f%0d", mnemonic, rd - 32, rs1 - 32, rs2 - 32, + rs4 - 32); end endfunction // printF3Instr diff --git a/bhv/cv32e41p_wrapper.sv b/bhv/cv32e41p_wrapper.sv index d6d6efb..cff5944 100644 --- a/bhv/cv32e41p_wrapper.sv +++ b/bhv/cv32e41p_wrapper.sv @@ -28,10 +28,14 @@ module cv32e41p_wrapper import cv32e41p_apu_core_pkg::*; #( - parameter PULP_XPULP = 0, // PULP ISA Extension (incl. custom CSRs and hardware loop, excl. p.elw) + parameter PULP_XPULP = 0, // PULP ISA Extension (incl. custom CSRs and hardware loop, excl. p.elw) parameter PULP_CLUSTER = 0, // PULP Cluster interface (incl. p.elw) parameter FPU = 0, // Floating Point Unit (interfaced via APU interface) parameter PULP_ZFINX = 0, // Float-in-General Purpose registers + parameter Zcea = 0, + parameter Zceb = 0, + parameter Zcec = 0, + parameter Zcee = 0, parameter NUM_MHPMCOUNTERS = 1 ) ( // Clock and Reset @@ -39,7 +43,7 @@ module cv32e41p_wrapper input logic rst_ni, input logic pulp_clock_en_i, // PULP clock enable (only used if PULP_CLUSTER = 1) - input logic scan_cg_en_i, // Enable all clock gates for testing + input logic scan_cg_en_i, // Enable all clock gates for testing // Core ID, Cluster ID, debug mode halt address and boot address are considered more or less static input logic [31:0] boot_addr_i, @@ -79,7 +83,7 @@ module cv32e41p_wrapper input logic [APU_NUSFLAGS_CPU-1:0] apu_flags_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, @@ -144,7 +148,7 @@ module cv32e41p_wrapper .pc (core_i.id_stage_i.pc_id_i), .instr (core_i.id_stage_i.instr), .controller_state_i(core_i.id_stage_i.controller_i.ctrl_fsm_cs), - .compressed (core_i.id_stage_i.is_compressed_i), + .compressed (core_i.id_stage_i.is_compressed), .id_valid (core_i.id_stage_i.id_valid_o), .is_decoding (core_i.id_stage_i.is_decoding_o), .is_illegal (core_i.id_stage_i.illegal_insn_dec), @@ -204,6 +208,10 @@ module cv32e41p_wrapper .PULP_CLUSTER (PULP_CLUSTER), .FPU (FPU), .PULP_ZFINX (PULP_ZFINX), + .Zcea (Zcea), + .Zceb (Zceb), + .Zcec (Zcec), + .Zcee (Zcee), .NUM_MHPMCOUNTERS(NUM_MHPMCOUNTERS) ) core_i ( .* diff --git a/cv32e41p_manifest.flist b/cv32e41p_manifest.flist index 6be31a5..3e6b2fe 100644 --- a/cv32e41p_manifest.flist +++ b/cv32e41p_manifest.flist @@ -38,8 +38,7 @@ ${DESIGN_RTL_DIR}/cv32e41p_register_file_ff.sv ${DESIGN_RTL_DIR}/cv32e41p_load_store_unit.sv ${DESIGN_RTL_DIR}/cv32e41p_id_stage.sv ${DESIGN_RTL_DIR}/cv32e41p_aligner.sv -${DESIGN_RTL_DIR}/cv32e41p_decoder.sv -${DESIGN_RTL_DIR}/cv32e41p_compressed_decoder.sv +${DESIGN_RTL_DIR}/cv32e41p_merged_decoder.sv ${DESIGN_RTL_DIR}/cv32e41p_fifo.sv ${DESIGN_RTL_DIR}/cv32e41p_prefetch_buffer.sv ${DESIGN_RTL_DIR}/cv32e41p_hwloop_regs.sv diff --git a/rtl/cv32e41p_aligner.sv b/rtl/cv32e41p_aligner.sv index 9661e60..796bf3b 100644 --- a/rtl/cv32e41p_aligner.sv +++ b/rtl/cv32e41p_aligner.sv @@ -25,7 +25,7 @@ module cv32e41p_aligner ( input logic rst_n, input logic fetch_valid_i, - output logic aligner_ready_o, //prevents overwriting the fethced instruction + output logic aligner_ready_o, //prevents overwriting the fethced instruction input logic if_valid_i, @@ -34,7 +34,7 @@ module cv32e41p_aligner ( output logic instr_valid_o, input logic [31:0] branch_addr_i, - input logic branch_i, // Asserted if we are branching/jumping now + input logic branch_i, // Asserted if we are branching/jumping now input logic [31:0] hwlp_addr_i, input logic hwlp_update_pc_i, diff --git a/rtl/cv32e41p_alu.sv b/rtl/cv32e41p_alu.sv index 7cffefe..125d58a 100644 --- a/rtl/cv32e41p_alu.sv +++ b/rtl/cv32e41p_alu.sv @@ -263,36 +263,28 @@ module cv32e41p_alu // right shifts, we let the synthesizer optimize this logic [63:0] shift_op_a_32; - assign shift_op_a_32 = (operator_i == ALU_ROR) ? { - shift_op_a, shift_op_a - } : $signed( - {{32{shift_arithmetic & shift_op_a[31]}}, shift_op_a} - ); + assign shift_op_a_32 = (operator_i == ALU_ROR) ? {shift_op_a, shift_op_a} : $signed( + {{32{shift_arithmetic & shift_op_a[31]}}, shift_op_a} + ); always_comb begin case (vector_mode_i) VEC_MODE16: begin - shift_right_result[31:16] = $signed( - {shift_arithmetic & shift_op_a[31], shift_op_a[31:16]} - ) >>> shift_amt_int[19:16]; - shift_right_result[15:0] = $signed( - {shift_arithmetic & shift_op_a[15], shift_op_a[15:0]} - ) >>> shift_amt_int[3:0]; + shift_right_result[31:16] = $signed({shift_arithmetic & shift_op_a[31], + shift_op_a[31:16]}) >>> shift_amt_int[19:16]; + shift_right_result[15:0] = + $signed({shift_arithmetic & shift_op_a[15], shift_op_a[15:0]}) >>> shift_amt_int[3:0]; end VEC_MODE8: begin - shift_right_result[31:24] = $signed( - {shift_arithmetic & shift_op_a[31], shift_op_a[31:24]} - ) >>> shift_amt_int[26:24]; - shift_right_result[23:16] = $signed( - {shift_arithmetic & shift_op_a[23], shift_op_a[23:16]} - ) >>> shift_amt_int[18:16]; - shift_right_result[15:8] = $signed( - {shift_arithmetic & shift_op_a[15], shift_op_a[15:8]} - ) >>> shift_amt_int[10:8]; - shift_right_result[7:0] = $signed( - {shift_arithmetic & shift_op_a[7], shift_op_a[7:0]} - ) >>> shift_amt_int[2:0]; + shift_right_result[31:24] = $signed({shift_arithmetic & shift_op_a[31], + shift_op_a[31:24]}) >>> shift_amt_int[26:24]; + shift_right_result[23:16] = $signed({shift_arithmetic & shift_op_a[23], + shift_op_a[23:16]}) >>> shift_amt_int[18:16]; + shift_right_result[15:8] = + $signed({shift_arithmetic & shift_op_a[15], shift_op_a[15:8]}) >>> shift_amt_int[10:8]; + shift_right_result[7:0] = $signed({shift_arithmetic & shift_op_a[7], shift_op_a[7:0]}) >>> + shift_amt_int[2:0]; end default: // VEC_MODE32 diff --git a/rtl/cv32e41p_alu_div.sv b/rtl/cv32e41p_alu_div.sv index ae21802..1819edb 100644 --- a/rtl/cv32e41p_alu_div.sv +++ b/rtl/cv32e41p_alu_div.sv @@ -35,8 +35,8 @@ module cv32e41p_alu_div #( input logic [C_LOG_WIDTH-1:0] OpBShift_DI, input logic OpBIsZero_SI, // - input logic OpBSign_SI, // gate this to 0 in case of unsigned ops - input logic [ 1:0] OpCode_SI, // 0: udiv, 2: urem, 1: div, 3: rem + input logic OpBSign_SI, // gate this to 0 in case of unsigned ops + input logic [ 1:0] OpCode_SI, // 0: udiv, 2: urem, 1: div, 3: rem // handshake input logic InVld_SI, // output IF @@ -186,9 +186,9 @@ module cv32e41p_alu_div #( assign AReg_DN = (ARegEn_S) ? AddOut_D : AReg_DP; assign BReg_DN = (BRegEn_S) ? BMux_D : BReg_DP; - assign ResReg_DN = (LoadEn_S) ? '0 : (ResRegEn_S) ? { - ABComp_S, ResReg_DP[$high(ResReg_DP):1] - } : ResReg_DP; + assign ResReg_DN = (LoadEn_S) ? '0 : (ResRegEn_S) ? {ABComp_S, ResReg_DP[$high( + ResReg_DP + ):1]} : ResReg_DP; always_ff @(posedge Clk_CI or negedge Rst_RBI) begin : p_regs if (~Rst_RBI) begin diff --git a/rtl/cv32e41p_compressed_decoder.sv b/rtl/cv32e41p_compressed_decoder.sv deleted file mode 100644 index 44b5207..0000000 --- a/rtl/cv32e41p_compressed_decoder.sv +++ /dev/null @@ -1,596 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -//////////////////////////////////////////////////////////////////////////////// -// Engineer: Sven Stucki - svstucki@student.ethz.ch // -// // -// Additional contributions by: // -// Michael Gautschi - gautschi@iis.ee.ethz.ch // -// // -// Design Name: Compressed instruction decoder // -// Project Name: RI5CY // -// Language: SystemVerilog // -// // -// Description: Decodes RISC-V compressed instructions into their RV32 // -// equivalent. This module is fully combinatorial. // -// Float extensions added // -// // -//////////////////////////////////////////////////////////////////////////////// - -module cv32e41p_compressed_decoder #( - parameter FPU = 0 -) ( - input logic [31:0] instr_i, - output logic [31:0] instr_o, - output logic is_compressed_o, - output logic illegal_instr_o -); - - import cv32e41p_pkg::*; - - ////////////////////////////////////////////////////////////////////////////////////////////////////// - // ____ _ ____ _ // - // / ___|___ _ __ ___ _ __ _ __ ___ ___ ___ ___ __| | | _ \ ___ ___ ___ __| | ___ _ __ // - // | | / _ \| '_ ` _ \| '_ \| '__/ _ \/ __/ __|/ _ \/ _` | | | | |/ _ \/ __/ _ \ / _` |/ _ \ '__| // - // | |__| (_) | | | | | | |_) | | | __/\__ \__ \ __/ (_| | | |_| | __/ (_| (_) | (_| | __/ | // - // \____\___/|_| |_| |_| .__/|_| \___||___/___/\___|\__,_| |____/ \___|\___\___/ \__,_|\___|_| // - // |_| // - ////////////////////////////////////////////////////////////////////////////////////////////////////// - - always_comb begin - illegal_instr_o = 1'b0; - instr_o = '0; - - unique case (instr_i[1:0]) - // C0 - 2'b00: begin - unique case (instr_i[15:13]) - 3'b000: begin - // c.addi4spn -> addi rd', x2, imm - instr_o = { - 2'b0, - instr_i[10:7], - instr_i[12:11], - instr_i[5], - instr_i[6], - 2'b00, - 5'h02, - 3'b000, - 2'b01, - instr_i[4:2], - OPCODE_OPIMM - }; - if (instr_i[12:5] == 8'b0) illegal_instr_o = 1'b1; - end - - 3'b001: begin - // c.fld -> fld rd', imm(rs1') - if (FPU == 1) // instr_i[12:10]-> offset[5:3], instr_i[6:5]-> offset[7:6] - instr_o = { - 4'b0, - instr_i[6:5], - instr_i[12:10], - 3'b000, - 2'b01, - instr_i[9:7], - 3'b011, - 2'b01, - instr_i[4:2], - OPCODE_LOAD_FP - }; - else illegal_instr_o = 1'b1; - end - - 3'b010: begin - // c.lw -> lw rd', imm(rs1') - instr_o = { - 5'b0, - instr_i[5], - instr_i[12:10], - instr_i[6], - 2'b00, - 2'b01, - instr_i[9:7], - 3'b010, - 2'b01, - instr_i[4:2], - OPCODE_LOAD - }; - end - - 3'b011: begin - // c.flw -> flw rd', imm(rs1') - if (FPU == 1) - instr_o = { - 5'b0, - instr_i[5], - instr_i[12:10], - instr_i[6], - 2'b00, - 2'b01, - instr_i[9:7], - 3'b010, - 2'b01, - instr_i[4:2], - OPCODE_LOAD_FP - }; - else illegal_instr_o = 1'b1; - end - - 3'b101: begin - // c.fsd -> fsd rs2', imm(rs1') - if (FPU == 1) // instr_i[12:10] -> offset[5:3], instr_i[6:5] -> offset[7:6] - instr_o = { - 4'b0, - instr_i[6:5], - instr_i[12], - 2'b01, - instr_i[4:2], - 2'b01, - instr_i[9:7], - 3'b011, - instr_i[11:10], - 3'b000, - OPCODE_STORE_FP - }; - else illegal_instr_o = 1'b1; - end - - 3'b110: begin - // c.sw -> sw rs2', imm(rs1') - instr_o = { - 5'b0, - instr_i[5], - instr_i[12], - 2'b01, - instr_i[4:2], - 2'b01, - instr_i[9:7], - 3'b010, - instr_i[11:10], - instr_i[6], - 2'b00, - OPCODE_STORE - }; - end - - 3'b111: begin - // c.fsw -> fsw rs2', imm(rs1') - if (FPU == 1) - instr_o = { - 5'b0, - instr_i[5], - instr_i[12], - 2'b01, - instr_i[4:2], - 2'b01, - instr_i[9:7], - 3'b010, - instr_i[11:10], - instr_i[6], - 2'b00, - OPCODE_STORE_FP - }; - else illegal_instr_o = 1'b1; - end - default: begin - illegal_instr_o = 1'b1; - end - endcase - end - - - // C1 - 2'b01: begin - unique case (instr_i[15:13]) - 3'b000: begin - // c.addi -> addi rd, rd, nzimm - // c.nop - instr_o = { - {6{instr_i[12]}}, - instr_i[12], - instr_i[6:2], - instr_i[11:7], - 3'b0, - instr_i[11:7], - OPCODE_OPIMM - }; - end - - 3'b001, 3'b101: begin - // 001: c.jal -> jal x1, imm - // 101: c.j -> jal x0, imm - instr_o = { - instr_i[12], - instr_i[8], - instr_i[10:9], - instr_i[6], - instr_i[7], - instr_i[2], - instr_i[11], - instr_i[5:3], - {9{instr_i[12]}}, - 4'b0, - ~instr_i[15], - OPCODE_JAL - }; - end - - 3'b010: begin - if (instr_i[11:7] == 5'b0) begin - // Hint -> addi x0, x0, nzimm - instr_o = { - {6{instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OPIMM - }; - end else begin - // c.li -> addi rd, x0, nzimm - instr_o = { - {6{instr_i[12]}}, instr_i[12], instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OPIMM - }; - end - end - - 3'b011: begin - if ({instr_i[12], instr_i[6:2]} == 6'b0) begin - illegal_instr_o = 1'b1; - end else begin - if (instr_i[11:7] == 5'h02) begin - // c.addi16sp -> addi x2, x2, nzimm - instr_o = { - {3{instr_i[12]}}, - instr_i[4:3], - instr_i[5], - instr_i[2], - instr_i[6], - 4'b0, - 5'h02, - 3'b000, - 5'h02, - OPCODE_OPIMM - }; - end else if (instr_i[11:7] == 5'b0) begin - // Hint -> lui x0, imm - instr_o = {{15{instr_i[12]}}, instr_i[6:2], instr_i[11:7], OPCODE_LUI}; - end else begin - // c.lui -> lui rd, imm - instr_o = {{15{instr_i[12]}}, instr_i[6:2], instr_i[11:7], OPCODE_LUI}; - end - end - end - - 3'b100: begin - unique case (instr_i[11:10]) - 2'b00, 2'b01: begin - // 00: c.srli -> srli rd, rd, shamt - // 01: c.srai -> srai rd, rd, shamt - if (instr_i[12] == 1'b1) begin - // Reserved for future custom extensions (instr_o don't care) - instr_o = { - 1'b0, - instr_i[10], - 5'b0, - instr_i[6:2], - 2'b01, - instr_i[9:7], - 3'b101, - 2'b01, - instr_i[9:7], - OPCODE_OPIMM - }; - illegal_instr_o = 1'b1; - end else begin - if (instr_i[6:2] == 5'b0) begin - // Hint - instr_o = { - 1'b0, - instr_i[10], - 5'b0, - instr_i[6:2], - 2'b01, - instr_i[9:7], - 3'b101, - 2'b01, - instr_i[9:7], - OPCODE_OPIMM - }; - end else begin - instr_o = { - 1'b0, - instr_i[10], - 5'b0, - instr_i[6:2], - 2'b01, - instr_i[9:7], - 3'b101, - 2'b01, - instr_i[9:7], - OPCODE_OPIMM - }; - end - end - end - - 2'b10: begin - // c.andi -> andi rd, rd, imm - instr_o = { - {6{instr_i[12]}}, - instr_i[12], - instr_i[6:2], - 2'b01, - instr_i[9:7], - 3'b111, - 2'b01, - instr_i[9:7], - OPCODE_OPIMM - }; - end - - 2'b11: begin - unique case ({ - instr_i[12], instr_i[6:5] - }) - 3'b000: begin - // c.sub -> sub rd', rd', rs2' - instr_o = { - 2'b01, - 5'b0, - 2'b01, - instr_i[4:2], - 2'b01, - instr_i[9:7], - 3'b000, - 2'b01, - instr_i[9:7], - OPCODE_OP - }; - end - - 3'b001: begin - // c.xor -> xor rd', rd', rs2' - instr_o = { - 7'b0, - 2'b01, - instr_i[4:2], - 2'b01, - instr_i[9:7], - 3'b100, - 2'b01, - instr_i[9:7], - OPCODE_OP - }; - end - - 3'b010: begin - // c.or -> or rd', rd', rs2' - instr_o = { - 7'b0, - 2'b01, - instr_i[4:2], - 2'b01, - instr_i[9:7], - 3'b110, - 2'b01, - instr_i[9:7], - OPCODE_OP - }; - end - - 3'b011: begin - // c.and -> and rd', rd', rs2' - instr_o = { - 7'b0, - 2'b01, - instr_i[4:2], - 2'b01, - instr_i[9:7], - 3'b111, - 2'b01, - instr_i[9:7], - OPCODE_OP - }; - end - - 3'b100, 3'b101, 3'b110, 3'b111: begin - // 100: c.subw - // 101: c.addw - illegal_instr_o = 1'b1; - end - endcase - end - endcase - end - - 3'b110, 3'b111: begin - // 0: c.beqz -> beq rs1', x0, imm - // 1: c.bnez -> bne rs1', x0, imm - instr_o = { - {4{instr_i[12]}}, - instr_i[6:5], - instr_i[2], - 5'b0, - 2'b01, - instr_i[9:7], - 2'b00, - instr_i[13], - instr_i[11:10], - instr_i[4:3], - instr_i[12], - OPCODE_BRANCH - }; - end - endcase - end - - // C2 - 2'b10: begin - unique case (instr_i[15:13]) - 3'b000: begin - if (instr_i[12] == 1'b1) begin - // Reserved for future extensions (instr_o don't care) - instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM}; - illegal_instr_o = 1'b1; - end else begin - if ((instr_i[6:2] == 5'b0) || (instr_i[11:7] == 5'b0)) begin - // Hint -> slli rd, rd, shamt - instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM}; - end else begin - // c.slli -> slli rd, rd, shamt - instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b001, instr_i[11:7], OPCODE_OPIMM}; - end - end - end - - 3'b001: begin - // c.fldsp -> fld rd, imm(x2) - if (FPU==1) // instr_i[6:5] -> offset[4:3], instr_i[4:2] -> offset[8:6], instr_i[12] -> offset[5] - instr_o = { - 3'b0, - instr_i[4:2], - instr_i[12], - instr_i[6:5], - 3'b000, - 5'h02, - 3'b011, - instr_i[11:7], - OPCODE_LOAD_FP - }; - else illegal_instr_o = 1'b1; - end - - 3'b010: begin - // c.lwsp -> lw rd, imm(x2) - instr_o = { - 4'b0, - instr_i[3:2], - instr_i[12], - instr_i[6:4], - 2'b00, - 5'h02, - 3'b010, - instr_i[11:7], - OPCODE_LOAD - }; - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; - end - - 3'b011: begin - // c.flwsp -> flw rd, imm(x2) - if (FPU == 1) - instr_o = { - 4'b0, - instr_i[3:2], - instr_i[12], - instr_i[6:4], - 2'b00, - 5'h02, - 3'b010, - instr_i[11:7], - OPCODE_LOAD_FP - }; - else illegal_instr_o = 1'b1; - end - - 3'b100: begin - if (instr_i[12] == 1'b0) begin - if (instr_i[6:2] == 5'b0) begin - // c.jr -> jalr x0, rd/rs1, 0 - instr_o = {12'b0, instr_i[11:7], 3'b0, 5'b0, OPCODE_JALR}; - // c.jr with rs1 = 0 is reserved - if (instr_i[11:7] == 5'b0) illegal_instr_o = 1'b1; - end else begin - if (instr_i[11:7] == 5'b0) begin - // Hint -> add x0, x0, rs2 - instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OP}; - end else begin - // c.mv -> add rd, x0, rs2 - instr_o = {7'b0, instr_i[6:2], 5'b0, 3'b0, instr_i[11:7], OPCODE_OP}; - end - end - end else begin - if (instr_i[6:2] == 5'b0) begin - if (instr_i[11:7] == 5'b0) begin - // c.ebreak -> ebreak - instr_o = {32'h00_10_00_73}; - end else begin - // c.jalr -> jalr x1, rs1, 0 - instr_o = {12'b0, instr_i[11:7], 3'b000, 5'b00001, OPCODE_JALR}; - end - end else begin - if (instr_i[11:7] == 5'b0) begin - // Hint -> add x0, x0, rs2 - instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP}; - end else begin - // c.add -> add rd, rd, rs2 - instr_o = {7'b0, instr_i[6:2], instr_i[11:7], 3'b0, instr_i[11:7], OPCODE_OP}; - end - end - end - end - - 3'b101: begin - // c.fsdsp -> fsd rs2, imm(x2) - if (FPU == 1) // instr_i[12:10] -> offset[5:3], instr_i[9:7] -> offset[8:6] - instr_o = { - 3'b0, - instr_i[9:7], - instr_i[12], - instr_i[6:2], - 5'h02, - 3'b011, - instr_i[11:10], - 3'b000, - OPCODE_STORE_FP - }; - else illegal_instr_o = 1'b1; - end - 3'b110: begin - // c.swsp -> sw rs2, imm(x2) - instr_o = { - 4'b0, - instr_i[8:7], - instr_i[12], - instr_i[6:2], - 5'h02, - 3'b010, - instr_i[11:9], - 2'b00, - OPCODE_STORE - }; - end - - 3'b111: begin - // c.fswsp -> fsw rs2, imm(x2) - if (FPU == 1) - instr_o = { - 4'b0, - instr_i[8:7], - instr_i[12], - instr_i[6:2], - 5'h02, - 3'b010, - instr_i[11:9], - 2'b00, - OPCODE_STORE_FP - }; - else illegal_instr_o = 1'b1; - end - endcase - end - - default: begin - // 32 bit (or more) instruction - instr_o = instr_i; - end - endcase - end - - assign is_compressed_o = (instr_i[1:0] != 2'b11); - -endmodule diff --git a/rtl/cv32e41p_core.sv b/rtl/cv32e41p_core.sv index 2920469..5748538 100644 --- a/rtl/cv32e41p_core.sv +++ b/rtl/cv32e41p_core.sv @@ -34,6 +34,10 @@ module cv32e41p_core parameter PULP_XPULP = 0, // PULP ISA Extension (incl. custom CSRs and hardware loop, excl. p.elw) parameter PULP_CLUSTER = 0, // PULP Cluster interface (incl. p.elw) parameter FPU = 0, // Floating Point Unit (interfaced via APU interface) + parameter Zcea = 0, // + parameter Zceb = 0, // + parameter Zcec = 0, // + parameter Zcee = 0, // parameter PULP_ZFINX = 0, // Float-in-General Purpose registers parameter NUM_MHPMCOUNTERS = 1 ) ( @@ -82,7 +86,7 @@ module cv32e41p_core input logic [APU_NUSFLAGS_CPU-1:0] apu_flags_i, // Interrupt inputs - input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts + input logic [31:0] irq_i, // CLINT interrupts + CLINT extension interrupts output logic irq_ack_o, output logic [ 4:0] irq_id_o, @@ -128,8 +132,6 @@ module cv32e41p_core // IF/ID signals logic instr_valid_id; logic [31:0] instr_rdata_id; // Instruction sampled inside IF stage - logic is_compressed_id; - logic illegal_c_insn_id; logic is_fetch_failed_id; logic clear_instr_valid; @@ -383,9 +385,9 @@ module cv32e41p_core .PULP_CLUSTER(PULP_CLUSTER) ) sleep_unit_i ( // Clock, reset interface - .clk_ungated_i(clk_i), // Ungated clock + .clk_ungated_i(clk_i), // Ungated clock .rst_n (rst_ni), - .clk_gated_o (clk), // Gated clock + .clk_gated_o (clk), // Gated clock .scan_cg_en_i (scan_cg_en_i), // Core sleep @@ -424,7 +426,11 @@ module cv32e41p_core .PULP_XPULP (PULP_XPULP), .PULP_OBI (PULP_OBI), .PULP_SECURE(PULP_SECURE), - .FPU (FPU) + .FPU (FPU), + .Zcea (Zcea), + .Zceb (Zceb), + .Zcec (Zcec), + .Zcee (Zcee) ) if_stage_i ( .clk (clk), .rst_n(rst_ni), @@ -450,8 +456,8 @@ module cv32e41p_core .instr_gnt_i (instr_gnt_pmp), .instr_rvalid_i (instr_rvalid_i), .instr_rdata_i (instr_rdata_i), - .instr_err_i (1'b0), // Bus error (not used yet) - .instr_err_pmp_i(instr_err_pmp), // PMP error + .instr_err_i (1'b0), // Bus error (not used yet) + .instr_err_pmp_i(instr_err_pmp), // PMP error // outputs to ID stage .instr_valid_id_o (instr_valid_id), @@ -467,15 +473,13 @@ module cv32e41p_core .depc_i(depc), // debug return address - .pc_mux_i (pc_mux_id), // sel for pc multiplexer + .pc_mux_i (pc_mux_id), // sel for pc multiplexer .exc_pc_mux_i(exc_pc_mux_id), .pc_id_o(pc_id), .pc_if_o(pc_if), - .is_compressed_id_o (is_compressed_id), - .illegal_c_insn_id_o(illegal_c_insn_id), .m_exc_vec_pc_mux_i(m_exc_vec_pc_mux_id), .u_exc_vec_pc_mux_i(u_exc_vec_pc_mux_id), @@ -510,6 +514,10 @@ module cv32e41p_core ///////////////////////////////////////////////// cv32e41p_id_stage #( .PULP_XPULP (PULP_XPULP), + .Zcea (Zcea), + .Zceb (Zceb), + .Zcec (Zcec), + .Zcee (Zcee), .PULP_CLUSTER (PULP_CLUSTER), .N_HWLP (N_HWLP), .PULP_SECURE (PULP_SECURE), @@ -524,7 +532,7 @@ module cv32e41p_core .APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU), .DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN) ) id_stage_i ( - .clk (clk), // Gated clock + .clk (clk), // Gated clock .clk_ungated_i(clk_i), // Ungated clock .rst_n (rst_ni), @@ -557,9 +565,6 @@ module cv32e41p_core .pc_id_i(pc_id), - .is_compressed_i (is_compressed_id), - .illegal_c_insn_i(illegal_c_insn_id), - // Stalls .halt_if_o(halt_if), @@ -593,14 +598,14 @@ module cv32e41p_core .regfile_alu_waddr_ex_o(regfile_alu_waddr_ex), // MUL - .mult_operator_ex_o (mult_operator_ex), // from ID to EX stage - .mult_en_ex_o (mult_en_ex), // from ID to EX stage + .mult_operator_ex_o (mult_operator_ex), // from ID to EX stage + .mult_en_ex_o (mult_en_ex), // from ID to EX stage .mult_sel_subword_ex_o(mult_sel_subword_ex), // from ID to EX stage .mult_signed_mode_ex_o(mult_signed_mode_ex), // from ID to EX stage - .mult_operand_a_ex_o (mult_operand_a_ex), // from ID to EX stage - .mult_operand_b_ex_o (mult_operand_b_ex), // from ID to EX stage - .mult_operand_c_ex_o (mult_operand_c_ex), // from ID to EX stage - .mult_imm_ex_o (mult_imm_ex), // from ID to EX stage + .mult_operand_a_ex_o (mult_operand_a_ex), // from ID to EX stage + .mult_operand_b_ex_o (mult_operand_b_ex), // from ID to EX stage + .mult_operand_c_ex_o (mult_operand_c_ex), // from ID to EX stage + .mult_imm_ex_o (mult_imm_ex), // from ID to EX stage .mult_dot_op_a_ex_o (mult_dot_op_a_ex), // from ID to EX stage .mult_dot_op_b_ex_o (mult_dot_op_b_ex), // from ID to EX stage @@ -636,9 +641,9 @@ module cv32e41p_core .current_priv_lvl_i (current_priv_lvl), .csr_irq_sec_o (csr_irq_sec), .csr_cause_o (csr_cause), - .csr_save_if_o (csr_save_if), // control signal to save pc - .csr_save_id_o (csr_save_id), // control signal to save pc - .csr_save_ex_o (csr_save_ex), // control signal to save pc + .csr_save_if_o (csr_save_if), // control signal to save pc + .csr_save_id_o (csr_save_id), // control signal to save pc + .csr_save_ex_o (csr_save_ex), // control signal to save pc .csr_restore_mret_id_o(csr_restore_mret_id), // control signal to restore pc .csr_restore_uret_id_o(csr_restore_uret_id), // control signal to restore pc @@ -660,11 +665,11 @@ module cv32e41p_core .csr_hwlp_data_i (csr_hwlp_data), // LSU - .data_req_ex_o (data_req_ex), // to load store unit - .data_we_ex_o (data_we_ex), // to load store unit + .data_req_ex_o (data_req_ex), // to load store unit + .data_we_ex_o (data_we_ex), // to load store unit .atop_ex_o (data_atop_ex), - .data_type_ex_o (data_type_ex), // to load store unit - .data_sign_ext_ex_o (data_sign_ext_ex), // to load store unit + .data_type_ex_o (data_type_ex), // to load store unit + .data_sign_ext_ex_o (data_sign_ext_ex), // to load store unit .data_reg_offset_ex_o(data_reg_offset_ex), // to load store unit .data_load_event_ex_o(data_load_event_ex), // to load store unit @@ -753,34 +758,34 @@ module cv32e41p_core // Alu signals from ID stage .alu_en_i (alu_en_ex), - .alu_operator_i (alu_operator_ex), // from ID/EX pipe registers + .alu_operator_i (alu_operator_ex), // from ID/EX pipe registers .alu_operand_a_i (alu_operand_a_ex), // from ID/EX pipe registers .alu_operand_b_i (alu_operand_b_ex), // from ID/EX pipe registers .alu_operand_c_i (alu_operand_c_ex), // from ID/EX pipe registers - .bmask_a_i (bmask_a_ex), // from ID/EX pipe registers - .bmask_b_i (bmask_b_ex), // from ID/EX pipe registers - .imm_vec_ext_i (imm_vec_ext_ex), // from ID/EX pipe registers - .alu_vec_mode_i (alu_vec_mode_ex), // from ID/EX pipe registers - .alu_is_clpx_i (alu_is_clpx_ex), // from ID/EX pipe registers + .bmask_a_i (bmask_a_ex), // from ID/EX pipe registers + .bmask_b_i (bmask_b_ex), // from ID/EX pipe registers + .imm_vec_ext_i (imm_vec_ext_ex), // from ID/EX pipe registers + .alu_vec_mode_i (alu_vec_mode_ex), // from ID/EX pipe registers + .alu_is_clpx_i (alu_is_clpx_ex), // from ID/EX pipe registers .alu_is_subrot_i (alu_is_subrot_ex), // from ID/Ex pipe registers - .alu_clpx_shift_i(alu_clpx_shift_ex), // from ID/EX pipe registers + .alu_clpx_shift_i(alu_clpx_shift_ex), // from ID/EX pipe registers // Multipler - .mult_operator_i (mult_operator_ex), // from ID/EX pipe registers - .mult_operand_a_i (mult_operand_a_ex), // from ID/EX pipe registers - .mult_operand_b_i (mult_operand_b_ex), // from ID/EX pipe registers - .mult_operand_c_i (mult_operand_c_ex), // from ID/EX pipe registers - .mult_en_i (mult_en_ex), // from ID/EX pipe registers + .mult_operator_i (mult_operator_ex), // from ID/EX pipe registers + .mult_operand_a_i (mult_operand_a_ex), // from ID/EX pipe registers + .mult_operand_b_i (mult_operand_b_ex), // from ID/EX pipe registers + .mult_operand_c_i (mult_operand_c_ex), // from ID/EX pipe registers + .mult_en_i (mult_en_ex), // from ID/EX pipe registers .mult_sel_subword_i(mult_sel_subword_ex), // from ID/EX pipe registers .mult_signed_mode_i(mult_signed_mode_ex), // from ID/EX pipe registers - .mult_imm_i (mult_imm_ex), // from ID/EX pipe registers - .mult_dot_op_a_i (mult_dot_op_a_ex), // from ID/EX pipe registers - .mult_dot_op_b_i (mult_dot_op_b_ex), // from ID/EX pipe registers - .mult_dot_op_c_i (mult_dot_op_c_ex), // from ID/EX pipe registers - .mult_dot_signed_i (mult_dot_signed_ex), // from ID/EX pipe registers - .mult_is_clpx_i (mult_is_clpx_ex), // from ID/EX pipe registers - .mult_clpx_shift_i (mult_clpx_shift_ex), // from ID/EX pipe registers - .mult_clpx_img_i (mult_clpx_img_ex), // from ID/EX pipe registers + .mult_imm_i (mult_imm_ex), // from ID/EX pipe registers + .mult_dot_op_a_i (mult_dot_op_a_ex), // from ID/EX pipe registers + .mult_dot_op_b_i (mult_dot_op_b_ex), // from ID/EX pipe registers + .mult_dot_op_c_i (mult_dot_op_c_ex), // from ID/EX pipe registers + .mult_dot_signed_i (mult_dot_signed_ex), // from ID/EX pipe registers + .mult_is_clpx_i (mult_is_clpx_ex), // from ID/EX pipe registers + .mult_clpx_shift_i (mult_clpx_shift_ex), // from ID/EX pipe registers + .mult_clpx_img_i (mult_clpx_img_ex), // from ID/EX pipe registers .mult_multicycle_o(mult_multicycle), // to ID/EX pipe registers @@ -878,8 +883,8 @@ module cv32e41p_core .data_req_o (data_req_pmp), .data_gnt_i (data_gnt_pmp), .data_rvalid_i (data_rvalid_i), - .data_err_i (1'b0), // Bus error (not used yet) - .data_err_pmp_i(data_err_pmp), // PMP error + .data_err_i (1'b0), // Bus error (not used yet) + .data_err_pmp_i(data_err_pmp), // PMP error .data_addr_o (data_addr_pmp), .data_we_o (data_we_o), @@ -895,7 +900,7 @@ module cv32e41p_core .data_wdata_ex_i (alu_operand_c_ex), .data_reg_offset_ex_i(data_reg_offset_ex), .data_load_event_ex_i(data_load_event_ex), - .data_sign_ext_ex_i (data_sign_ext_ex), // sign extension + .data_sign_ext_ex_i (data_sign_ext_ex), // sign extension .data_rdata_ex_o (lsu_rdata), .data_req_ex_i (data_req_ex), @@ -1137,7 +1142,7 @@ module cv32e41p_core always_ff @(posedge rst_ni) begin if (PULP_XPULP) begin $warning( - "PULP_XPULP == 1 has not been verified yet and non-backward compatible RTL fixes are expected (see https://github.com/openhwgroup/cv32e41p/issues/452)"); + "PULP_XPULP == 1 has not been verified yet and non-backward compatible RTL fixes are expected (see https://github.com/openhwgroup/cv32e40p/issues/452)"); end if (PULP_CLUSTER) begin $warning("PULP_CLUSTER == 1 has not been verified yet"); diff --git a/rtl/cv32e41p_cs_registers.sv b/rtl/cv32e41p_cs_registers.sv index 67293b8..f7fcde5 100644 --- a/rtl/cv32e41p_cs_registers.sv +++ b/rtl/cv32e41p_cs_registers.sv @@ -670,13 +670,13 @@ module cv32e41p_cs_registers CSR_MSTATUS: if (csr_we_int) begin mstatus_n = '{ - uie: csr_wdata_int[MSTATUS_UIE_BIT], - mie: csr_wdata_int[MSTATUS_MIE_BIT], - upie: csr_wdata_int[MSTATUS_UPIE_BIT], - mpie: csr_wdata_int[MSTATUS_MPIE_BIT], - mpp: PrivLvl_t'(csr_wdata_int[MSTATUS_MPP_BIT_HIGH:MSTATUS_MPP_BIT_LOW]), - mprv: csr_wdata_int[MSTATUS_MPRV_BIT] - }; + uie: csr_wdata_int[MSTATUS_UIE_BIT], + mie: csr_wdata_int[MSTATUS_MIE_BIT], + upie: csr_wdata_int[MSTATUS_UPIE_BIT], + mpie: csr_wdata_int[MSTATUS_MPIE_BIT], + mpp: PrivLvl_t'(csr_wdata_int[MSTATUS_MPP_BIT_HIGH:MSTATUS_MPP_BIT_LOW]), + mprv: csr_wdata_int[MSTATUS_MPRV_BIT] + }; end // mie: machine interrupt enable CSR_MIE: @@ -806,13 +806,13 @@ module cv32e41p_cs_registers CSR_USTATUS: if (csr_we_int) begin mstatus_n = '{ - uie: csr_wdata_int[MSTATUS_UIE_BIT], - mie: mstatus_q.mie, - upie: csr_wdata_int[MSTATUS_UPIE_BIT], - mpie: mstatus_q.mpie, - mpp: mstatus_q.mpp, - mprv: mstatus_q.mprv - }; + uie: csr_wdata_int[MSTATUS_UIE_BIT], + mie: mstatus_q.mie, + upie: csr_wdata_int[MSTATUS_UPIE_BIT], + mpie: mstatus_q.mpie, + mpp: mstatus_q.mpp, + mprv: mstatus_q.mprv + }; end // utvec: user trap-handler base address CSR_UTVEC: @@ -983,13 +983,13 @@ module cv32e41p_cs_registers CSR_MSTATUS: if (csr_we_int) begin mstatus_n = '{ - uie: csr_wdata_int[MSTATUS_UIE_BIT], - mie: csr_wdata_int[MSTATUS_MIE_BIT], - upie: csr_wdata_int[MSTATUS_UPIE_BIT], - mpie: csr_wdata_int[MSTATUS_MPIE_BIT], - mpp: PrivLvl_t'(csr_wdata_int[MSTATUS_MPP_BIT_HIGH:MSTATUS_MPP_BIT_LOW]), - mprv: csr_wdata_int[MSTATUS_MPRV_BIT] - }; + uie: csr_wdata_int[MSTATUS_UIE_BIT], + mie: csr_wdata_int[MSTATUS_MIE_BIT], + upie: csr_wdata_int[MSTATUS_UPIE_BIT], + mpie: csr_wdata_int[MSTATUS_MPIE_BIT], + mpp: PrivLvl_t'(csr_wdata_int[MSTATUS_MPP_BIT_HIGH:MSTATUS_MPP_BIT_LOW]), + mprv: csr_wdata_int[MSTATUS_MPRV_BIT] + }; end // mie: machine interrupt enable CSR_MIE: @@ -1223,23 +1223,23 @@ module cv32e41p_cs_registers if (rst_n == 1'b0) begin frm_q <= '0; fflags_q <= '0; - mstatus_q <= '{ - uie: 1'b0, - mie: 1'b0, - upie: 1'b0, - mpie: 1'b0, - mpp: PRIV_LVL_M, - mprv: 1'b0 - }; + mstatus_q <= '{ + uie: 1'b0, + mie: 1'b0, + upie: 1'b0, + mpie: 1'b0, + mpp: PRIV_LVL_M, + mprv: 1'b0 + }; mepc_q <= '0; mcause_q <= '0; depc_q <= '0; - dcsr_q <= '{ + dcsr_q <= '{ xdebugver: XDEBUGVER_STD, - cause: DBG_CAUSE_NONE, // 3'h0 - prv: PRIV_LVL_M, - default: '0 + cause: DBG_CAUSE_NONE, // 3'h0 + prv: PRIV_LVL_M, + default: '0 }; dscratch0_q <= '0; dscratch1_q <= '0; @@ -1259,14 +1259,14 @@ module cv32e41p_cs_registers if (PULP_SECURE == 1) begin mstatus_q <= mstatus_n; end else begin - mstatus_q <= '{ - uie: 1'b0, - mie: mstatus_n.mie, - upie: 1'b0, - mpie: mstatus_n.mpie, - mpp: PRIV_LVL_M, - mprv: 1'b0 - }; + mstatus_q <= '{ + uie: 1'b0, + mie: mstatus_n.mie, + upie: 1'b0, + mpie: mstatus_n.mpie, + mpp: PRIV_LVL_M, + mprv: 1'b0 + }; end mepc_q <= mepc_n; mcause_q <= mcause_n; diff --git a/rtl/cv32e41p_decoder.sv b/rtl/cv32e41p_decoder.sv deleted file mode 100644 index 19c6e40..0000000 --- a/rtl/cv32e41p_decoder.sv +++ /dev/null @@ -1,2631 +0,0 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -//////////////////////////////////////////////////////////////////////////////// -// Engineer Andreas Traber - atraber@iis.ee.ethz.ch // -// // -// Additional contributions by: // -// Matthias Baer - baermatt@student.ethz.ch // -// Igor Loi - igor.loi@unibo.it // -// Sven Stucki - svstucki@student.ethz.ch // -// Davide Schiavone - pschiavo@iis.ee.ethz.ch // -// // -// Design Name: Decoder // -// Project Name: RI5CY // -// Language: SystemVerilog // -// // -// Description: Decoder // -// // -//////////////////////////////////////////////////////////////////////////////// - -module cv32e41p_decoder import cv32e41p_pkg::*; import cv32e41p_apu_core_pkg::*; import cv32e41p_fpu_pkg::*; -#( - parameter PULP_XPULP = 1, // PULP ISA Extension (including PULP specific CSRs and hardware loop, excluding p.elw) - parameter PULP_CLUSTER = 0, - parameter A_EXTENSION = 0, - parameter FPU = 0, - parameter PULP_SECURE = 0, - parameter USE_PMP = 0, - parameter APU_WOP_CPU = 6, - parameter DEBUG_TRIGGER_EN = 1 -) -( - // singals running to/from controller - input logic deassert_we_i, // deassert we, we are stalled or not active - - output logic illegal_insn_o, // illegal instruction encountered - output logic ebrk_insn_o, // trap instruction encountered - - output logic mret_insn_o, // return from exception instruction encountered (M) - output logic uret_insn_o, // return from exception instruction encountered (S) - output logic dret_insn_o, // return from debug (M) - - output logic mret_dec_o, // return from exception instruction encountered (M) without deassert - output logic uret_dec_o, // return from exception instruction encountered (S) without deassert - output logic dret_dec_o, // return from debug (M) without deassert - - output logic ecall_insn_o, // environment call (syscall) instruction encountered - output logic wfi_o , // pipeline flush is requested - - output logic fencei_insn_o, // fence.i instruction - - output logic rega_used_o, // rs1 is used by current instruction - output logic regb_used_o, // rs2 is used by current instruction - output logic regc_used_o, // rs3 is used by current instruction - - output logic reg_fp_a_o, // fp reg a is used - output logic reg_fp_b_o, // fp reg b is used - output logic reg_fp_c_o, // fp reg c is used - output logic reg_fp_d_o, // fp reg d is used - - output logic [ 0:0] bmask_a_mux_o, // bit manipulation mask a mux - output logic [ 1:0] bmask_b_mux_o, // bit manipulation mask b mux - output logic alu_bmask_a_mux_sel_o, // bit manipulation mask a mux (reg or imm) - output logic alu_bmask_b_mux_sel_o, // bit manipulation mask b mux (reg or imm) - - // from IF/ID pipeline - input logic [31:0] instr_rdata_i, // instruction read from instr memory/cache - input logic illegal_c_insn_i, // compressed instruction decode failed - - // ALU signals - output logic alu_en_o, // ALU enable - output alu_opcode_e alu_operator_o, // ALU operation selection - output logic [2:0] alu_op_a_mux_sel_o, // operand a selection: reg value, PC, immediate or zero - output logic [2:0] alu_op_b_mux_sel_o, // operand b selection: reg value or immediate - output logic [1:0] alu_op_c_mux_sel_o, // operand c selection: reg value or jump target - output logic [1:0] alu_vec_mode_o, // selects between 32 bit, 16 bit and 8 bit vectorial modes - output logic scalar_replication_o, // scalar replication enable - output logic scalar_replication_c_o, // scalar replication enable for operand C - output logic [0:0] imm_a_mux_sel_o, // immediate selection for operand a - output logic [3:0] imm_b_mux_sel_o, // immediate selection for operand b - output logic [1:0] regc_mux_o, // register c selection: S3, RD or 0 - output logic is_clpx_o, // whether the instruction is complex (pulpv3) or not - output logic is_subrot_o, - - // MUL related control signals - output mul_opcode_e mult_operator_o, // Multiplication operation selection - output logic mult_int_en_o, // perform integer multiplication - output logic mult_dot_en_o, // perform dot multiplication - output logic [0:0] mult_imm_mux_o, // Multiplication immediate mux selector - output logic mult_sel_subword_o, // Select subwords for 16x16 bit of multiplier - output logic [1:0] mult_signed_mode_o, // Multiplication in signed mode - output logic [1:0] mult_dot_signed_o, // Dot product in signed mode - - // FPU - input logic [C_RM-1:0] frm_i, // Rounding mode from float CSR - - output logic [cv32e41p_fpu_pkg::FP_FORMAT_BITS-1:0] fpu_dst_fmt_o, // fpu destination format - output logic [cv32e41p_fpu_pkg::FP_FORMAT_BITS-1:0] fpu_src_fmt_o, // fpu source format - output logic [cv32e41p_fpu_pkg::INT_FORMAT_BITS-1:0] fpu_int_fmt_o, // fpu integer format (for casts) - - // APU - output logic apu_en_o, - output logic [APU_WOP_CPU-1:0] apu_op_o, - output logic [1:0] apu_lat_o, - output logic [2:0] fp_rnd_mode_o, - - // register file related signals - output logic regfile_mem_we_o, // write enable for regfile - output logic regfile_alu_we_o, // write enable for 2nd regfile port - output logic regfile_alu_we_dec_o, // write enable for 2nd regfile port without deassert - output logic regfile_alu_waddr_sel_o, // Select register write address for ALU/MUL operations - - // CSR manipulation - output logic csr_access_o, // access to CSR - output logic csr_status_o, // access to xstatus CSR - output csr_opcode_e csr_op_o, // operation to perform on CSR - input PrivLvl_t current_priv_lvl_i, // The current privilege level - - // LD/ST unit signals - output logic data_req_o, // start transaction to data memory - output logic data_we_o, // data memory write enable - output logic prepost_useincr_o, // when not active bypass the alu result for address calculation - output logic [1:0] data_type_o, // data type on data memory: byte, half word or word - output logic [1:0] data_sign_extension_o, // sign extension on read data from data memory / NaN boxing - output logic [1:0] data_reg_offset_o, // offset in byte inside register for stores - output logic data_load_event_o, // data request is in the special event range - - // Atomic memory access - output logic [5:0] atop_o, - - // hwloop signals - output logic [2:0] hwlp_we_o, // write enable for hwloop regs - output logic hwlp_target_mux_sel_o, // selects immediate for hwloop target - output logic hwlp_start_mux_sel_o, // selects hwloop start address input - output logic hwlp_cnt_mux_sel_o, // selects hwloop counter input - - input logic debug_mode_i, // processor is in debug mode - input logic debug_wfi_no_sleep_i, // do not let WFI cause sleep - - // jump/branches - output logic [1:0] ctrl_transfer_insn_in_dec_o, // control transfer instruction without deassert - output logic [1:0] ctrl_transfer_insn_in_id_o, // control transfer instructio is decoded - output logic [1:0] ctrl_transfer_target_mux_sel_o, // jump target selection - - // HPM related control signals - input logic [31:0] mcounteren_i -); - - // write enable/request control - logic regfile_mem_we; - logic regfile_alu_we; - logic data_req; - logic [2:0] hwlp_we; - logic csr_illegal; - logic [1:0] ctrl_transfer_insn; - - csr_opcode_e csr_op; - - logic alu_en; - logic mult_int_en; - logic mult_dot_en; - logic apu_en; - - // this instruction needs floating-point rounding-mode verification - logic check_fprm; - - logic [cv32e41p_fpu_pkg::OP_BITS-1:0] fpu_op; // fpu operation - logic fpu_op_mod; // fpu operation modifier - logic fpu_vec_op; // fpu vectorial operation - // unittypes for latencies to help us decode for APU - enum logic[1:0] {ADDMUL, DIVSQRT, NONCOMP, CONV} fp_op_group; - - - ///////////////////////////////////////////// - // ____ _ // - // | _ \ ___ ___ ___ __| | ___ _ __ // - // | | | |/ _ \/ __/ _ \ / _` |/ _ \ '__| // - // | |_| | __/ (_| (_) | (_| | __/ | // - // |____/ \___|\___\___/ \__,_|\___|_| // - // // - ///////////////////////////////////////////// - - always_comb - begin - ctrl_transfer_insn = BRANCH_NONE; - ctrl_transfer_target_mux_sel_o = JT_JAL; - - alu_en = 1'b1; - alu_operator_o = ALU_SLTU; - alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD; - alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; - alu_op_c_mux_sel_o = OP_C_REGC_OR_FWD; - alu_vec_mode_o = VEC_MODE32; - scalar_replication_o = 1'b0; - scalar_replication_c_o = 1'b0; - regc_mux_o = REGC_ZERO; - imm_a_mux_sel_o = IMMA_ZERO; - imm_b_mux_sel_o = IMMB_I; - - mult_operator_o = MUL_I; - mult_int_en = 1'b0; - mult_dot_en = 1'b0; - mult_imm_mux_o = MIMM_ZERO; - mult_signed_mode_o = 2'b00; - mult_sel_subword_o = 1'b0; - mult_dot_signed_o = 2'b00; - - apu_en = 1'b0; - apu_op_o = '0; - apu_lat_o = '0; - fp_rnd_mode_o = '0; - fpu_op = cv32e41p_fpu_pkg::SGNJ; - fpu_op_mod = 1'b0; - fpu_vec_op = 1'b0; - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; - fpu_int_fmt_o = cv32e41p_fpu_pkg::INT32; - check_fprm = 1'b0; - fp_op_group = ADDMUL; - - regfile_mem_we = 1'b0; - regfile_alu_we = 1'b0; - regfile_alu_waddr_sel_o = 1'b1; - - prepost_useincr_o = 1'b1; - - hwlp_we = 3'b0; - hwlp_target_mux_sel_o = 1'b0; - hwlp_start_mux_sel_o = 1'b0; - hwlp_cnt_mux_sel_o = 1'b0; - - csr_access_o = 1'b0; - csr_status_o = 1'b0; - csr_illegal = 1'b0; - csr_op = CSR_OP_READ; - mret_insn_o = 1'b0; - uret_insn_o = 1'b0; - - dret_insn_o = 1'b0; - - data_we_o = 1'b0; - data_type_o = 2'b00; - data_sign_extension_o = 2'b00; - data_reg_offset_o = 2'b00; - data_req = 1'b0; - data_load_event_o = 1'b0; - - atop_o = 6'b000000; - - illegal_insn_o = 1'b0; - ebrk_insn_o = 1'b0; - ecall_insn_o = 1'b0; - wfi_o = 1'b0; - - fencei_insn_o = 1'b0; - - rega_used_o = 1'b0; - regb_used_o = 1'b0; - regc_used_o = 1'b0; - reg_fp_a_o = 1'b0; - reg_fp_b_o = 1'b0; - reg_fp_c_o = 1'b0; - reg_fp_d_o = 1'b0; - - bmask_a_mux_o = BMASK_A_ZERO; - bmask_b_mux_o = BMASK_B_ZERO; - alu_bmask_a_mux_sel_o = BMASK_A_IMM; - alu_bmask_b_mux_sel_o = BMASK_B_IMM; - - is_clpx_o = 1'b0; - is_subrot_o = 1'b0; - - mret_dec_o = 1'b0; - uret_dec_o = 1'b0; - dret_dec_o = 1'b0; - - unique case (instr_rdata_i[6:0]) - - ////////////////////////////////////// - // _ _ _ __ __ ____ ____ // - // | | | | | \/ | _ \/ ___| // - // _ | | | | | |\/| | |_) \___ \ // - // | |_| | |_| | | | | __/ ___) | // - // \___/ \___/|_| |_|_| |____/ // - // // - ////////////////////////////////////// - - OPCODE_JAL: begin // Jump and Link - ctrl_transfer_target_mux_sel_o = JT_JAL; - ctrl_transfer_insn = BRANCH_JAL; - // Calculate and store PC+4 - alu_op_a_mux_sel_o = OP_A_CURRPC; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_PCINCR; - alu_operator_o = ALU_ADD; - regfile_alu_we = 1'b1; - // Calculate jump target (= PC + UJ imm) - end - - OPCODE_JALR: begin // Jump and Link Register - ctrl_transfer_target_mux_sel_o = JT_JALR; - ctrl_transfer_insn = BRANCH_JALR; - // Calculate and store PC+4 - alu_op_a_mux_sel_o = OP_A_CURRPC; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_PCINCR; - alu_operator_o = ALU_ADD; - regfile_alu_we = 1'b1; - // Calculate jump target (= RS1 + I imm) - rega_used_o = 1'b1; - - if (instr_rdata_i[14:12] != 3'b0) begin - ctrl_transfer_insn = BRANCH_NONE; - regfile_alu_we = 1'b0; - illegal_insn_o = 1'b1; - end - end - - OPCODE_BRANCH: begin // Branch - ctrl_transfer_target_mux_sel_o = JT_COND; - ctrl_transfer_insn = BRANCH_COND; - alu_op_c_mux_sel_o = OP_C_JT; - rega_used_o = 1'b1; - regb_used_o = 1'b1; - - unique case (instr_rdata_i[14:12]) - 3'b000: alu_operator_o = ALU_EQ; - 3'b001: alu_operator_o = ALU_NE; - 3'b100: alu_operator_o = ALU_LTS; - 3'b101: alu_operator_o = ALU_GES; - 3'b110: alu_operator_o = ALU_LTU; - 3'b111: alu_operator_o = ALU_GEU; - 3'b010: begin // p.beqimm - if (PULP_XPULP) begin - alu_operator_o = ALU_EQ; - regb_used_o = 1'b0; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_BI; - end else begin - illegal_insn_o = 1'b1; - end - end - 3'b011: begin // p.bneimm - if (PULP_XPULP) begin - alu_operator_o = ALU_NE; - regb_used_o = 1'b0; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_BI; - end else begin - illegal_insn_o = 1'b1; - end - end - endcase - end - - - ////////////////////////////////// - // _ ____ ______ _____ // - // | | | _ \ / / ___|_ _| // - // | | | | | |/ /\___ \ | | // - // | |___| |_| / / ___) || | // - // |_____|____/_/ |____/ |_| // - // // - ////////////////////////////////// - - OPCODE_STORE, - OPCODE_STORE_POST: begin - if (PULP_XPULP || (instr_rdata_i[6:0] == OPCODE_STORE)) begin - data_req = 1'b1; - data_we_o = 1'b1; - rega_used_o = 1'b1; - regb_used_o = 1'b1; - alu_operator_o = ALU_ADD; - // pass write data through ALU operand c - alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; - - // post-increment setup - if (instr_rdata_i[6:0] == OPCODE_STORE_POST) begin - prepost_useincr_o = 1'b0; - regfile_alu_waddr_sel_o = 1'b0; - regfile_alu_we = 1'b1; - end - - if (instr_rdata_i[14] == 1'b0) begin - // offset from immediate - imm_b_mux_sel_o = IMMB_S; - alu_op_b_mux_sel_o = OP_B_IMM; - end else begin - if (PULP_XPULP) begin - // offset from register - regc_used_o = 1'b1; - alu_op_b_mux_sel_o = OP_B_REGC_OR_FWD; - regc_mux_o = REGC_RD; - end else begin - illegal_insn_o = 1'b1; - end - end - - // store size - unique case (instr_rdata_i[13:12]) - 2'b00: data_type_o = 2'b10; // SB - 2'b01: data_type_o = 2'b01; // SH - 2'b10: data_type_o = 2'b00; // SW - default: begin - data_req = 1'b0; - data_we_o = 1'b0; - illegal_insn_o = 1'b1; - end - endcase - end else begin - illegal_insn_o = 1'b1; - end - end - - OPCODE_LOAD, - OPCODE_LOAD_POST: begin - if (PULP_XPULP || (instr_rdata_i[6:0] == OPCODE_LOAD)) begin - data_req = 1'b1; - regfile_mem_we = 1'b1; - rega_used_o = 1'b1; - data_type_o = 2'b00; - // offset from immediate - alu_operator_o = ALU_ADD; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_I; - - // post-increment setup - if (instr_rdata_i[6:0] == OPCODE_LOAD_POST) begin - prepost_useincr_o = 1'b0; - regfile_alu_waddr_sel_o = 1'b0; - regfile_alu_we = 1'b1; - end - - // sign/zero extension - data_sign_extension_o = {1'b0,~instr_rdata_i[14]}; - - // load size - unique case (instr_rdata_i[13:12]) - 2'b00: data_type_o = 2'b10; // LB - 2'b01: data_type_o = 2'b01; // LH - 2'b10: data_type_o = 2'b00; // LW - default: data_type_o = 2'b00; // illegal or reg-reg - endcase - - // reg-reg load (different encoding) - if (instr_rdata_i[14:12] == 3'b111) begin - if (PULP_XPULP) begin - // offset from RS2 - regb_used_o = 1'b1; - alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; - - // sign/zero extension - data_sign_extension_o = {1'b0, ~instr_rdata_i[30]}; - - // load size - unique case (instr_rdata_i[31:25]) - 7'b0000_000, - 7'b0100_000: data_type_o = 2'b10; // LB, LBU - 7'b0001_000, - 7'b0101_000: data_type_o = 2'b01; // LH, LHU - 7'b0010_000: data_type_o = 2'b00; // LW - default: begin - illegal_insn_o = 1'b1; - end - endcase - end else begin - illegal_insn_o = 1'b1; - end - end - - // special p.elw (event load) - if (instr_rdata_i[14:12] == 3'b110) begin - if (PULP_CLUSTER && (instr_rdata_i[6:0] == OPCODE_LOAD)) begin - data_load_event_o = 1'b1; - end else begin - // p.elw only valid for PULP_CLUSTER = 1; p.elw with post increment does not exist - illegal_insn_o = 1'b1; - end - end - - if (instr_rdata_i[14:12] == 3'b011) begin - // LD -> RV64 only - illegal_insn_o = 1'b1; - end - end else begin - illegal_insn_o = 1'b1; - end - end - - OPCODE_AMO: begin - if (A_EXTENSION) begin : decode_amo - if (instr_rdata_i[14:12] == 3'b010) begin // RV32A Extension (word) - data_req = 1'b1; - data_type_o = 2'b00; - rega_used_o = 1'b1; - regb_used_o = 1'b1; - regfile_mem_we = 1'b1; - prepost_useincr_o = 1'b0; // only use alu_operand_a as address (not a+b) - alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD; - - data_sign_extension_o = 1'b1; - - // Apply AMO instruction at `atop_o`. - atop_o = {1'b1, instr_rdata_i[31:27]}; - - unique case (instr_rdata_i[31:27]) - AMO_LR: begin - data_we_o = 1'b0; - end - AMO_SC, - AMO_SWAP, - AMO_ADD, - AMO_XOR, - AMO_AND, - AMO_OR, - AMO_MIN, - AMO_MAX, - AMO_MINU, - AMO_MAXU: begin - data_we_o = 1'b1; - alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; // pass write data through ALU operand c - end - default : illegal_insn_o = 1'b1; - endcase - end - else begin - illegal_insn_o = 1'b1; - end - end else begin : no_decode_amo - illegal_insn_o = 1'b1; - end - end - - - ////////////////////////// - // _ _ _ _ // - // / \ | | | | | | // - // / _ \ | | | | | | // - // / ___ \| |__| |_| | // - // /_/ \_\_____\___/ // - // // - ////////////////////////// - - OPCODE_LUI: begin // Load Upper Immediate - alu_op_a_mux_sel_o = OP_A_IMM; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_a_mux_sel_o = IMMA_ZERO; - imm_b_mux_sel_o = IMMB_U; - alu_operator_o = ALU_ADD; - regfile_alu_we = 1'b1; - end - - OPCODE_AUIPC: begin // Add Upper Immediate to PC - alu_op_a_mux_sel_o = OP_A_CURRPC; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_U; - alu_operator_o = ALU_ADD; - regfile_alu_we = 1'b1; - end - - OPCODE_OPIMM: begin // Register-Immediate ALU Operations - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_I; - regfile_alu_we = 1'b1; - rega_used_o = 1'b1; - - unique case (instr_rdata_i[14:12]) - 3'b000: alu_operator_o = ALU_ADD; // Add Immediate - 3'b010: alu_operator_o = ALU_SLTS; // Set to one if Lower Than Immediate - 3'b011: alu_operator_o = ALU_SLTU; // Set to one if Lower Than Immediate Unsigned - 3'b100: alu_operator_o = ALU_XOR; // Exclusive Or with Immediate - 3'b110: alu_operator_o = ALU_OR; // Or with Immediate - 3'b111: alu_operator_o = ALU_AND; // And with Immediate - - 3'b001: begin - alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate - if (instr_rdata_i[31:25] != 7'b0) - illegal_insn_o = 1'b1; - end - - 3'b101: begin - if (instr_rdata_i[31:25] == 7'b0) - alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate - else if (instr_rdata_i[31:25] == 7'b010_0000) - alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate - else - illegal_insn_o = 1'b1; - end - - - endcase - end - - OPCODE_OP: begin // Register-Register ALU operation - - // PREFIX 11 - if (instr_rdata_i[31:30] == 2'b11) begin - if (PULP_XPULP) begin - ////////////////////////////// - // IMMEDIATE BIT-MANIPULATION - ////////////////////////////// - - regfile_alu_we = 1'b1; - rega_used_o = 1'b1; - - // bit-manipulation instructions - bmask_a_mux_o = BMASK_A_S3; - bmask_b_mux_o = BMASK_B_S2; - alu_op_b_mux_sel_o = OP_B_IMM; - - unique case (instr_rdata_i[14:12]) - 3'b000: begin - alu_operator_o = ALU_BEXT; - imm_b_mux_sel_o = IMMB_S2; - bmask_b_mux_o = BMASK_B_ZERO; - end - 3'b001: begin - alu_operator_o = ALU_BEXTU; - imm_b_mux_sel_o = IMMB_S2; - bmask_b_mux_o = BMASK_B_ZERO; - end - 3'b010: begin - alu_operator_o = ALU_BINS; - imm_b_mux_sel_o = IMMB_S2; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - end - 3'b011: begin - alu_operator_o = ALU_BCLR; - end - 3'b100: begin - alu_operator_o = ALU_BSET; - end - 3'b101: begin - alu_operator_o = ALU_BREV; - // Enable write back to RD - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - // Extract the source register on operand a - imm_b_mux_sel_o = IMMB_S2; - // Map the radix to bmask_a immediate - alu_bmask_a_mux_sel_o = BMASK_A_IMM; - end - default: illegal_insn_o = 1'b1; - endcase - end else begin - illegal_insn_o = 1'b1; - end - end - - // PREFIX 10 - else if (instr_rdata_i[31:30] == 2'b10) begin - ////////////////////////////// - // REGISTER BIT-MANIPULATION - ////////////////////////////// - if (instr_rdata_i[29:25]==5'b00000) begin - if (PULP_XPULP) begin - regfile_alu_we = 1'b1; - rega_used_o = 1'b1; - - bmask_a_mux_o = BMASK_A_S3; - bmask_b_mux_o = BMASK_B_S2; - alu_op_b_mux_sel_o = OP_B_IMM; - - unique case (instr_rdata_i[14:12]) - 3'b000: begin - alu_operator_o = ALU_BEXT; - imm_b_mux_sel_o = IMMB_S2; - bmask_b_mux_o = BMASK_B_ZERO; - //register variant - alu_op_b_mux_sel_o = OP_B_BMASK; - alu_bmask_a_mux_sel_o = BMASK_A_REG; - regb_used_o = 1'b1; - end - 3'b001: begin - alu_operator_o = ALU_BEXTU; - imm_b_mux_sel_o = IMMB_S2; - bmask_b_mux_o = BMASK_B_ZERO; - //register variant - alu_op_b_mux_sel_o = OP_B_BMASK; - alu_bmask_a_mux_sel_o = BMASK_A_REG; - regb_used_o = 1'b1; - end - 3'b010: begin - alu_operator_o = ALU_BINS; - imm_b_mux_sel_o = IMMB_S2; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - //register variant - alu_op_b_mux_sel_o = OP_B_BMASK; - alu_bmask_a_mux_sel_o = BMASK_A_REG; - alu_bmask_b_mux_sel_o = BMASK_B_REG; - regb_used_o = 1'b1; - end - 3'b011: begin - alu_operator_o = ALU_BCLR; - //register variant - regb_used_o = 1'b1; - alu_bmask_a_mux_sel_o = BMASK_A_REG; - alu_bmask_b_mux_sel_o = BMASK_B_REG; - end - 3'b100: begin - alu_operator_o = ALU_BSET; - //register variant - regb_used_o = 1'b1; - alu_bmask_a_mux_sel_o = BMASK_A_REG; - alu_bmask_b_mux_sel_o = BMASK_B_REG; - end - default: illegal_insn_o = 1'b1; - endcase - end else begin - illegal_insn_o = 1'b1; - end - - /////////////////////// - // VECTORIAL FLOAT OPS - /////////////////////// - end else begin - // Vectorial FP not available in 'old' shared FPU - if (FPU==1 && C_XFVEC) begin - - // using APU instead of ALU - apu_en = 1'b1; - alu_en = 1'b0; - // by default, set all registers to FP registers and use 2 - rega_used_o = 1'b1; - regb_used_o = 1'b1; - reg_fp_a_o = 1'b1; - reg_fp_b_o = 1'b1; - reg_fp_d_o = 1'b1; - fpu_vec_op = 1'b1; - // replication bit comes from instruction (can change for some ops) - scalar_replication_o = instr_rdata_i[14]; - // by default we need to verify rm is legal but assume it is for now - check_fprm = 1'b1; - fp_rnd_mode_o = frm_i; // all vectorial ops have rm from fcsr - - // Decode Formats - unique case (instr_rdata_i[13:12]) - // FP32 - 2'b00: begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; - alu_vec_mode_o = VEC_MODE32; - end - // FP16ALT - 2'b01: begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - alu_vec_mode_o = VEC_MODE16; - end - // FP16 - 2'b10: begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16; - alu_vec_mode_o = VEC_MODE16; - end - // FP8 - 2'b11: begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP8; - alu_vec_mode_o = VEC_MODE8; - end - endcase - - // By default, src=dst - fpu_src_fmt_o = fpu_dst_fmt_o; - - // decode vectorial FP instruction - unique case (instr_rdata_i[29:25]) inside - // vfadd.vfmt - Vectorial FP Addition - 5'b00001: begin - fpu_op = cv32e41p_fpu_pkg::ADD; - fp_op_group = ADDMUL; - // FPnew needs addition operands as operand B and C - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; - scalar_replication_o = 1'b0; - scalar_replication_c_o = instr_rdata_i[14]; - end - // vfsub.vfmt - Vectorial FP Subtraction - 5'b00010: begin - fpu_op = cv32e41p_fpu_pkg::ADD; - fpu_op_mod = 1'b1; - fp_op_group = ADDMUL; - // FPnew needs addition operands as operand B and C - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; - scalar_replication_o = 1'b0; - scalar_replication_c_o = instr_rdata_i[14]; - end - // vfmul.vfmt - Vectorial FP Multiplication - 5'b00011: begin - fpu_op = cv32e41p_fpu_pkg::MUL; - fp_op_group = ADDMUL; - end - // vfdiv.vfmt - Vectorial FP Division - 5'b00100: begin - fpu_op = cv32e41p_fpu_pkg::DIV; - fp_op_group = DIVSQRT; - end - // vfmin.vfmt - Vectorial FP Minimum - 5'b00101: begin - fpu_op = cv32e41p_fpu_pkg::MINMAX; - fp_rnd_mode_o = 3'b000; // min - fp_op_group = NONCOMP; - check_fprm = 1'b0; // instruction encoded in rm - end - // vfmax.vfmt - Vectorial FP Maximum - 5'b00110: begin - fpu_op = cv32e41p_fpu_pkg::MINMAX; - fp_rnd_mode_o = 3'b001; // max - fp_op_group = NONCOMP; - check_fprm = 1'b0; // instruction encoded in rm - end - // vfsqrt.vfmt - Vectorial FP Square Root - 5'b00111: begin - regb_used_o = 1'b0; - fpu_op = cv32e41p_fpu_pkg::SQRT; - fp_op_group = DIVSQRT; - // rs2 and R must be zero - if ((instr_rdata_i[24:20] != 5'b00000) || instr_rdata_i[14]) begin - illegal_insn_o = 1'b1; - end - end - // vfmac.vfmt - Vectorial FP Multiply-Accumulate - 5'b01000: begin - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; // third operand is rd - reg_fp_c_o = 1'b1; - fpu_op = cv32e41p_fpu_pkg::FMADD; - fp_op_group = ADDMUL; - end - // vfmre.vfmt - Vectorial FP Multiply-Reduce - 5'b01001: begin - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; // third operand is rd - reg_fp_c_o = 1'b1; - fpu_op = cv32e41p_fpu_pkg::FMADD; - fpu_op_mod = 1'b1; - fp_op_group = ADDMUL; - end - // Moves, Conversions, Classifications - 5'b01100: begin - regb_used_o = 1'b0; - scalar_replication_o = 1'b0; - // Decode Operation in rs2 - unique case (instr_rdata_i[24:20]) inside - // vfmv.{x.vfmt/vfmt.x} - Vectorial FP Reg <-> GP Reg Moves - 5'b00000: begin - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; // set rs2 = rs1 so we can map FMV to SGNJ in the unit - fpu_op = cv32e41p_fpu_pkg::SGNJ; - fp_rnd_mode_o = 3'b011; // passthrough without checking nan-box - fp_op_group = NONCOMP; - check_fprm = 1'b0; - // GP reg to FP reg - if (instr_rdata_i[14]) begin - reg_fp_a_o = 1'b0; // go from integer regfile - fpu_op_mod = 1'b0; // nan-box result - end - // FP reg to GP reg - else begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op_mod = 1'b1; // sign-extend result - end - end - // vfclass.vfmt - Vectorial FP Classifications - 5'b00001: begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::CLASSIFY; - fp_rnd_mode_o = 3'b000; - fp_op_group = NONCOMP; - check_fprm = 1'b0; - // R must not be set - if (instr_rdata_i[14]) illegal_insn_o = 1'b1; - end - // vfcvt.{x.vfmt/vfmt.x} - Vectorial FP <-> Int Conversions - 5'b0001?: begin - fp_op_group = CONV; - fpu_op_mod = instr_rdata_i[14]; // signed/unsigned switch - // Integer width matches FP width - unique case (instr_rdata_i[13:12]) - // FP32 - 2'b00 : fpu_int_fmt_o = cv32e41p_fpu_pkg::INT32; - // FP16[ALT] - 2'b01, - 2'b10: fpu_int_fmt_o = cv32e41p_fpu_pkg::INT16; - // FP8 - 2'b11: fpu_int_fmt_o = cv32e41p_fpu_pkg::INT8; - endcase - // Int to FP conversion - if (instr_rdata_i[20]) begin - reg_fp_a_o = 1'b0; // go from integer regfile - fpu_op = cv32e41p_fpu_pkg::I2F; - end - // FP to Int conversion - else begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::F2I; - end - end - // vfcvt.vfmt.vfmt - Vectorial FP <-> FP Conversions - 5'b001??: begin - fpu_op = cv32e41p_fpu_pkg::F2F; - fp_op_group = CONV; - // check source format - unique case (instr_rdata_i[21:20]) - // Only process instruction if corresponding extension is active (static) - 2'b00: begin - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; - if (~C_RVF) illegal_insn_o = 1'b1; - end - 2'b01: begin - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - if (~C_XF16ALT) illegal_insn_o = 1'b1; - end - 2'b10: begin - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16; - if (~C_XF16) illegal_insn_o = 1'b1; - end - 2'b11: begin - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP8; - if (~C_XF8) illegal_insn_o = 1'b1; - end - endcase - // R must not be set - if (instr_rdata_i[14]) illegal_insn_o = 1'b1; - end - // others - default : illegal_insn_o = 1'b1; - endcase - end - // vfsgnj.vfmt - Vectorial FP Sign Injection - 5'b01101: begin - fpu_op = cv32e41p_fpu_pkg::SGNJ; - fp_rnd_mode_o = 3'b000; // sgnj - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection - 5'b01110: begin - fpu_op = cv32e41p_fpu_pkg::SGNJ; - fp_rnd_mode_o = 3'b001; // sgnjn - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfsgnjx.vfmt - Vectorial FP Xored Sign Injection - 5'b01111: begin - fpu_op = cv32e41p_fpu_pkg::SGNJ; - fp_rnd_mode_o = 3'b010; // sgnjx - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfeq.vfmt - Vectorial FP Equals - 5'b10000: begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::CMP; - fp_rnd_mode_o = 3'b010; // eq - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfne.vfmt - Vectorial FP Not Equals - 5'b10001: begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::CMP; - fpu_op_mod = 1'b1; // invert output - fp_rnd_mode_o = 3'b010; // eq - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vflt.vfmt - Vectorial FP Less Than - 5'b10010: begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::CMP; - fp_rnd_mode_o = 3'b001; // lt - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfge.vfmt - Vectorial FP Greater Than or Equals - 5'b10011: begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::CMP; - fpu_op_mod = 1'b1; // invert output - fp_rnd_mode_o = 3'b001; // lt - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfle.vfmt - Vectorial FP Less Than or Equals - 5'b10100: begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::CMP; - fp_rnd_mode_o = 3'b000; // le - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfgt.vfmt - Vectorial FP Greater Than - 5'b10101: begin - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::CMP; - fpu_op_mod = 1'b1; // invert output - fp_rnd_mode_o = 3'b000; // le - fp_op_group = NONCOMP; - check_fprm = 1'b0; - end - // vfcpk{a-d}.vfmt.s/d - 5'b110??: begin - // vfcpk{{a/c}/{b/d}} selection in R bit - fpu_op_mod = instr_rdata_i[14]; - fp_op_group = CONV; - scalar_replication_o = 1'b0; - - if (instr_rdata_i[25]) fpu_op = cv32e41p_fpu_pkg::CPKCD; // vfcpk{c/d} - else fpu_op = cv32e41p_fpu_pkg::CPKAB; // vfcpk{a/b} - - // vfcpk{a-d}.vfmt.d - from double - if (instr_rdata_i[26]) begin - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP64; - if (~C_RVD) illegal_insn_o = 1'b1; - end - // vfcpk{a-d}.vfmt.s - else begin - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; - if (~C_RVF) illegal_insn_o = 1'b1; - end - // Resolve legal vfcpk / format combinations (mostly static) - if (fpu_op == cv32e41p_fpu_pkg::CPKCD) begin // vfcpk{c/d} not possible unless FP8 and FLEN>=64 - if (~C_XF8 || ~C_RVD) illegal_insn_o = 1'b1; - end else begin - if (instr_rdata_i[14]) begin // vfcpkb - // vfcpkb not possible for FP32 - if (fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; - // vfcpkb not possible for FP16[ALT] if not RVD - if (~C_RVD && (fpu_dst_fmt_o != cv32e41p_fpu_pkg::FP8)) illegal_insn_o = 1'b1; - end - end - end - // Rest are illegal instructions - default: begin - illegal_insn_o = 1'b1; - end - endcase - - // check enabled formats (static) - // need RVD for F vectors - if ((~C_RVF || ~C_RVD) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; - // need RVF for F16 vectors - if ((~C_XF16 || ~C_RVF) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16) illegal_insn_o = 1'b1; - // need RVF for F16 vectors - if ((~C_XF16ALT || ~C_RVF) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16ALT) begin - illegal_insn_o = 1'b1; - end - // need F16 for F8 vectors - if ((~C_XF8 || (~C_XF16 && ~C_XF16ALT)) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP8) begin - illegal_insn_o = 1'b1; - end - - // check rounding mode - if (check_fprm) begin - unique case (frm_i) inside - [3'b000:3'b100] : ; //legal rounding modes - default : illegal_insn_o = 1'b1; - endcase - end - - // Set latencies for FPnew from config. The C_LAT constants contain the number - // of pipeline registers. the APU takes the following values: - // 1 = single cycle (no latency), 2 = one pipestage, 3 = two or more pipestages - case (fp_op_group) - // ADDMUL has format dependent latency - ADDMUL : begin - unique case (fpu_dst_fmt_o) - cv32e41p_fpu_pkg::FP32 : apu_lat_o = (C_LAT_FP32<2) ? C_LAT_FP32+1 : 2'h3; - cv32e41p_fpu_pkg::FP16 : apu_lat_o = (C_LAT_FP16<2) ? C_LAT_FP16+1 : 2'h3; - cv32e41p_fpu_pkg::FP16ALT : apu_lat_o = (C_LAT_FP16ALT<2) ? C_LAT_FP16ALT+1 : 2'h3; - cv32e41p_fpu_pkg::FP8 : apu_lat_o = (C_LAT_FP8<2) ? C_LAT_FP8+1 : 2'h3; - default : ; - endcase - end - // DIVSQRT is iterative and takes more than 2 cycles - DIVSQRT : apu_lat_o = 2'h3; - // NONCOMP uses the same latency for all formats - NONCOMP : apu_lat_o = (C_LAT_NONCOMP<2) ? C_LAT_NONCOMP+1 : 2'h3; - // CONV uses the same latency for all formats - CONV : apu_lat_o = (C_LAT_CONV<2) ? C_LAT_CONV+1 : 2'h3; - endcase - - // Set FPnew OP and OPMOD as the APU op - apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; - end - // FPU!=1 or no Vectors or old shared unit - else begin - illegal_insn_o = 1'b1; - end - end // Vectorial Float Ops - - end // prefix 10 - - // PREFIX 00/01 - else begin - // non bit-manipulation instructions - regfile_alu_we = 1'b1; - rega_used_o = 1'b1; - - if (~instr_rdata_i[28]) regb_used_o = 1'b1; - - unique case ({instr_rdata_i[30:25], instr_rdata_i[14:12]}) - // RV32I ALU operations - {6'b00_0000, 3'b000}: alu_operator_o = ALU_ADD; // Add - {6'b10_0000, 3'b000}: alu_operator_o = ALU_SUB; // Sub - {6'b00_0000, 3'b010}: alu_operator_o = ALU_SLTS; // Set Lower Than - {6'b00_0000, 3'b011}: alu_operator_o = ALU_SLTU; // Set Lower Than Unsigned - {6'b00_0000, 3'b100}: alu_operator_o = ALU_XOR; // Xor - {6'b00_0000, 3'b110}: alu_operator_o = ALU_OR; // Or - {6'b00_0000, 3'b111}: alu_operator_o = ALU_AND; // And - {6'b00_0000, 3'b001}: alu_operator_o = ALU_SLL; // Shift Left Logical - {6'b00_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical - {6'b10_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic - - // supported RV32M instructions - {6'b00_0001, 3'b000}: begin // mul - alu_en = 1'b0; - mult_int_en = 1'b1; - mult_operator_o = MUL_MAC32; - regc_mux_o = REGC_ZERO; - end - {6'b00_0001, 3'b001}: begin // mulh - alu_en = 1'b0; - regc_used_o = 1'b1; - regc_mux_o = REGC_ZERO; - mult_signed_mode_o = 2'b11; - mult_int_en = 1'b1; - mult_operator_o = MUL_H; - end - {6'b00_0001, 3'b010}: begin // mulhsu - alu_en = 1'b0; - regc_used_o = 1'b1; - regc_mux_o = REGC_ZERO; - mult_signed_mode_o = 2'b01; - mult_int_en = 1'b1; - mult_operator_o = MUL_H; - end - {6'b00_0001, 3'b011}: begin // mulhu - alu_en = 1'b0; - regc_used_o = 1'b1; - regc_mux_o = REGC_ZERO; - mult_signed_mode_o = 2'b00; - mult_int_en = 1'b1; - mult_operator_o = MUL_H; - end - {6'b00_0001, 3'b100}: begin // div - alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - regb_used_o = 1'b1; - alu_operator_o = ALU_DIV; - end - {6'b00_0001, 3'b101}: begin // divu - alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - regb_used_o = 1'b1; - alu_operator_o = ALU_DIVU; - end - {6'b00_0001, 3'b110}: begin // rem - alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - regb_used_o = 1'b1; - alu_operator_o = ALU_REM; - end - {6'b00_0001, 3'b111}: begin // remu - alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - regb_used_o = 1'b1; - alu_operator_o = ALU_REMU; - end - - // PULP specific instructions - {6'b10_0001, 3'b000}: begin // p.mac - if (PULP_XPULP) begin - alu_en = 1'b0; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - mult_int_en = 1'b1; - mult_operator_o = MUL_MAC32; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b10_0001, 3'b001}: begin // p.msu - if (PULP_XPULP) begin - alu_en = 1'b0; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - mult_int_en = 1'b1; - mult_operator_o = MUL_MSU32; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0010, 3'b010}: begin // Set Lower Equal Than - p.slet - if (PULP_XPULP) begin - alu_operator_o = ALU_SLETS; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0010, 3'b011}: begin // Set Lower Equal Than Unsigned; p.sletu - if (PULP_XPULP) begin - alu_operator_o = ALU_SLETU; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0010, 3'b100}: begin // Min - p.min - if (PULP_XPULP) begin - alu_operator_o = ALU_MIN; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0010, 3'b101}: begin // Min Unsigned - p.minu - if (PULP_XPULP) begin - alu_operator_o = ALU_MINU; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0010, 3'b110}: begin // Max - p.max - if (PULP_XPULP) begin - alu_operator_o = ALU_MAX; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0010, 3'b111}: begin // Max Unsigned - p.maxu - if (PULP_XPULP) begin - alu_operator_o = ALU_MAXU; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0100, 3'b101}: begin // Rotate Right - p.ror - if (PULP_XPULP) begin - alu_operator_o = ALU_ROR; - end else begin - illegal_insn_o = 1'b1; - end - end - - // PULP specific instructions using only one source register - - {6'b00_1000, 3'b000}: begin // Find First 1 - p.ff1 - if (PULP_XPULP) begin - alu_operator_o = ALU_FF1; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1000, 3'b001}: begin // Find Last 1 - p.fl1 - if (PULP_XPULP) begin - alu_operator_o = ALU_FL1; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1000, 3'b010}: begin // Count Leading Bits - p.clb - if (PULP_XPULP) begin - alu_operator_o = ALU_CLB; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1000, 3'b011}: begin // Count set bits (popcount) - p.cnt - if (PULP_XPULP) begin - alu_operator_o = ALU_CNT; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1000, 3'b100}: begin // Sign-extend Halfword - p.exths - if (PULP_XPULP) begin - alu_operator_o = ALU_EXTS; - alu_vec_mode_o = VEC_MODE16; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1000, 3'b101}: begin // Zero-extend Halfword - p.exthz - if (PULP_XPULP) begin - alu_operator_o = ALU_EXT; - alu_vec_mode_o = VEC_MODE16; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1000, 3'b110}: begin // Sign-extend Byte - p.extbs - if (PULP_XPULP) begin - alu_operator_o = ALU_EXTS; - alu_vec_mode_o = VEC_MODE8; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1000, 3'b111}: begin // Zero-extend Byte - p.extbz - if (PULP_XPULP) begin - alu_operator_o = ALU_EXT; - alu_vec_mode_o = VEC_MODE8; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_0010, 3'b000}: begin // p.abs - if (PULP_XPULP) begin - alu_operator_o = ALU_ABS; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1010, 3'b001}: begin // p.clip - if (PULP_XPULP) begin - alu_operator_o = ALU_CLIP; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_CLIP; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1010, 3'b010}: begin // p.clipu - if (PULP_XPULP) begin - alu_operator_o = ALU_CLIPU; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_CLIP; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1010, 3'b101}: begin // p.clipr - if (PULP_XPULP) begin - alu_operator_o = ALU_CLIP; - regb_used_o = 1'b1; - end else begin - illegal_insn_o = 1'b1; - end - end - {6'b00_1010, 3'b110}: begin // p.clipur - if (PULP_XPULP) begin - alu_operator_o = ALU_CLIPU; - regb_used_o = 1'b1; - end else begin - illegal_insn_o = 1'b1; - end - end - - default: begin - illegal_insn_o = 1'b1; - end - endcase - end - end - - //////////////////////////// - // ______ _____ _ _ // - // | ____| __ \| | | | // - // | |__ | |__) | | | | // - // | __| | ___/| | | | // - // | | | | | |__| | // - // |_| |_| \____/ // - // // - //////////////////////////// - - // floating point arithmetic - OPCODE_OP_FP: begin - if (FPU==1) begin - - // using APU instead of ALU - apu_en = 1'b1; - alu_en = 1'b0; - // by default, set all registers to FP registers and use 2 - rega_used_o = 1'b1; - regb_used_o = 1'b1; - reg_fp_a_o = 1'b1; - reg_fp_b_o = 1'b1; - reg_fp_d_o = 1'b1; - // by default we need to verify rm is legal but assume it is for now - check_fprm = 1'b1; - fp_rnd_mode_o = instr_rdata_i[14:12]; - - // Decode Formats (preliminary, can change for some ops) - unique case (instr_rdata_i[26:25]) - // FP32 - 2'b00: fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; - // FP64 - 2'b01: fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP64; - // FP16 or FP16ALT - 2'b10: begin - // FP16alt encoded in rm field - if (instr_rdata_i[14:12]==3'b101) fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - // this can still change to FP16ALT - else fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16; - end - // FP8 - 2'b11: fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP8; - endcase - - // By default, src=dst - fpu_src_fmt_o = fpu_dst_fmt_o; - - // decode FP instruction - unique case (instr_rdata_i[31:27]) - // fadd.fmt - FP Addition - 5'b00000: begin - fpu_op = cv32e41p_fpu_pkg::ADD; - fp_op_group = ADDMUL; - apu_op_o = 2'b0; - apu_lat_o = (PIPE_REG_ADDSUB==1) ? 2'h2 : 2'h1; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; - end - // fsub.fmt - FP Subtraction - 5'b00001: begin - fpu_op = cv32e41p_fpu_pkg::ADD; - fpu_op_mod = 1'b1; - fp_op_group = ADDMUL; - apu_op_o = 2'b1; - apu_lat_o = (PIPE_REG_ADDSUB==1) ? 2'h2 : 2'h1; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; - end - // fmul.fmt - FP Multiplication - 5'b00010: begin - fpu_op = cv32e41p_fpu_pkg::MUL; - fp_op_group = ADDMUL; - apu_lat_o = (PIPE_REG_MULT==1) ? 2'h2 : 2'h1; - end - // fdiv.fmt - FP Division - 5'b00011: begin - fpu_op = cv32e41p_fpu_pkg::DIV; - fp_op_group = DIVSQRT; - apu_lat_o = 2'h3; - end - // fsqrt.fmt - FP Square Root - 5'b01011: begin - regb_used_o = 1'b0; - fpu_op = cv32e41p_fpu_pkg::SQRT; - fp_op_group = DIVSQRT; - apu_op_o = 1'b1; - apu_lat_o = 2'h3; - // rs2 must be zero - if (instr_rdata_i[24:20] != 5'b00000) illegal_insn_o = 1'b1; - end - // fsgn{j[n]/jx}.fmt - FP Sign Injection - 5'b00100: begin - fpu_op = cv32e41p_fpu_pkg::SGNJ; - fp_op_group = NONCOMP; - check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (C_XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) - if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010], [3'b100:3'b110]})) begin - illegal_insn_o = 1'b1; - end - // FP16ALT uses special encoding here - if (instr_rdata_i[14]) begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end else begin - fp_rnd_mode_o = {1'b0, instr_rdata_i[13:12]}; - end - end else begin - if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010]})) illegal_insn_o = 1'b1; - end - end - // fmin/fmax.fmt - FP Minimum / Maximum - 5'b00101: begin - fpu_op = cv32e41p_fpu_pkg::MINMAX; - fp_op_group = NONCOMP; - check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (C_XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) - if (!(instr_rdata_i[14:12] inside {[3'b000:3'b001], [3'b100:3'b101]})) begin - illegal_insn_o = 1'b1; - end - // FP16ALT uses special encoding here - if (instr_rdata_i[14]) begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end else begin - fp_rnd_mode_o = {1'b0, instr_rdata_i[13:12]}; - end - end else begin - if (!(instr_rdata_i[14:12] inside {[3'b000:3'b001]})) illegal_insn_o = 1'b1; - end - end - // fcvt.fmt.fmt - FP to FP Conversion - 5'b01000: begin - regb_used_o = 1'b0; - fpu_op = cv32e41p_fpu_pkg::F2F; - fp_op_group = CONV; - // bits [22:20] used, other bits must be 0 - if (instr_rdata_i[24:23]) illegal_insn_o = 1'b1; - // check source format - unique case (instr_rdata_i[22:20]) - // Only process instruction if corresponding extension is active (static) - 3'b000: begin - if (~C_RVF) illegal_insn_o = 1'b1; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; - end - 3'b001: begin - if (~C_RVD) illegal_insn_o = 1'b1; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP64; - end - 3'b010: begin - if (~C_XF16) illegal_insn_o = 1'b1; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16; - end - 3'b110: begin - if (~C_XF16ALT) illegal_insn_o = 1'b1; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end - 3'b011: begin - if (~C_XF8) illegal_insn_o = 1'b1; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP8; - end - default: illegal_insn_o = 1'b1; - endcase - end - // fmulex.s.fmt - FP Expanding Multiplication to FP32 - 5'b01001: begin - fpu_op = cv32e41p_fpu_pkg::MUL; - fp_op_group = ADDMUL; - apu_lat_o = (PIPE_REG_MULT==1) ? 2'h2 : 2'h1; - // set dst format to FP32 - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; - end - // fmacex.s.fmt - FP Expanding Multipy-Accumulate to FP32 - 5'b01010: begin - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; // third operand is rd - reg_fp_c_o = 1'b1; - fpu_op = cv32e41p_fpu_pkg::FMADD; - fp_op_group = ADDMUL; - apu_lat_o = (PIPE_REG_MULT==1) ? 2'h2 : 2'h1; - // set dst format to FP32 - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; - end - // feq/flt/fle.fmt - FP Comparisons - 5'b10100: begin - fpu_op = cv32e41p_fpu_pkg::CMP; - fp_op_group = NONCOMP; - reg_fp_d_o = 1'b0; // go to integer regfile - check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (C_XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) - if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010], [3'b100:3'b110]})) begin - illegal_insn_o = 1'b1; - end - // FP16ALT uses special encoding here - if (instr_rdata_i[14]) begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end else begin - fp_rnd_mode_o = {1'b0, instr_rdata_i[13:12]}; - end - end else begin - if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010]})) illegal_insn_o = 1'b1; - end - end - // fcvt.ifmt.fmt - FP to Int Conversion - 5'b11000: begin - regb_used_o = 1'b0; - reg_fp_d_o = 1'b0; // go to integer regfile - fpu_op = cv32e41p_fpu_pkg::F2I; - fp_op_group = CONV; - fpu_op_mod = instr_rdata_i[20]; // signed/unsigned switch - apu_op_o = 2'b1; - apu_lat_o = (PIPE_REG_CAST==1) ? 2'h2 : 2'h1; - - unique case (instr_rdata_i[26:25]) //fix for casting to different formats other than FP32 - 2'b00: begin - if (~C_RVF) illegal_insn_o = 1; - else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; - end - 2'b01: begin - if (~C_RVD) illegal_insn_o = 1; - else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP64; - end - 2'b10: begin - if (instr_rdata_i[14:12] == 3'b101) begin - if (~C_XF16ALT) illegal_insn_o = 1; - else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end else if (~C_XF16) begin - illegal_insn_o = 1; - end else begin - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16; - end - end - 2'b11: begin - if (~C_XF8) illegal_insn_o = 1; - else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP8; - end - endcase // unique case (instr_rdata_i[26:25]) - // bits [21:20] used, other bits must be 0 - if (instr_rdata_i[24:21]) illegal_insn_o = 1'b1; // in RV32, no casts to L allowed. - end - // fcvt.fmt.ifmt - Int to FP Conversion - 5'b11010: begin - regb_used_o = 1'b0; - reg_fp_a_o = 1'b0; // go from integer regfile - fpu_op = cv32e41p_fpu_pkg::I2F; - fp_op_group = CONV; - fpu_op_mod = instr_rdata_i[20]; // signed/unsigned switch - apu_op_o = 2'b0; - apu_lat_o = (PIPE_REG_CAST==1) ? 2'h2 : 2'h1; - // bits [21:20] used, other bits must be 0 - if (instr_rdata_i[24:21]) illegal_insn_o = 1'b1; // in RV32, no casts to L allowed. - end - // move and class - 5'b11100: begin - regb_used_o = 1'b0; - reg_fp_d_o = 1'b0; // go to integer regfile - fp_op_group = NONCOMP; - check_fprm = 1'b0; // instruction encoded in rm, do the check here - // fmv.x.fmt - FPR to GPR Move - if (instr_rdata_i[14:12] == 3'b000 || (C_XF16ALT && instr_rdata_i[14:12] == 3'b100)) begin - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; // set rs2 = rs1 so we can map FMV to SGNJ in the unit - fpu_op = cv32e41p_fpu_pkg::SGNJ; // mapped to SGNJ-passthrough since no recoding - fpu_op_mod = 1'b1; // sign-extend result - fp_rnd_mode_o = 3'b011; // passthrough without checking nan-box - // FP16ALT uses special encoding here - if (instr_rdata_i[14]) begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end - // fclass.fmt - FP Classify - end else if (instr_rdata_i[14:12] == 3'b001 || (C_XF16ALT && instr_rdata_i[14:12] == 3'b101)) begin - fpu_op = cv32e41p_fpu_pkg::CLASSIFY; - fp_rnd_mode_o = 3'b000; - // FP16ALT uses special encoding here - if (instr_rdata_i[14]) begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end - end else begin - illegal_insn_o = 1'b1; - end - // rs2 must be zero - if (instr_rdata_i[24:20]) illegal_insn_o = 1'b1; - end - // fmv.fmt.x - GPR to FPR Move - 5'b11110: begin - regb_used_o = 1'b0; - reg_fp_a_o = 1'b0; // go from integer regfile - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; // set rs2 = rs1 so we can map FMV to SGNJ in the unit - fpu_op = cv32e41p_fpu_pkg::SGNJ; // mapped to SGNJ-passthrough since no recoding - fpu_op_mod = 1'b0; // nan-box result - fp_op_group = NONCOMP; - fp_rnd_mode_o = 3'b011; // passthrough without checking nan-box - check_fprm = 1'b0; // instruction encoded in rm, do the check here - if (instr_rdata_i[14:12] == 3'b000 || (C_XF16ALT && instr_rdata_i[14:12] == 3'b100)) begin - // FP16ALT uses special encoding here - if (instr_rdata_i[14]) begin - fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - end - end else begin - illegal_insn_o = 1'b1; - end - // rs2 must be zero - if (instr_rdata_i[24:20] != 5'b00000) illegal_insn_o = 1'b1; - end - // Rest are illegal instructions - default: begin - illegal_insn_o = 1'b1; - end - endcase - - // check enabled formats (static) - if (~C_RVF && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; - if ((~C_RVD) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP64) illegal_insn_o = 1'b1; - if ((~C_XF16) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16) illegal_insn_o = 1'b1; - if ((~C_XF16ALT) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16ALT) begin - illegal_insn_o = 1'b1; - end - if ((~C_XF8) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP8) illegal_insn_o = 1'b1; - - // check rounding mode - if (check_fprm) begin - unique case (instr_rdata_i[14:12]) inside - [3'b000:3'b100]: ; //legal rounding modes - 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 - if (~C_XF16ALT || fpu_dst_fmt_o != cv32e41p_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; - // actual rounding mode from frm csr - unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; - endcase - end - 3'b111: begin - // rounding mode from frm csr - unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; - endcase - end - default : illegal_insn_o = 1'b1; - endcase - end - - // Set latencies for FPnew from config. The C_LAT constants contain the number - // of pipeline registers. the APU takes the following values: - // 1 = single cycle (no latency), 2 = one pipestage, 3 = two or more pipestages - case (fp_op_group) - // ADDMUL has format dependent latency - ADDMUL : begin - unique case (fpu_dst_fmt_o) - cv32e41p_fpu_pkg::FP32 : apu_lat_o = (C_LAT_FP32<2) ? C_LAT_FP32+1 : 2'h3; - cv32e41p_fpu_pkg::FP64 : apu_lat_o = (C_LAT_FP64<2) ? C_LAT_FP64+1 : 2'h3; - cv32e41p_fpu_pkg::FP16 : apu_lat_o = (C_LAT_FP16<2) ? C_LAT_FP16+1 : 2'h3; - cv32e41p_fpu_pkg::FP16ALT : apu_lat_o = (C_LAT_FP16ALT<2) ? C_LAT_FP16ALT+1 : 2'h3; - cv32e41p_fpu_pkg::FP8 : apu_lat_o = (C_LAT_FP8<2) ? C_LAT_FP8+1 : 2'h3; - default : ; - endcase - end - // DIVSQRT is iterative and takes more than 2 cycles - DIVSQRT : apu_lat_o = 2'h3; - // NONCOMP uses the same latency for all formats - NONCOMP : apu_lat_o = (C_LAT_NONCOMP<2) ? C_LAT_NONCOMP+1 : 2'h3; - // CONV uses the same latency for all formats - CONV : apu_lat_o = (C_LAT_CONV<2) ? C_LAT_CONV+1 : 2'h3; - endcase - - // Set FPnew OP and OPMOD as the APU op - apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; - - end - // FPU!=1 - else - illegal_insn_o = 1'b1; - end - - // floating point fused arithmetic - OPCODE_OP_FMADD, - OPCODE_OP_FMSUB, - OPCODE_OP_FNMSUB, - OPCODE_OP_FNMADD : begin - if (FPU==1) begin - // using APU instead of ALU - apu_en = 1'b1; - alu_en = 1'b0; - apu_lat_o = (PIPE_REG_MAC>1) ? 2'h3 : 2'h2; - // all registers are FP registers and use three - rega_used_o = 1'b1; - regb_used_o = 1'b1; - regc_used_o = 1'b1; - regc_mux_o = REGC_S4; - reg_fp_a_o = 1'b1; - reg_fp_b_o = 1'b1; - reg_fp_c_o = 1'b1; - reg_fp_d_o = 1'b1; - fp_rnd_mode_o = instr_rdata_i[14:12]; - - // Decode Formats - unique case (instr_rdata_i[26:25]) - // FP32 - 2'b00 : fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; - // FP64 - 2'b01 : fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP64; - // FP16 or FP16ALT - 2'b10 : begin - // FP16alt encoded in rm field - if (instr_rdata_i[14:12]==3'b101) fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; - else fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16; - end - // FP8 - 2'b11 : fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP8; - endcase - - // By default, src=dst - fpu_src_fmt_o = fpu_dst_fmt_o; - - // decode FP intstruction - unique case (instr_rdata_i[6:0]) - // fmadd.fmt - FP Fused multiply-add - OPCODE_OP_FMADD : begin - fpu_op = cv32e41p_fpu_pkg::FMADD; - apu_op_o = 2'b00; - end - // fmsub.fmt - FP Fused multiply-subtract - OPCODE_OP_FMSUB : begin - fpu_op = cv32e41p_fpu_pkg::FMADD; - fpu_op_mod = 1'b1; - apu_op_o = 2'b01; - end - // fnmsub.fmt - FP Negated fused multiply-subtract - OPCODE_OP_FNMSUB : begin - fpu_op = cv32e41p_fpu_pkg::FNMSUB; - apu_op_o = 2'b10; - end - // fnmadd.fmt - FP Negated fused multiply-add - OPCODE_OP_FNMADD : begin - fpu_op = cv32e41p_fpu_pkg::FNMSUB; - fpu_op_mod = 1'b1; - apu_op_o = 2'b11; - end - endcase - - // check enabled formats (static) - if (~C_RVF && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; - if ((~C_RVD) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP64) illegal_insn_o = 1'b1; - if ((~C_XF16) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16) illegal_insn_o = 1'b1; - if ((~C_XF16ALT) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16ALT) begin - illegal_insn_o = 1'b1; - end - if ((~C_XF8) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP8) illegal_insn_o = 1'b1; - - // check rounding mode - unique case (instr_rdata_i[14:12]) inside - [3'b000:3'b100]: ; //legal rounding modes - 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 - if (~C_XF16ALT || fpu_dst_fmt_o != cv32e41p_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; - // actual rounding mode from frm csr - unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; - endcase - end - 3'b111: begin - // rounding mode from frm csr - unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; - endcase - end - default : illegal_insn_o = 1'b1; - endcase - - // Set latencies for FPnew from config. The C_LAT constants contain the number - // of pipeline registers. the APU takes the following values: - // 1 = single cycle (no latency), 2 = one pipestage, 3 = two or more pipestages - // format dependent latency - unique case (fpu_dst_fmt_o) - cv32e41p_fpu_pkg::FP32 : apu_lat_o = (C_LAT_FP32<2) ? C_LAT_FP32+1 : 2'h3; - cv32e41p_fpu_pkg::FP64 : apu_lat_o = (C_LAT_FP64<2) ? C_LAT_FP64+1 : 2'h3; - cv32e41p_fpu_pkg::FP16 : apu_lat_o = (C_LAT_FP16<2) ? C_LAT_FP16+1 : 2'h3; - cv32e41p_fpu_pkg::FP16ALT : apu_lat_o = (C_LAT_FP16ALT<2) ? C_LAT_FP16ALT+1 : 2'h3; - cv32e41p_fpu_pkg::FP8 : apu_lat_o = (C_LAT_FP8<2) ? C_LAT_FP8+1 : 2'h3; - default : ; - endcase - - // Set FPnew OP and OPMOD as the APU op - apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; - end - // FPU!=1 - else begin - illegal_insn_o = 1'b1; - end - end - - OPCODE_STORE_FP: begin - if (FPU==1) begin - data_req = 1'b1; - data_we_o = 1'b1; - rega_used_o = 1'b1; - regb_used_o = 1'b1; - alu_operator_o = ALU_ADD; - reg_fp_b_o = 1'b1; - - // offset from immediate - imm_b_mux_sel_o = IMMB_S; - alu_op_b_mux_sel_o = OP_B_IMM; - - // pass write data through ALU operand c - alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; - - // Decode data type - unique case (instr_rdata_i[14:12]) - // fsb - FP8 store - 3'b000 : if (C_XF8) data_type_o = 2'b10; - else illegal_insn_o = 1'b1; - // fsh - FP16 store - 3'b001 : if (C_XF16 | C_XF16ALT) data_type_o = 2'b01; - else illegal_insn_o = 1'b1; - // fsw - FP32 store - 3'b010 : if (C_RVF) data_type_o = 2'b00; - else illegal_insn_o = 1'b1; - // fsd - FP64 store - 3'b011 : if (C_RVD) data_type_o = 2'b00; // 64bit stores unsupported! - else illegal_insn_o = 1'b1; - default: illegal_insn_o = 1'b1; - endcase - - // sanitize memory bus signals for illegal instr (not sure if needed??) - if (illegal_insn_o) begin - data_req = 1'b0; - data_we_o = 1'b0; - end - end - // FPU!=1 - else - illegal_insn_o = 1'b1; - end - - OPCODE_LOAD_FP: begin - if (FPU==1) begin - data_req = 1'b1; - regfile_mem_we = 1'b1; - reg_fp_d_o = 1'b1; - rega_used_o = 1'b1; - alu_operator_o = ALU_ADD; - - // offset from immediate - imm_b_mux_sel_o = IMMB_I; - alu_op_b_mux_sel_o = OP_B_IMM; - - // NaN boxing - data_sign_extension_o = 2'b10; - - // Decode data type - unique case (instr_rdata_i[14:12]) - // flb - FP8 load - 3'b000 : if (C_XF8) data_type_o = 2'b10; - else illegal_insn_o = 1'b1; - // flh - FP16 load - 3'b001 : if (C_XF16 | C_XF16ALT) data_type_o = 2'b01; - else illegal_insn_o = 1'b1; - // flw - FP32 load - 3'b010 : if (C_RVF) data_type_o = 2'b00; - else illegal_insn_o = 1'b1; - // fld - FP64 load - 3'b011 : if (C_RVD) data_type_o = 2'b00; // 64bit loads unsupported! - else illegal_insn_o = 1'b1; - default: illegal_insn_o = 1'b1; - endcase - end - // FPU!=1 - else - illegal_insn_o = 1'b1; - end - - OPCODE_PULP_OP: begin // PULP specific ALU instructions with three source operands - if (PULP_XPULP) begin - regfile_alu_we = 1'b1; - rega_used_o = 1'b1; - regb_used_o = 1'b1; - - case (instr_rdata_i[13:12]) - 2'b00: begin // multiply with subword selection - alu_en = 1'b0; - - mult_sel_subword_o = instr_rdata_i[30]; - mult_signed_mode_o = {2{instr_rdata_i[31]}}; - - mult_imm_mux_o = MIMM_S3; - regc_mux_o = REGC_ZERO; - mult_int_en = 1'b1; - - if (instr_rdata_i[14]) - mult_operator_o = MUL_IR; - else - mult_operator_o = MUL_I; - end - - 2'b01: begin // MAC with subword selection - alu_en = 1'b0; - - mult_sel_subword_o = instr_rdata_i[30]; - mult_signed_mode_o = {2{instr_rdata_i[31]}}; - - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - mult_imm_mux_o = MIMM_S3; - mult_int_en = 1'b1; - - if (instr_rdata_i[14]) - mult_operator_o = MUL_IR; - else - mult_operator_o = MUL_I; - end - - 2'b10: begin // add with normalization and rounding - // decide between using unsigned and rounding, and combinations - // thereof - case ({instr_rdata_i[31],instr_rdata_i[14]}) - 2'b00: alu_operator_o = ALU_ADD; - 2'b01: alu_operator_o = ALU_ADDR; - 2'b10: alu_operator_o = ALU_ADDU; - 2'b11: alu_operator_o = ALU_ADDUR; - endcase - - bmask_a_mux_o = BMASK_A_ZERO; - bmask_b_mux_o = BMASK_B_S3; - - if (instr_rdata_i[30]) begin - //register variant - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - alu_bmask_b_mux_sel_o = BMASK_B_REG; - alu_op_a_mux_sel_o = OP_A_REGC_OR_FWD; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - end - - end - - 2'b11: begin // sub with normalization and rounding - // decide between using unsigned and rounding, and combinations - // thereof - case ({instr_rdata_i[31],instr_rdata_i[14]}) - 2'b00: alu_operator_o = ALU_SUB; - 2'b01: alu_operator_o = ALU_SUBR; - 2'b10: alu_operator_o = ALU_SUBU; - 2'b11: alu_operator_o = ALU_SUBUR; - endcase - - bmask_a_mux_o = BMASK_A_ZERO; - bmask_b_mux_o = BMASK_B_S3; - - if (instr_rdata_i[30]) begin - //register variant - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - alu_bmask_b_mux_sel_o = BMASK_B_REG; - alu_op_a_mux_sel_o = OP_A_REGC_OR_FWD; - alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; - end - - end - endcase - end else begin - illegal_insn_o = 1'b1; - end - end - - OPCODE_VECOP: begin - if (PULP_XPULP) begin - regfile_alu_we = 1'b1; - rega_used_o = 1'b1; - imm_b_mux_sel_o = IMMB_VS; - - // vector size - if (instr_rdata_i[12]) begin - alu_vec_mode_o = VEC_MODE8; - mult_operator_o = MUL_DOT8; - end else begin - alu_vec_mode_o = VEC_MODE16; - mult_operator_o = MUL_DOT16; - end - - // distinguish normal vector, sc and sci modes - if (instr_rdata_i[14]) begin - scalar_replication_o = 1'b1; - - if (instr_rdata_i[13]) begin - // immediate scalar replication, .sci - alu_op_b_mux_sel_o = OP_B_IMM; - end else begin - // register scalar replication, .sc - regb_used_o = 1'b1; - end - end else begin - // normal register use - regb_used_o = 1'b1; - end - - // now decode the instruction - unique case (instr_rdata_i[31:26]) - 6'b00000_0: begin alu_operator_o = ALU_ADD; imm_b_mux_sel_o = IMMB_VS; end // pv.add - 6'b00001_0: begin alu_operator_o = ALU_SUB; imm_b_mux_sel_o = IMMB_VS; end // pv.sub - 6'b00010_0: begin alu_operator_o = ALU_ADD; imm_b_mux_sel_o = IMMB_VS; bmask_b_mux_o = BMASK_B_ONE; end // pv.avg - 6'b00011_0: begin alu_operator_o = ALU_ADDU; imm_b_mux_sel_o = IMMB_VU; bmask_b_mux_o = BMASK_B_ONE; end // pv.avgu - 6'b00100_0: begin alu_operator_o = ALU_MIN; imm_b_mux_sel_o = IMMB_VS; end // pv.min - 6'b00101_0: begin alu_operator_o = ALU_MINU; imm_b_mux_sel_o = IMMB_VU; end // pv.minu - 6'b00110_0: begin alu_operator_o = ALU_MAX; imm_b_mux_sel_o = IMMB_VS; end // pv.max - 6'b00111_0: begin alu_operator_o = ALU_MAXU; imm_b_mux_sel_o = IMMB_VU; end // pv.maxu - 6'b01000_0: begin alu_operator_o = ALU_SRL; imm_b_mux_sel_o = IMMB_VS; end // pv.srl - 6'b01001_0: begin alu_operator_o = ALU_SRA; imm_b_mux_sel_o = IMMB_VS; end // pv.sra - 6'b01010_0: begin alu_operator_o = ALU_SLL; imm_b_mux_sel_o = IMMB_VS; end // pv.sll - 6'b01011_0: begin alu_operator_o = ALU_OR; imm_b_mux_sel_o = IMMB_VS; end // pv.or - 6'b01100_0: begin alu_operator_o = ALU_XOR; imm_b_mux_sel_o = IMMB_VS; end // pv.xor - 6'b01101_0: begin alu_operator_o = ALU_AND; imm_b_mux_sel_o = IMMB_VS; end // pv.and - 6'b01110_0: begin alu_operator_o = ALU_ABS; imm_b_mux_sel_o = IMMB_VS; end // pv.abs - - // shuffle/pack - 6'b11101_0, // pv.shuffleI1 - 6'b11110_0, // pv.shuffleI2 - 6'b11111_0, // pv.shuffleI3 - 6'b11000_0: begin // pv.shuffle, pv.shuffleI0 - alu_operator_o = ALU_SHUF; - imm_b_mux_sel_o = IMMB_SHUF; - regb_used_o = 1'b1; - scalar_replication_o = 1'b0; - end - 6'b11001_0: begin // pv.shuffle2 - alu_operator_o = ALU_SHUF2; - regb_used_o = 1'b1; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - scalar_replication_o = 1'b0; - end - 6'b11010_0: begin // pv.pack - alu_operator_o = instr_rdata_i[25] ? ALU_PCKHI : ALU_PCKLO; - regb_used_o = 1'b1; - end - 6'b11011_0: begin // pv.packhi - alu_operator_o = ALU_PCKHI; - regb_used_o = 1'b1; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - end - 6'b11100_0: begin // pv.packlo - alu_operator_o = ALU_PCKLO; - regb_used_o = 1'b1; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - end - 6'b01111_0: begin // pv.extract - alu_operator_o = ALU_EXTS; - end - 6'b10010_0: begin // pv.extractu - alu_operator_o = ALU_EXT; - end - 6'b10110_0: begin // pv.insert - alu_operator_o = ALU_INS; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - alu_op_b_mux_sel_o = OP_B_REGC_OR_FWD; - end - 6'b10000_0: begin // pv.dotup - alu_en = 1'b0; - mult_dot_en = 1'b1; - mult_dot_signed_o = 2'b00; - imm_b_mux_sel_o = IMMB_VU; - end - 6'b10001_0: begin // pv.dotusp - alu_en = 1'b0; - mult_dot_en = 1'b1; - mult_dot_signed_o = 2'b01; - end - 6'b10011_0: begin // pv.dotsp - alu_en = 1'b0; - mult_dot_en = 1'b1; - mult_dot_signed_o = 2'b11; - end - 6'b10100_0: begin // pv.sdotup - alu_en = 1'b0; - mult_dot_en = 1'b1; - mult_dot_signed_o = 2'b00; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - imm_b_mux_sel_o = IMMB_VU; - end - 6'b10101_0: begin // pv.sdotusp - alu_en = 1'b0; - mult_dot_en = 1'b1; - mult_dot_signed_o = 2'b01; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - end - 6'b10111_0: begin // pv.sdotsp - alu_en = 1'b0; - mult_dot_en = 1'b1; - mult_dot_signed_o = 2'b11; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - end - - /* COMPLEX INSTRUCTIONS */ - - 6'b01010_1: begin // pc.clpxmul.{r,i}.{/,div2,div4,div8} - alu_en = 1'b0; - mult_dot_en = 1'b1; - mult_dot_signed_o = 2'b11; - is_clpx_o = 1'b1; - regc_used_o = 1'b1; - regc_mux_o = REGC_RD; - scalar_replication_o = 1'b0; - alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; - regb_used_o = 1'b1; - illegal_insn_o = instr_rdata_i[12]; - end - - 6'b01101_1: begin // pv.subrotmj.{/,div2,div4,div8} - alu_operator_o = ALU_SUB; - is_clpx_o = 1'b1; - scalar_replication_o = 1'b0; - alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; - regb_used_o = 1'b1; - is_subrot_o = 1'b1; - illegal_insn_o = instr_rdata_i[12]; - end - - 6'b01011_1: begin // pv.cplxconj - alu_operator_o = ALU_ABS; - is_clpx_o = 1'b1; - scalar_replication_o = 1'b0; - regb_used_o = 1'b0; - illegal_insn_o = instr_rdata_i[12] || (instr_rdata_i[24:20]!='0); - end - - 6'b01110_1: begin // pv.add.{div2,div4,div8} - alu_operator_o = ALU_ADD; - is_clpx_o = 1'b1; - scalar_replication_o = 1'b0; - alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; - regb_used_o = 1'b1; - illegal_insn_o = instr_rdata_i[12]; - end - - 6'b01100_1: begin // pv.sub.{div2,div4,div8} - alu_operator_o = ALU_SUB; - is_clpx_o = 1'b1; - scalar_replication_o = 1'b0; - alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; - regb_used_o = 1'b1; - illegal_insn_o = instr_rdata_i[12]; - end - - // comparisons, always have bit 26 set - 6'b00000_1: begin alu_operator_o = ALU_EQ; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpeq - 6'b00001_1: begin alu_operator_o = ALU_NE; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpne - 6'b00010_1: begin alu_operator_o = ALU_GTS; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpgt - 6'b00011_1: begin alu_operator_o = ALU_GES; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpge - 6'b00100_1: begin alu_operator_o = ALU_LTS; imm_b_mux_sel_o = IMMB_VS; end // pv.cmplt - 6'b00101_1: begin alu_operator_o = ALU_LES; imm_b_mux_sel_o = IMMB_VS; end // pv.cmple - 6'b00110_1: begin alu_operator_o = ALU_GTU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpgtu - 6'b00111_1: begin alu_operator_o = ALU_GEU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpgeu - 6'b01000_1: begin alu_operator_o = ALU_LTU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpltu - 6'b01001_1: begin alu_operator_o = ALU_LEU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpleu - - default: illegal_insn_o = 1'b1; - endcase - end else begin - illegal_insn_o = 1'b1; - end - end - - //////////////////////////////////////////////// - // ____ ____ _____ ____ ___ _ _ // - // / ___|| _ \| ____/ ___|_ _| / \ | | // - // \___ \| |_) | _|| | | | / _ \ | | // - // ___) | __/| |__| |___ | | / ___ \| |___ // - // |____/|_| |_____\____|___/_/ \_\_____| // - // // - //////////////////////////////////////////////// - - OPCODE_FENCE: begin - unique case (instr_rdata_i[14:12]) - 3'b000: begin // FENCE (FENCE.I instead, a bit more conservative) - // flush pipeline - fencei_insn_o = 1'b1; - end - - 3'b001: begin // FENCE.I - // flush prefetch buffer, flush pipeline - fencei_insn_o = 1'b1; - end - - default: begin - illegal_insn_o = 1'b1; - end - endcase - end - - OPCODE_SYSTEM: begin - if (instr_rdata_i[14:12] == 3'b000) - begin - // non CSR related SYSTEM instructions - if ( {instr_rdata_i[19:15], instr_rdata_i[11:7]} == '0) - begin - unique case (instr_rdata_i[31:20]) - 12'h000: // ECALL - begin - // environment (system) call - ecall_insn_o = 1'b1; - end - - 12'h001: // ebreak - begin - // debugger trap - ebrk_insn_o = 1'b1; - end - - 12'h302: // mret - begin - illegal_insn_o = (PULP_SECURE) ? current_priv_lvl_i != PRIV_LVL_M : 1'b0; - mret_insn_o = ~illegal_insn_o; - mret_dec_o = 1'b1; - end - - 12'h002: // uret - begin - illegal_insn_o = (PULP_SECURE) ? 1'b0 : 1'b1; - uret_insn_o = ~illegal_insn_o; - uret_dec_o = 1'b1; - end - - 12'h7b2: // dret - begin - illegal_insn_o = !debug_mode_i; - dret_insn_o = debug_mode_i; - dret_dec_o = 1'b1; - end - - 12'h105: // wfi - begin - wfi_o = 1'b1; - if (debug_wfi_no_sleep_i) begin - // Treat as NOP (do not cause sleep mode entry) - // Using decoding similar to ADDI, but without register reads/writes, i.e. - // keep regfile_alu_we = 0, rega_used_o = 0 - alu_op_b_mux_sel_o = OP_B_IMM; - imm_b_mux_sel_o = IMMB_I; - alu_operator_o = ALU_ADD; - end - end - - default: - begin - illegal_insn_o = 1'b1; - end - endcase - end else illegal_insn_o = 1'b1; - end - else - begin - // instruction to read/modify CSR - csr_access_o = 1'b1; - regfile_alu_we = 1'b1; - alu_op_b_mux_sel_o = OP_B_IMM; - imm_a_mux_sel_o = IMMA_Z; - imm_b_mux_sel_o = IMMB_I; // CSR address is encoded in I imm - - if (instr_rdata_i[14] == 1'b1) begin - // rs1 field is used as immediate - alu_op_a_mux_sel_o = OP_A_IMM; - end else begin - rega_used_o = 1'b1; - alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD; - end - - // instr_rdata_i[19:14] = rs or immediate value - // if set or clear with rs==x0 or imm==0, - // then do not perform a write action - unique case (instr_rdata_i[13:12]) - 2'b01: csr_op = CSR_OP_WRITE; - 2'b10: csr_op = instr_rdata_i[19:15] == 5'b0 ? CSR_OP_READ : CSR_OP_SET; - 2'b11: csr_op = instr_rdata_i[19:15] == 5'b0 ? CSR_OP_READ : CSR_OP_CLEAR; - default: csr_illegal = 1'b1; - endcase - - if (instr_rdata_i[29:28] > current_priv_lvl_i) begin - // No access to higher privilege CSR - csr_illegal = 1'b1; - end - - // Determine if CSR access is illegal - case (instr_rdata_i[31:20]) - // Floating point - CSR_FFLAGS, - CSR_FRM, - CSR_FCSR : - if(!FPU) csr_illegal = 1'b1; - - // Writes to read only CSRs results in illegal instruction - CSR_MVENDORID, - CSR_MARCHID, - CSR_MIMPID, - CSR_MHARTID : - if(csr_op != CSR_OP_READ) csr_illegal = 1'b1; - - // These are valid CSR registers - CSR_MSTATUS, - CSR_MEPC, - CSR_MTVEC, - CSR_MCAUSE : - // Not illegal, but treat as status CSR for side effect handling - csr_status_o = 1'b1; - - // These are valid CSR registers - CSR_MISA, - CSR_MIE, - CSR_MSCRATCH, - CSR_MTVAL, - CSR_MIP : - ; // do nothing, not illegal - - // Hardware Performance Monitor - CSR_MCYCLE, - CSR_MINSTRET, - CSR_MHPMCOUNTER3, - CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7, - CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11, - CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15, - CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19, - CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23, - CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27, - CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31, - CSR_MCYCLEH, - CSR_MINSTRETH, - CSR_MHPMCOUNTER3H, - CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H, - CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H, - CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H, - CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H, - CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H, - CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H, - CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H, - CSR_MCOUNTINHIBIT, - CSR_MHPMEVENT3, - CSR_MHPMEVENT4, CSR_MHPMEVENT5, CSR_MHPMEVENT6, CSR_MHPMEVENT7, - CSR_MHPMEVENT8, CSR_MHPMEVENT9, CSR_MHPMEVENT10, CSR_MHPMEVENT11, - CSR_MHPMEVENT12, CSR_MHPMEVENT13, CSR_MHPMEVENT14, CSR_MHPMEVENT15, - CSR_MHPMEVENT16, CSR_MHPMEVENT17, CSR_MHPMEVENT18, CSR_MHPMEVENT19, - CSR_MHPMEVENT20, CSR_MHPMEVENT21, CSR_MHPMEVENT22, CSR_MHPMEVENT23, - CSR_MHPMEVENT24, CSR_MHPMEVENT25, CSR_MHPMEVENT26, CSR_MHPMEVENT27, - CSR_MHPMEVENT28, CSR_MHPMEVENT29, CSR_MHPMEVENT30, CSR_MHPMEVENT31 : - // Not illegal, but treat as status CSR to get accurate counts - csr_status_o = 1'b1; - - // Hardware Performance Monitor (unprivileged read-only mirror CSRs) - CSR_CYCLE, - CSR_INSTRET, - CSR_HPMCOUNTER3, - CSR_HPMCOUNTER4, CSR_HPMCOUNTER5, CSR_HPMCOUNTER6, CSR_HPMCOUNTER7, - CSR_HPMCOUNTER8, CSR_HPMCOUNTER9, CSR_HPMCOUNTER10, CSR_HPMCOUNTER11, - CSR_HPMCOUNTER12, CSR_HPMCOUNTER13, CSR_HPMCOUNTER14, CSR_HPMCOUNTER15, - CSR_HPMCOUNTER16, CSR_HPMCOUNTER17, CSR_HPMCOUNTER18, CSR_HPMCOUNTER19, - CSR_HPMCOUNTER20, CSR_HPMCOUNTER21, CSR_HPMCOUNTER22, CSR_HPMCOUNTER23, - CSR_HPMCOUNTER24, CSR_HPMCOUNTER25, CSR_HPMCOUNTER26, CSR_HPMCOUNTER27, - CSR_HPMCOUNTER28, CSR_HPMCOUNTER29, CSR_HPMCOUNTER30, CSR_HPMCOUNTER31, - CSR_CYCLEH, - CSR_INSTRETH, - CSR_HPMCOUNTER3H, - CSR_HPMCOUNTER4H, CSR_HPMCOUNTER5H, CSR_HPMCOUNTER6H, CSR_HPMCOUNTER7H, - CSR_HPMCOUNTER8H, CSR_HPMCOUNTER9H, CSR_HPMCOUNTER10H, CSR_HPMCOUNTER11H, - CSR_HPMCOUNTER12H, CSR_HPMCOUNTER13H, CSR_HPMCOUNTER14H, CSR_HPMCOUNTER15H, - CSR_HPMCOUNTER16H, CSR_HPMCOUNTER17H, CSR_HPMCOUNTER18H, CSR_HPMCOUNTER19H, - CSR_HPMCOUNTER20H, CSR_HPMCOUNTER21H, CSR_HPMCOUNTER22H, CSR_HPMCOUNTER23H, - CSR_HPMCOUNTER24H, CSR_HPMCOUNTER25H, CSR_HPMCOUNTER26H, CSR_HPMCOUNTER27H, - CSR_HPMCOUNTER28H, CSR_HPMCOUNTER29H, CSR_HPMCOUNTER30H, CSR_HPMCOUNTER31H : - // Read-only and readable from user mode only if the bit of mcounteren is set - if((csr_op != CSR_OP_READ) || (PULP_SECURE && (current_priv_lvl_i != PRIV_LVL_M) && !mcounteren_i[instr_rdata_i[24:20]])) begin - csr_illegal = 1'b1; - end else begin - csr_status_o = 1'b1; - end - - // This register only exists in user mode - CSR_MCOUNTEREN : - if(!PULP_SECURE) begin - csr_illegal = 1'b1; - end else begin - csr_status_o = 1'b1; - end - - // Debug register access - CSR_DCSR, - CSR_DPC, - CSR_DSCRATCH0, - CSR_DSCRATCH1 : - if(!debug_mode_i) begin - csr_illegal = 1'b1; - end else begin - csr_status_o = 1'b1; - end - - // Debug Trigger register access - CSR_TSELECT, - CSR_TDATA1, - CSR_TDATA2, - CSR_TDATA3, - CSR_TINFO, - CSR_MCONTEXT, - CSR_SCONTEXT : - if(DEBUG_TRIGGER_EN != 1) - csr_illegal = 1'b1; - - // Hardware Loop register, UHARTID access - CSR_LPSTART0, - CSR_LPEND0, - CSR_LPCOUNT0, - CSR_LPSTART1, - CSR_LPEND1, - CSR_LPCOUNT1, - CSR_UHARTID : - if(!PULP_XPULP) csr_illegal = 1'b1; - - // PRIVLV access - CSR_PRIVLV : - if(!PULP_XPULP) begin - csr_illegal = 1'b1; - end else begin - csr_status_o = 1'b1; - end - - // PMP register access - CSR_PMPCFG0, - CSR_PMPCFG1, - CSR_PMPCFG2, - CSR_PMPCFG3, - CSR_PMPADDR0, - CSR_PMPADDR1, - CSR_PMPADDR2, - CSR_PMPADDR3, - CSR_PMPADDR4, - CSR_PMPADDR5, - CSR_PMPADDR6, - CSR_PMPADDR7, - CSR_PMPADDR8, - CSR_PMPADDR9, - CSR_PMPADDR10, - CSR_PMPADDR11, - CSR_PMPADDR12, - CSR_PMPADDR13, - CSR_PMPADDR14, - CSR_PMPADDR15 : - if(!USE_PMP) csr_illegal = 1'b1; - - // User register access - CSR_USTATUS, - CSR_UEPC, - CSR_UTVEC, - CSR_UCAUSE : - if (!PULP_SECURE) begin - csr_illegal = 1'b1; - end else begin - csr_status_o = 1'b1; - end - - default : csr_illegal = 1'b1; - - endcase // case (instr_rdata_i[31:20]) - - illegal_insn_o = csr_illegal; - - end - - end - - - /////////////////////////////////////////////// - // _ ___ ___ ___ ___ ____ // - // | | | \ \ / / | / _ \ / _ \| _ \ // - // | |_| |\ \ /\ / /| | | | | | | | | |_) | // - // | _ | \ V V / | |__| |_| | |_| | __/ // - // |_| |_| \_/\_/ |_____\___/ \___/|_| // - // // - /////////////////////////////////////////////// - - OPCODE_HWLOOP: begin - if(PULP_XPULP) begin : HWLOOP_FEATURE_ENABLED - hwlp_target_mux_sel_o = 1'b0; - - unique case (instr_rdata_i[14:12]) - 3'b000: begin - // lp.starti: set start address to PC + I-type immediate - hwlp_we[0] = 1'b1; - hwlp_start_mux_sel_o = 1'b0; - end - - 3'b001: begin - // lp.endi: set end address to PC + I-type immediate - hwlp_we[1] = 1'b1; - end - - 3'b010: begin - // lp.count: initialize counter from rs1 - hwlp_we[2] = 1'b1; - hwlp_cnt_mux_sel_o = 1'b1; - rega_used_o = 1'b1; - end - - 3'b011: begin - // lp.counti: initialize counter from I-type immediate - hwlp_we[2] = 1'b1; - hwlp_cnt_mux_sel_o = 1'b0; - end - - 3'b100: begin - // lp.setup: initialize counter from rs1, set start address to - // next instruction and end address to PC + I-type immediate - hwlp_we = 3'b111; - hwlp_start_mux_sel_o = 1'b1; - hwlp_cnt_mux_sel_o = 1'b1; - rega_used_o = 1'b1; - end - - 3'b101: begin - // lp.setupi: initialize counter from immediate, set start address to - // next instruction and end address to PC + I-type immediate - hwlp_we = 3'b111; - hwlp_target_mux_sel_o = 1'b1; - hwlp_start_mux_sel_o = 1'b1; - hwlp_cnt_mux_sel_o = 1'b0; - end - - default: begin - illegal_insn_o = 1'b1; - end - endcase // case (instr_rdata_i[14:12]) - - end else begin // block: HWLOOP_FEATURE_ENABLED - illegal_insn_o = 1'b1; - end - end // case: OPCODE_HWLOOP - - default: begin - illegal_insn_o = 1'b1; - end - endcase - - - // make sure invalid compressed instruction causes an exception - if (illegal_c_insn_i) begin - illegal_insn_o = 1'b1; - end - - end - - // deassert we signals (in case of stalls) - assign alu_en_o = (deassert_we_i) ? 1'b0 : alu_en; - assign apu_en_o = (deassert_we_i) ? 1'b0 : apu_en; - assign mult_int_en_o = (deassert_we_i) ? 1'b0 : mult_int_en; - assign mult_dot_en_o = (deassert_we_i) ? 1'b0 : mult_dot_en; - assign regfile_mem_we_o = (deassert_we_i) ? 1'b0 : regfile_mem_we; - assign regfile_alu_we_o = (deassert_we_i) ? 1'b0 : regfile_alu_we; - assign data_req_o = (deassert_we_i) ? 1'b0 : data_req; - assign hwlp_we_o = (deassert_we_i) ? 3'b0 : hwlp_we; - assign csr_op_o = (deassert_we_i) ? CSR_OP_READ : csr_op; - assign ctrl_transfer_insn_in_id_o = (deassert_we_i) ? BRANCH_NONE : ctrl_transfer_insn; - - assign ctrl_transfer_insn_in_dec_o = ctrl_transfer_insn; - assign regfile_alu_we_dec_o = regfile_alu_we; - -endmodule // cv32e41p_decoder diff --git a/rtl/cv32e41p_ex_stage.sv b/rtl/cv32e41p_ex_stage.sv index 8bb380f..d98b984 100644 --- a/rtl/cv32e41p_ex_stage.sv +++ b/rtl/cv32e41p_ex_stage.sv @@ -149,7 +149,7 @@ module cv32e41p_ex_stage output logic ex_ready_o, // EX stage ready for new data output logic ex_valid_o, // EX stage gets new data - input logic wb_ready_i // WB stage ready for new data + input logic wb_ready_i // WB stage ready for new data ); logic [31:0] alu_result; diff --git a/rtl/cv32e41p_hwloop_regs.sv b/rtl/cv32e41p_hwloop_regs.sv index 3229d9b..4f82ad1 100644 --- a/rtl/cv32e41p_hwloop_regs.sv +++ b/rtl/cv32e41p_hwloop_regs.sv @@ -34,7 +34,7 @@ module cv32e41p_hwloop_regs #( input logic [ 31:0] hwlp_end_data_i, input logic [ 31:0] hwlp_cnt_data_i, input logic [ 2:0] hwlp_we_i, - input logic [N_REG_BITS-1:0] hwlp_regid_i, // selects the register set + input logic [N_REG_BITS-1:0] hwlp_regid_i, // selects the register set // from controller input logic valid_i, diff --git a/rtl/cv32e41p_id_stage.sv b/rtl/cv32e41p_id_stage.sv index c3d9692..107f387 100644 --- a/rtl/cv32e41p_id_stage.sv +++ b/rtl/cv32e41p_id_stage.sv @@ -35,6 +35,10 @@ module cv32e41p_id_stage parameter PULP_CLUSTER = 0, parameter N_HWLP = 2, parameter N_HWLP_BITS = $clog2(N_HWLP), + parameter Zcea = 0, + parameter Zceb = 0, + parameter Zcec = 0, + parameter Zcee = 0, parameter PULP_SECURE = 0, parameter USE_PMP = 0, parameter A_EXTENSION = 0, @@ -61,8 +65,6 @@ module cv32e41p_id_stage input logic instr_valid_i, input logic [31:0] instr_rdata_i, // comes from pipeline of IF stage output logic instr_req_o, - input logic is_compressed_i, - input logic illegal_c_insn_i, // Jumps and branches output logic branch_in_ex_o, @@ -197,8 +199,8 @@ module cv32e41p_id_stage // Interrupt signals input logic [31:0] irq_i, input logic irq_sec_i, - input logic [31:0] mie_bypass_i, // MIE CSR (bypass) - output logic [31:0] mip_o, // MIP CSR + input logic [31:0] mie_bypass_i, // MIE CSR (bypass) + output logic [31:0] mip_o, // MIP CSR input logic m_irq_enable_i, input logic u_irq_enable_i, output logic irq_ack_o, @@ -251,21 +253,12 @@ module cv32e41p_id_stage input logic [31:0] mcounteren_i ); - // Source/Destination register instruction index - localparam REG_S1_MSB = 19; - localparam REG_S1_LSB = 15; - localparam REG_S2_MSB = 24; - localparam REG_S2_LSB = 20; - - localparam REG_S4_MSB = 31; - localparam REG_S4_LSB = 27; - - localparam REG_D_MSB = 11; - localparam REG_D_LSB = 7; logic [31:0] instr; + logic is_compressed; + logic illegal_c_insn; // Decoder/Controller ID stage internal signals logic deassert_we; @@ -318,6 +311,20 @@ module cv32e41p_id_stage logic [31:0] imm_shuffle_type; logic [31:0] imm_clip_type; + logic [31:0] imm_cjal_type; + logic [31:0] imm_cspn_type; + logic [31:0] imm_cfldsp_type; + logic [31:0] imm_caddi_type; + logic [31:0] imm_clwsp_type; + logic [31:0] imm_cld_type; + logic [31:0] imm_cswsp_type; + logic [31:0] imm_fsdp_type; + logic [31:0] imm_clw_type; + logic [31:0] imm_csrli_type; + logic [31:0] imm_candi_type; + logic [31:0] imm_cbeq_type; + logic [31:0] imm_clui_type; + logic [31:0] imm_a; // contains the immediate for operand b logic [31:0] imm_b; // contains the immediate for operand b @@ -334,6 +341,11 @@ module cv32e41p_id_stage logic [ 5:0] regfile_addr_rb_id; logic [ 5:0] regfile_addr_rc_id; + logic [ 4:0] addr_ra_id; + logic [ 4:0] addr_rb_id; + logic [ 4:0] addr_rc_id; + logic [ 4:0] decoder_waddr_id; + logic regfile_fp_a; logic regfile_fp_b; logic regfile_fp_c; @@ -358,8 +370,8 @@ module cv32e41p_id_stage logic [1:0] regc_mux; logic [0:0] imm_a_mux_sel; - logic [3:0] imm_b_mux_sel; - logic [1:0] ctrl_transfer_target_mux_sel; + logic [4:0] imm_b_mux_sel; + logic [2:0] ctrl_transfer_target_mux_sel; // Multiplier Control mul_opcode_e mult_operator; // multiplication operation selection @@ -487,21 +499,51 @@ module cv32e41p_id_stage // immediate extraction and sign extension assign imm_i_type = {{20{instr[31]}}, instr[31:20]}; - assign imm_iz_type = {20'b0, instr[31:20]}; assign imm_s_type = {{20{instr[31]}}, instr[31:25], instr[11:7]}; assign imm_sb_type = {{19{instr[31]}}, instr[31], instr[7], instr[30:25], instr[11:8], 1'b0}; assign imm_u_type = {instr[31:12], 12'b0}; assign imm_uj_type = {{12{instr[31]}}, instr[19:12], instr[20], instr[30:21], 1'b0}; - // immediate for CSR manipulatin (zero extended) + assign imm_cjal_type = { + {20{instr[12]}}, + instr[12:12], + instr[8:8], + instr[10:9], + instr[6:6], + instr[7:7], + instr[2:2], + instr[11:11], + instr[5:3], + 1'b0 + }; + assign imm_cspn_type = {22'b0, instr[10:7], instr[12:11], instr[5], instr[6], 2'b0}; + assign imm_cfldsp_type = {22'b0, instr[4:2], instr[12], instr[6:5], 3'b0}; + assign imm_caddi_type = { + {22{instr[12]}}, instr[12:12], instr[4:3], instr[5:5], instr[2:2], instr[6:6], 4'b0 + }; + assign imm_clwsp_type = {24'b0, instr[3:2], instr[12:12], instr[6:4], 2'b0}; + assign imm_cld_type = {24'b0, instr[6:5], instr[12:10], 3'b0}; + assign imm_cswsp_type = {24'b0, instr[8:7], instr[12:9], 2'b0}; + assign imm_fsdp_type = {24'b0, instr[9:7], instr[12:10], 2'b0}; + assign imm_clw_type = {25'b0, instr[5], instr[12:10], instr[6], 2'b0}; + assign imm_csrli_type = {26'b0, instr[12:12], instr[6:2]}; + assign imm_candi_type = {{26{instr[12]}}, instr[12:12], instr[6:2]}; + assign imm_cbeq_type = { + {23{instr[12]}}, instr[12:12], instr[6:5], instr[2:2], instr[11:10], instr[4:3], 1'b0 + }; + assign imm_clui_type = {{14{instr[12]}}, instr[12:12], instr[6:2], 12'b0}; + + assign imm_iz_type = {20'b0, instr[31:20]}; + + // immediate for CSR manipulatin (zero extended) Used only in 32 bit instructions ! assign imm_z_type = {27'b0, instr[REG_S1_MSB:REG_S1_LSB]}; + // PULP Immediate formats assign imm_s2_type = {27'b0, instr[24:20]}; assign imm_bi_type = {{27{instr[24]}}, instr[24:20]}; assign imm_s3_type = {27'b0, instr[29:25]}; assign imm_vs_type = {{26{instr[24]}}, instr[24:20], instr[25]}; assign imm_vu_type = {26'b0, instr[24:20], instr[25]}; - // same format as rS2 for shuffle needs, expands immediate assign imm_shuffleb_type = { 6'b0, instr[28:27], 6'b0, instr[24:23], 6'b0, instr[22:21], 6'b0, instr[20], instr[25] @@ -522,8 +564,8 @@ module cv32e41p_id_stage //--------------------------------------------------------------------------- // source register selection regfile_fp_x=1 <=> CV32E40P_REG_x is a FP-register //--------------------------------------------------------------------------- - assign regfile_addr_ra_id = {fregfile_ena & regfile_fp_a, instr[REG_S1_MSB:REG_S1_LSB]}; - assign regfile_addr_rb_id = {fregfile_ena & regfile_fp_b, instr[REG_S2_MSB:REG_S2_LSB]}; + assign regfile_addr_ra_id = {fregfile_ena & regfile_fp_a, addr_ra_id}; + assign regfile_addr_rb_id = {fregfile_ena & regfile_fp_b, addr_rb_id}; // register C mux always_comb begin @@ -538,7 +580,7 @@ module cv32e41p_id_stage //--------------------------------------------------------------------------- // destination registers regfile_fp_d=1 <=> REG_D is a FP-register //--------------------------------------------------------------------------- - assign regfile_waddr_id = {fregfile_ena & regfile_fp_d, instr[REG_D_MSB:REG_D_LSB]}; + assign regfile_waddr_id = {fregfile_ena & regfile_fp_d, decoder_waddr_id}; // Second Register Write Address Selection // Used for prepost load/store and multiplier @@ -577,12 +619,15 @@ module cv32e41p_id_stage always_comb begin : jump_target_mux unique case (ctrl_transfer_target_mux_sel) - JT_JAL: jump_target = pc_id_i + imm_uj_type; - JT_COND: jump_target = pc_id_i + imm_sb_type; + JT_JAL: jump_target = pc_id_i + imm_uj_type; + JT_COND: jump_target = pc_id_i + imm_sb_type; + JT_CJAL: jump_target = pc_id_i + imm_cjal_type; + JT_CCOND: jump_target = pc_id_i + imm_cbeq_type; // JALR: Cannot forward RS1, since the path is too long - JT_JALR: jump_target = regfile_data_ra_id + imm_i_type; - default: jump_target = regfile_data_ra_id + imm_i_type; + JT_JALR: jump_target = regfile_data_ra_id + imm_i_type; + JT_CJALR: jump_target = regfile_data_ra_id; + default: jump_target = regfile_data_ra_id + imm_i_type; endcase end @@ -644,7 +689,7 @@ module cv32e41p_id_stage IMMB_I: imm_b = imm_i_type; IMMB_S: imm_b = imm_s_type; IMMB_U: imm_b = imm_u_type; - IMMB_PCINCR: imm_b = is_compressed_i ? 32'h2 : 32'h4; + IMMB_PCINCR: imm_b = is_compressed ? 32'h2 : 32'h4; IMMB_S2: imm_b = imm_s2_type; IMMB_BI: imm_b = imm_bi_type; IMMB_S3: imm_b = imm_s3_type; @@ -652,6 +697,18 @@ module cv32e41p_id_stage IMMB_VU: imm_b = imm_vu_type; IMMB_SHUF: imm_b = imm_shuffle_type; IMMB_CLIP: imm_b = {1'b0, imm_clip_type[31:1]}; + IMMB_CJAL: imm_b = imm_cjal_type; + IMMB_CSPN: imm_b = imm_cspn_type; + IMMB_CFLDSP: imm_b = imm_cfldsp_type; + IMMB_CADDI: imm_b = imm_caddi_type; + IMMB_CLWSP: imm_b = imm_clwsp_type; + IMMB_CLD: imm_b = imm_cld_type; + IMMB_CSWSP: imm_b = imm_cswsp_type; + IMMB_FSDP: imm_b = imm_fsdp_type; + IMMB_CLW: imm_b = imm_clw_type; + IMMB_CSRLI: imm_b = imm_csrli_type; + IMMB_CANDI: imm_b = imm_candi_type; + IMMB_CLUI: imm_b = imm_clui_type; default: imm_b = imm_i_type; endcase end @@ -947,8 +1004,12 @@ module cv32e41p_id_stage // // /////////////////////////////////////////////// - cv32e41p_decoder #( + cv32e41p_merged_decoder #( .PULP_XPULP (PULP_XPULP), + .Zcea (Zcea), + .Zceb (Zceb), + .Zcec (Zcec), + .Zcee (Zcee), .PULP_CLUSTER (PULP_CLUSTER), .A_EXTENSION (A_EXTENSION), .FPU (FPU), @@ -991,8 +1052,11 @@ module cv32e41p_id_stage .alu_bmask_b_mux_sel_o(alu_bmask_b_mux_sel), // from IF/ID pipeline - .instr_rdata_i (instr), - .illegal_c_insn_i(illegal_c_insn_i), + .instr_rdata_i(instr), + + // Compressed decoder output + .is_compressed_o (is_compressed), + .illegal_c_insn_o(illegal_c_insn), // ALU signals .alu_en_o (alu_en), @@ -1067,6 +1131,10 @@ module cv32e41p_id_stage .ctrl_transfer_insn_in_id_o (ctrl_transfer_insn_in_id), .ctrl_transfer_target_mux_sel_o(ctrl_transfer_target_mux_sel), + + .addr_ra_id_o(addr_ra_id), + .addr_rb_id_o(addr_rb_id), + .waddr_id_o (decoder_waddr_id), // HPM related control signals .mcounteren_i(mcounteren_i) @@ -1085,7 +1153,7 @@ module cv32e41p_id_stage .PULP_CLUSTER(PULP_CLUSTER), .PULP_XPULP (PULP_XPULP) ) controller_i ( - .clk (clk), // Gated clock + .clk (clk), // Gated clock .clk_ungated_i(clk_ungated_i), // Ungated clock .rst_n (rst_n), @@ -1131,7 +1199,7 @@ module cv32e41p_id_stage // HWLoop signls .pc_id_i (pc_id_i), - .is_compressed_i(is_compressed_i), + .is_compressed_i(is_compressed), .hwlp_start_addr_i(hwlp_start_o), .hwlp_end_addr_i (hwlp_end_o), @@ -1635,7 +1703,7 @@ module cv32e41p_id_stage mhpmevent_store_o <= minstret && data_req_id && data_we_id; mhpmevent_jump_o <= minstret && ((ctrl_transfer_insn_in_id == BRANCH_JAL) || (ctrl_transfer_insn_in_id == BRANCH_JALR)); mhpmevent_branch_o <= minstret && (ctrl_transfer_insn_in_id == BRANCH_COND); - mhpmevent_compressed_o <= minstret && is_compressed_i; + mhpmevent_compressed_o <= minstret && is_compressed; // EX stage count mhpmevent_branch_taken_o <= mhpmevent_branch_o && branch_decision_i; // IF stage count @@ -1679,7 +1747,7 @@ module cv32e41p_id_stage // the instruction delivered to the ID stage should always be valid a_valid_instr : - assert property (@(posedge clk) (instr_valid_i & (~illegal_c_insn_i)) |-> (!$isunknown(instr))) + assert property (@(posedge clk) (instr_valid_i & (~illegal_c_insn)) |-> (!$isunknown(instr))) else $warning("%t, Instruction is valid, but has at least one X", $time); // Check that instruction after taken branch is flushed (more should actually be flushed, but that is not checked here) diff --git a/rtl/cv32e41p_if_stage.sv b/rtl/cv32e41p_if_stage.sv index 4a5a66b..c9c14f4 100644 --- a/rtl/cv32e41p_if_stage.sv +++ b/rtl/cv32e41p_if_stage.sv @@ -29,7 +29,11 @@ module cv32e41p_if_stage #( parameter PULP_XPULP = 0, // PULP ISA Extension (including PULP specific CSRs and hardware loop, excluding p.elw) parameter PULP_OBI = 0, // Legacy PULP OBI behavior parameter PULP_SECURE = 0, - parameter FPU = 0 + parameter FPU = 0, + parameter Zcea = 0, + parameter Zceb = 0, + parameter Zcec = 0, + parameter Zcee = 0 ) ( input logic clk, input logic rst_n, @@ -60,8 +64,6 @@ module cv32e41p_if_stage #( // Output of IF Pipeline stage output logic instr_valid_id_o, // instruction in IF/ID pipeline is valid output logic [31:0] instr_rdata_id_o, // read instruction is sampled and sent to ID stage for decoding - output logic is_compressed_id_o, // compressed decoder thinks this is a compressed instruction - output logic illegal_c_insn_id_o, // compressed decoder thinks this is an invalid instruction output logic [31:0] pc_if_o, output logic [31:0] pc_id_o, output logic is_fetch_failed_o, @@ -79,7 +81,7 @@ module cv32e41p_if_stage #( input logic [4:0] m_exc_vec_pc_mux_i, // selects ISR address for vectorized interrupt lines input logic [4:0] u_exc_vec_pc_mux_i, // selects ISR address for vectorized interrupt lines - output logic csr_mtvec_init_o, // tell CS regfile to init mtvec + output logic csr_mtvec_init_o, // tell CS regfile to init mtvec // jump and branch target and decision input logic [31:0] jump_target_id_i, // jump target address @@ -94,7 +96,7 @@ module cv32e41p_if_stage #( input logic id_ready_i, // misc signals - output logic if_busy_o, // is the IF stage busy fetching instructions? + output logic if_busy_o, // is the IF stage busy fetching instructions? output logic perf_imiss_o // Instruction Fetch Miss ); @@ -120,7 +122,6 @@ module cv32e41p_if_stage #( logic aligner_ready; logic instr_valid; - logic illegal_c_insn; logic [31:0] instr_aligned; logic [31:0] instr_decompressed; logic instr_compressed_int; @@ -199,7 +200,7 @@ module cv32e41p_if_stage #( .instr_addr_o (instr_addr_o), .instr_gnt_i (instr_gnt_i), .instr_rvalid_i (instr_rvalid_i), - .instr_err_i (instr_err_i), // Not supported (yet) + .instr_err_i (instr_err_i), // Not supported (yet) .instr_err_pmp_i(instr_err_pmp_i), // Not supported (yet) .instr_rdata_i (instr_rdata_i), @@ -228,21 +229,17 @@ module cv32e41p_if_stage #( // IF-ID pipeline registers, frozen when the ID stage is stalled always_ff @(posedge clk, negedge rst_n) begin : IF_ID_PIPE_REGISTERS if (rst_n == 1'b0) begin - instr_valid_id_o <= 1'b0; - instr_rdata_id_o <= '0; - is_fetch_failed_o <= 1'b0; - pc_id_o <= '0; - is_compressed_id_o <= 1'b0; - illegal_c_insn_id_o <= 1'b0; + instr_valid_id_o <= 1'b0; + instr_rdata_id_o <= '0; + is_fetch_failed_o <= 1'b0; + pc_id_o <= '0; end else begin if (if_valid && instr_valid) begin - instr_valid_id_o <= 1'b1; - instr_rdata_id_o <= instr_decompressed; - is_compressed_id_o <= instr_compressed_int; - illegal_c_insn_id_o <= illegal_c_insn; - is_fetch_failed_o <= 1'b0; - pc_id_o <= pc_if_o; + instr_valid_id_o <= 1'b1; + instr_rdata_id_o <= instr_aligned; + is_fetch_failed_o <= 1'b0; + pc_id_o <= pc_if_o; end else if (clear_instr_valid_i) begin instr_valid_id_o <= 1'b0; is_fetch_failed_o <= fetch_failed; @@ -269,14 +266,6 @@ module cv32e41p_if_stage #( .pc_o (pc_if_o) ); - cv32e41p_compressed_decoder #( - .FPU(FPU) - ) compressed_decoder_i ( - .instr_i (instr_aligned), - .instr_o (instr_decompressed), - .is_compressed_o(instr_compressed_int), - .illegal_instr_o(illegal_c_insn) - ); //---------------------------------------------------------------------------- // Assertions diff --git a/rtl/cv32e41p_int_controller.sv b/rtl/cv32e41p_int_controller.sv index d82d1ce..7403513 100644 --- a/rtl/cv32e41p_int_controller.sv +++ b/rtl/cv32e41p_int_controller.sv @@ -30,8 +30,8 @@ module cv32e41p_int_controller input logic rst_n, // External interrupt lines - input logic [31:0] irq_i, // Level-triggered interrupt inputs - input logic irq_sec_i, // Interrupt secure bit from EU + input logic [31:0] irq_i, // Level-triggered interrupt inputs + input logic irq_sec_i, // Interrupt secure bit from EU // To cv32e41p_controller output logic irq_req_ctrl_o, @@ -40,10 +40,10 @@ module cv32e41p_int_controller output logic irq_wu_ctrl_o, // To/from cv32e41p_cs_registers - input logic [31:0] mie_bypass_i, // MIE CSR (bypass) - output logic [31:0] mip_o, // MIP CSR - input logic m_ie_i, // Interrupt enable bit from CSR (M mode) - input logic u_ie_i, // Interrupt enable bit from CSR (U mode) + input logic [31:0] mie_bypass_i, // MIE CSR (bypass) + output logic [31:0] mip_o, // MIP CSR + input logic m_ie_i, // Interrupt enable bit from CSR (M mode) + input logic u_ie_i, // Interrupt enable bit from CSR (U mode) input PrivLvl_t current_priv_lvl_i ); diff --git a/rtl/cv32e41p_load_store_unit.sv b/rtl/cv32e41p_load_store_unit.sv index fccbc7e..4b3458e 100644 --- a/rtl/cv32e41p_load_store_unit.sv +++ b/rtl/cv32e41p_load_store_unit.sv @@ -43,27 +43,27 @@ module cv32e41p_load_store_unit #( input logic [31:0] data_rdata_i, // signals from ex stage - input logic data_we_ex_i, // write enable -> from ex stage - input logic [ 1:0] data_type_ex_i, // Data type word, halfword, byte -> from ex stage - input logic [31:0] data_wdata_ex_i, // data to write to memory -> from ex stage + input logic data_we_ex_i, // write enable -> from ex stage + input logic [ 1:0] data_type_ex_i, // Data type word, halfword, byte -> from ex stage + input logic [31:0] data_wdata_ex_i, // data to write to memory -> from ex stage input logic [ 1:0] data_reg_offset_ex_i, // offset inside register for stores -> from ex stage input logic data_load_event_ex_i, // load event -> from ex stage - input logic [ 1:0] data_sign_ext_ex_i, // sign extension -> from ex stage + input logic [ 1:0] data_sign_ext_ex_i, // sign extension -> from ex stage - output logic [31:0] data_rdata_ex_o, // requested data -> to ex stage - input logic data_req_ex_i, // data request -> from ex stage - input logic [31:0] operand_a_ex_i, // operand a from RF for address -> from ex stage - input logic [31:0] operand_b_ex_i, // operand b from RF for address -> from ex stage - input logic addr_useincr_ex_i, // use a + b or just a for address -> from ex stage + output logic [31:0] data_rdata_ex_o, // requested data -> to ex stage + input logic data_req_ex_i, // data request -> from ex stage + input logic [31:0] operand_a_ex_i, // operand a from RF for address -> from ex stage + input logic [31:0] operand_b_ex_i, // operand b from RF for address -> from ex stage + input logic addr_useincr_ex_i, // use a + b or just a for address -> from ex stage - input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline + input logic data_misaligned_ex_i, // misaligned access in last ld/st -> from ID/EX pipeline output logic data_misaligned_o, // misaligned access was detected -> to controller - input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage + input logic [5:0] data_atop_ex_i, // atomic instructions signal -> from ex stage output logic [5:0] data_atop_o, // atomic instruction signal -> core output output logic p_elw_start_o, // load event starts - output logic p_elw_finish_o, // load event finishes + output logic p_elw_finish_o, // load event finishes // stall signal output logic lsu_ready_ex_o, // LSU ready for new data in EX stage @@ -468,7 +468,7 @@ module cv32e41p_load_store_unit #( .resp_valid_o(resp_valid), .resp_rdata_o(resp_rdata), - .resp_err_o (resp_err), // Unused for now + .resp_err_o (resp_err), // Unused for now .obi_req_o (data_req_o), .obi_gnt_i (data_gnt_i), @@ -476,10 +476,10 @@ module cv32e41p_load_store_unit #( .obi_we_o (data_we_o), .obi_be_o (data_be_o), .obi_wdata_o (data_wdata_o), - .obi_atop_o (data_atop_o), // Not (yet) defined in OBI 1.0 spec + .obi_atop_o (data_atop_o), // Not (yet) defined in OBI 1.0 spec .obi_rdata_i (data_rdata_i), .obi_rvalid_i(data_rvalid_i), - .obi_err_i (data_err_i) // External bus error (validity defined by obi_rvalid_i) + .obi_err_i (data_err_i) // External bus error (validity defined by obi_rvalid_i) ); diff --git a/rtl/cv32e41p_merged_decoder.sv b/rtl/cv32e41p_merged_decoder.sv new file mode 100644 index 0000000..4f3236d --- /dev/null +++ b/rtl/cv32e41p_merged_decoder.sv @@ -0,0 +1,3248 @@ +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Engineer Andreas Traber - atraber@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// Matthias Baer - baermatt@student.ethz.ch // +// Igor Loi - igor.loi@unibo.it // +// Sven Stucki - svstucki@student.ethz.ch // +// Davide Schiavone - pschiavo@iis.ee.ethz.ch // +// // +// Design Name: Decoder // +// Project Name: RI5CY // +// Language: SystemVerilog // +// // +// Description: Decoder // +// // +//////////////////////////////////////////////////////////////////////////////// + +module cv32e41p_merged_decoder import cv32e41p_pkg::*; import cv32e41p_apu_core_pkg::*; import cv32e41p_fpu_pkg::*; +#( + parameter PULP_XPULP = 1, // PULP ISA Extension (including PULP specific CSRs and hardware loop, excluding p.elw) + parameter Zcea = 0, + parameter Zceb = 0, + parameter Zcec = 0, + parameter Zcee = 0, + parameter PULP_CLUSTER = 0, + parameter A_EXTENSION = 0, + parameter FPU = 0, + parameter PULP_SECURE = 0, + parameter USE_PMP = 0, + parameter APU_WOP_CPU = 6, + parameter DEBUG_TRIGGER_EN = 1 +) +( + // singals running to/from controller + input logic deassert_we_i, // deassert we, we are stalled or not active + + output logic illegal_insn_o, // illegal instruction encountered + output logic ebrk_insn_o, // trap instruction encountered + + output logic mret_insn_o, // return from exception instruction encountered (M) + output logic uret_insn_o, // return from exception instruction encountered (S) + output logic dret_insn_o, // return from debug (M) + + output logic mret_dec_o, // return from exception instruction encountered (M) without deassert + output logic uret_dec_o, // return from exception instruction encountered (S) without deassert + output logic dret_dec_o, // return from debug (M) without deassert + + output logic ecall_insn_o, // environment call (syscall) instruction encountered + output logic wfi_o , // pipeline flush is requested + + output logic fencei_insn_o, // fence.i instruction + + output logic rega_used_o, // rs1 is used by current instruction + output logic regb_used_o, // rs2 is used by current instruction + output logic regc_used_o, // rs3 is used by current instruction + + output logic reg_fp_a_o, // fp reg a is used + output logic reg_fp_b_o, // fp reg b is used + output logic reg_fp_c_o, // fp reg c is used + output logic reg_fp_d_o, // fp reg d is used + + output logic [ 0:0] bmask_a_mux_o, // bit manipulation mask a mux + output logic [ 1:0] bmask_b_mux_o, // bit manipulation mask b mux + output logic alu_bmask_a_mux_sel_o, // bit manipulation mask a mux (reg or imm) + output logic alu_bmask_b_mux_sel_o, // bit manipulation mask b mux (reg or imm) + + // from IF/ID pipeline + input logic [31:0] instr_rdata_i, // instruction read from instr memory/cache + + + output logic is_compressed_o, // compressed instruction decode failed + output logic illegal_c_insn_o, // compressed instruction decode failed + + // ALU signals + output logic alu_en_o, // ALU enable + output alu_opcode_e alu_operator_o, // ALU operation selection + output logic [2:0] alu_op_a_mux_sel_o, // operand a selection: reg value, PC, immediate or zero + output logic [2:0] alu_op_b_mux_sel_o, // operand b selection: reg value or immediate + output logic [1:0] alu_op_c_mux_sel_o, // operand c selection: reg value or jump target + output logic [1:0] alu_vec_mode_o, // selects between 32 bit, 16 bit and 8 bit vectorial modes + output logic scalar_replication_o, // scalar replication enable + output logic scalar_replication_c_o, // scalar replication enable for operand C + output logic [0:0] imm_a_mux_sel_o, // immediate selection for operand a + output logic [4:0] imm_b_mux_sel_o, // immediate selection for operand b + output logic [1:0] regc_mux_o, // register c selection: S3, RD or 0 + output logic is_clpx_o, // whether the instruction is complex (pulpv3) or not + output logic is_subrot_o, + + // MUL related control signals + output mul_opcode_e mult_operator_o, // Multiplication operation selection + output logic mult_int_en_o, // perform integer multiplication + output logic mult_dot_en_o, // perform dot multiplication + output logic [0:0] mult_imm_mux_o, // Multiplication immediate mux selector + output logic mult_sel_subword_o, // Select subwords for 16x16 bit of multiplier + output logic [1:0] mult_signed_mode_o, // Multiplication in signed mode + output logic [1:0] mult_dot_signed_o, // Dot product in signed mode + + // FPU + input logic [C_RM-1:0] frm_i, // Rounding mode from float CSR + + output logic [cv32e41p_fpu_pkg::FP_FORMAT_BITS-1:0] fpu_dst_fmt_o, // fpu destination format + output logic [cv32e41p_fpu_pkg::FP_FORMAT_BITS-1:0] fpu_src_fmt_o, // fpu source format + output logic [cv32e41p_fpu_pkg::INT_FORMAT_BITS-1:0] fpu_int_fmt_o, // fpu integer format (for casts) + + // APU + output logic apu_en_o, + output logic [APU_WOP_CPU-1:0] apu_op_o, + output logic [1:0] apu_lat_o, + output logic [2:0] fp_rnd_mode_o, + + // register file related signals + output logic regfile_mem_we_o, // write enable for regfile + output logic regfile_alu_we_o, // write enable for 2nd regfile port + output logic regfile_alu_we_dec_o, // write enable for 2nd regfile port without deassert + output logic regfile_alu_waddr_sel_o, // Select register write address for ALU/MUL operations + + // CSR manipulation + output logic csr_access_o, // access to CSR + output logic csr_status_o, // access to xstatus CSR + output csr_opcode_e csr_op_o, // operation to perform on CSR + input PrivLvl_t current_priv_lvl_i, // The current privilege level + + // LD/ST unit signals + output logic data_req_o, // start transaction to data memory + output logic data_we_o, // data memory write enable + output logic prepost_useincr_o, // when not active bypass the alu result for address calculation + output logic [1:0] data_type_o, // data type on data memory: byte, half word or word + output logic [1:0] data_sign_extension_o, // sign extension on read data from data memory / NaN boxing + output logic [1:0] data_reg_offset_o, // offset in byte inside register for stores + output logic data_load_event_o, // data request is in the special event range + + // Atomic memory access + output logic [5:0] atop_o, + + // hwloop signals + output logic [2:0] hwlp_we_o, // write enable for hwloop regs + output logic hwlp_target_mux_sel_o, // selects immediate for hwloop target + output logic hwlp_start_mux_sel_o, // selects hwloop start address input + output logic hwlp_cnt_mux_sel_o, // selects hwloop counter input + + input logic debug_mode_i, // processor is in debug mode + input logic debug_wfi_no_sleep_i, // do not let WFI cause sleep + + // jump/branches + output logic [1:0] ctrl_transfer_insn_in_dec_o, // control transfer instruction without deassert + output logic [1:0] ctrl_transfer_insn_in_id_o, // control transfer instructio is decoded + output logic [2:0] ctrl_transfer_target_mux_sel_o, // jump target selection + + + //Registers specifiers + output logic [4:0] addr_ra_id_o, + output logic [4:0] addr_rb_id_o, + output logic [4:0] waddr_id_o, + + // HPM related control signals + input logic [31:0] mcounteren_i +); + + + assign is_compressed_o = (instr_rdata_i[1:0] != 2'b11); + + // write enable/request control + logic regfile_mem_we; + logic regfile_alu_we; + logic data_req; + logic [2:0] hwlp_we; + logic csr_illegal; + logic [1:0] ctrl_transfer_insn; + + csr_opcode_e csr_op; + + logic alu_en; + logic mult_int_en; + logic mult_dot_en; + logic apu_en; + + // this instruction needs floating-point rounding-mode verification + logic check_fprm; + + logic [cv32e41p_fpu_pkg::OP_BITS-1:0] fpu_op; // fpu operation + logic fpu_op_mod; // fpu operation modifier + logic fpu_vec_op; // fpu vectorial operation + // unittypes for latencies to help us decode for APU + enum logic[1:0] {ADDMUL, DIVSQRT, NONCOMP, CONV} fp_op_group; + + + ///////////////////////////////////////////// + // ____ _ // + // | _ \ ___ ___ ___ __| | ___ _ __ // + // | | | |/ _ \/ __/ _ \ / _` |/ _ \ '__| // + // | |_| | __/ (_| (_) | (_| | __/ | // + // |____/ \___|\___\___/ \__,_|\___|_| // + // // + ///////////////////////////////////////////// + + always_comb + begin + ctrl_transfer_insn = BRANCH_NONE; + ctrl_transfer_target_mux_sel_o = JT_JAL; + + alu_en = 1'b1; + alu_operator_o = ALU_SLTU; + alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD; + alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; + alu_op_c_mux_sel_o = OP_C_REGC_OR_FWD; + alu_vec_mode_o = VEC_MODE32; + scalar_replication_o = 1'b0; + scalar_replication_c_o = 1'b0; + regc_mux_o = REGC_ZERO; + imm_a_mux_sel_o = IMMA_ZERO; + imm_b_mux_sel_o = IMMB_I; + + mult_operator_o = MUL_I; + mult_int_en = 1'b0; + mult_dot_en = 1'b0; + mult_imm_mux_o = MIMM_ZERO; + mult_signed_mode_o = 2'b00; + mult_sel_subword_o = 1'b0; + mult_dot_signed_o = 2'b00; + + apu_en = 1'b0; + apu_op_o = '0; + apu_lat_o = '0; + fp_rnd_mode_o = '0; + fpu_op = cv32e41p_fpu_pkg::SGNJ; + fpu_op_mod = 1'b0; + fpu_vec_op = 1'b0; + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; + fpu_int_fmt_o = cv32e41p_fpu_pkg::INT32; + check_fprm = 1'b0; + fp_op_group = ADDMUL; + + regfile_mem_we = 1'b0; + regfile_alu_we = 1'b0; + regfile_alu_waddr_sel_o = 1'b1; + + prepost_useincr_o = 1'b1; + + hwlp_we = 3'b0; + hwlp_target_mux_sel_o = 1'b0; + hwlp_start_mux_sel_o = 1'b0; + hwlp_cnt_mux_sel_o = 1'b0; + + csr_access_o = 1'b0; + csr_status_o = 1'b0; + csr_illegal = 1'b0; + csr_op = CSR_OP_READ; + mret_insn_o = 1'b0; + uret_insn_o = 1'b0; + + dret_insn_o = 1'b0; + + data_we_o = 1'b0; + data_type_o = 2'b00; + data_sign_extension_o = 2'b00; + data_reg_offset_o = 2'b00; + data_req = 1'b0; + data_load_event_o = 1'b0; + + atop_o = 6'b000000; + + illegal_insn_o = 1'b0; + ebrk_insn_o = 1'b0; + ecall_insn_o = 1'b0; + wfi_o = 1'b0; + + fencei_insn_o = 1'b0; + + rega_used_o = 1'b0; + regb_used_o = 1'b0; + regc_used_o = 1'b0; + reg_fp_a_o = 1'b0; + reg_fp_b_o = 1'b0; + reg_fp_c_o = 1'b0; + reg_fp_d_o = 1'b0; + + bmask_a_mux_o = BMASK_A_ZERO; + bmask_b_mux_o = BMASK_B_ZERO; + alu_bmask_a_mux_sel_o = BMASK_A_IMM; + alu_bmask_b_mux_sel_o = BMASK_B_IMM; + + is_clpx_o = 1'b0; + is_subrot_o = 1'b0; + + mret_dec_o = 1'b0; + uret_dec_o = 1'b0; + dret_dec_o = 1'b0; + + + addr_ra_id_o = 5'd0; + addr_rb_id_o = 5'd0; + waddr_id_o = 5'd0; + + illegal_c_insn_o = 1'b0; + + // 32 bit instruction + if (instr_rdata_i[1:0] == 2'b11) begin + + addr_ra_id_o = instr_rdata_i[REG_S1_MSB:REG_S1_LSB]; + addr_rb_id_o = instr_rdata_i[REG_S2_MSB:REG_S2_LSB]; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + unique case (instr_rdata_i[6:0]) + + ////////////////////////////////////// + // _ _ _ __ __ ____ ____ // + // | | | | | \/ | _ \/ ___| // + // _ | | | | | |\/| | |_) \___ \ // + // | |_| | |_| | | | | __/ ___) | // + // \___/ \___/|_| |_|_| |____/ // + // // + ////////////////////////////////////// + + OPCODE_JAL: begin // Jump and Link + ctrl_transfer_target_mux_sel_o = JT_JAL; + ctrl_transfer_insn = BRANCH_JAL; + // Calculate and store PC+4 + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_PCINCR; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + // Calculate jump target (= PC + UJ imm) + end + + OPCODE_JALR: begin // Jump and Link Register + ctrl_transfer_target_mux_sel_o = JT_JALR; + ctrl_transfer_insn = BRANCH_JALR; + // Calculate and store PC+4 + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_PCINCR; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + // Calculate jump target (= RS1 + I imm) + rega_used_o = 1'b1; + + if (instr_rdata_i[14:12] != 3'b0) begin + ctrl_transfer_insn = BRANCH_NONE; + regfile_alu_we = 1'b0; + illegal_insn_o = 1'b1; + end + end + + OPCODE_BRANCH: begin // Branch + ctrl_transfer_target_mux_sel_o = JT_COND; + ctrl_transfer_insn = BRANCH_COND; + alu_op_c_mux_sel_o = OP_C_JT; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + + unique case (instr_rdata_i[14:12]) + 3'b000: alu_operator_o = ALU_EQ; + 3'b001: alu_operator_o = ALU_NE; + 3'b100: alu_operator_o = ALU_LTS; + 3'b101: alu_operator_o = ALU_GES; + 3'b110: alu_operator_o = ALU_LTU; + 3'b111: alu_operator_o = ALU_GEU; + 3'b010: begin // p.beqimm + if (PULP_XPULP) begin + alu_operator_o = ALU_EQ; + regb_used_o = 1'b0; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_BI; + end else begin + illegal_insn_o = 1'b1; + end + end + 3'b011: begin // p.bneimm + if (PULP_XPULP) begin + alu_operator_o = ALU_NE; + regb_used_o = 1'b0; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_BI; + end else begin + illegal_insn_o = 1'b1; + end + end + endcase + end + + + ////////////////////////////////// + // _ ____ ______ _____ // + // | | | _ \ / / ___|_ _| // + // | | | | | |/ /\___ \ | | // + // | |___| |_| / / ___) || | // + // |_____|____/_/ |____/ |_| // + // // + ////////////////////////////////// + + OPCODE_STORE, + OPCODE_STORE_POST: begin + if (PULP_XPULP || (instr_rdata_i[6:0] == OPCODE_STORE)) begin + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + // pass write data through ALU operand c + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + + // post-increment setup + if (instr_rdata_i[6:0] == OPCODE_STORE_POST) begin + prepost_useincr_o = 1'b0; + regfile_alu_waddr_sel_o = 1'b0; + regfile_alu_we = 1'b1; + end + + if (instr_rdata_i[14] == 1'b0) begin + // offset from immediate + imm_b_mux_sel_o = IMMB_S; + alu_op_b_mux_sel_o = OP_B_IMM; + end else begin + if (PULP_XPULP) begin + // offset from register + regc_used_o = 1'b1; + alu_op_b_mux_sel_o = OP_B_REGC_OR_FWD; + regc_mux_o = REGC_RD; + end else begin + illegal_insn_o = 1'b1; + end + end + + // store size + unique case (instr_rdata_i[13:12]) + 2'b00: data_type_o = 2'b10; // SB + 2'b01: data_type_o = 2'b01; // SH + 2'b10: data_type_o = 2'b00; // SW + default: begin + data_req = 1'b0; + data_we_o = 1'b0; + illegal_insn_o = 1'b1; + end + endcase + end else begin + illegal_insn_o = 1'b1; + end + end + + OPCODE_LOAD, + OPCODE_LOAD_POST: begin + if (PULP_XPULP || (instr_rdata_i[6:0] == OPCODE_LOAD)) begin + data_req = 1'b1; + regfile_mem_we = 1'b1; + rega_used_o = 1'b1; + data_type_o = 2'b00; + // offset from immediate + alu_operator_o = ALU_ADD; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_I; + + // post-increment setup + if (instr_rdata_i[6:0] == OPCODE_LOAD_POST) begin + prepost_useincr_o = 1'b0; + regfile_alu_waddr_sel_o = 1'b0; + regfile_alu_we = 1'b1; + end + + // sign/zero extension + data_sign_extension_o = {1'b0,~instr_rdata_i[14]}; + + // load size + unique case (instr_rdata_i[13:12]) + 2'b00: data_type_o = 2'b10; // LB + 2'b01: data_type_o = 2'b01; // LH + 2'b10: data_type_o = 2'b00; // LW + default: data_type_o = 2'b00; // illegal or reg-reg + endcase + + // reg-reg load (different encoding) + if (instr_rdata_i[14:12] == 3'b111) begin + if (PULP_XPULP) begin + // offset from RS2 + regb_used_o = 1'b1; + alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; + + // sign/zero extension + data_sign_extension_o = {1'b0, ~instr_rdata_i[30]}; + + // load size + unique case (instr_rdata_i[31:25]) + 7'b0000_000, + 7'b0100_000: data_type_o = 2'b10; // LB, LBU + 7'b0001_000, + 7'b0101_000: data_type_o = 2'b01; // LH, LHU + 7'b0010_000: data_type_o = 2'b00; // LW + default: begin + illegal_insn_o = 1'b1; + end + endcase + end else begin + illegal_insn_o = 1'b1; + end + end + + // special p.elw (event load) + if (instr_rdata_i[14:12] == 3'b110) begin + if (PULP_CLUSTER && (instr_rdata_i[6:0] == OPCODE_LOAD)) begin + data_load_event_o = 1'b1; + end else begin + // p.elw only valid for PULP_CLUSTER = 1; p.elw with post increment does not exist + illegal_insn_o = 1'b1; + end + end + + if (instr_rdata_i[14:12] == 3'b011) begin + // LD -> RV64 only + illegal_insn_o = 1'b1; + end + end else begin + illegal_insn_o = 1'b1; + end + end + + OPCODE_AMO: begin + if (A_EXTENSION) begin : decode_amo + if (instr_rdata_i[14:12] == 3'b010) begin // RV32A Extension (word) + data_req = 1'b1; + data_type_o = 2'b00; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + regfile_mem_we = 1'b1; + prepost_useincr_o = 1'b0; // only use alu_operand_a as address (not a+b) + alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD; + + data_sign_extension_o = 1'b1; + + // Apply AMO instruction at `atop_o`. + atop_o = {1'b1, instr_rdata_i[31:27]}; + + unique case (instr_rdata_i[31:27]) + AMO_LR: begin + data_we_o = 1'b0; + end + AMO_SC, + AMO_SWAP, + AMO_ADD, + AMO_XOR, + AMO_AND, + AMO_OR, + AMO_MIN, + AMO_MAX, + AMO_MINU, + AMO_MAXU: begin + data_we_o = 1'b1; + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; // pass write data through ALU operand c + end + default : illegal_insn_o = 1'b1; + endcase + end + else begin + illegal_insn_o = 1'b1; + end + end else begin : no_decode_amo + illegal_insn_o = 1'b1; + end + end + + + ////////////////////////// + // _ _ _ _ // + // / \ | | | | | | // + // / _ \ | | | | | | // + // / ___ \| |__| |_| | // + // /_/ \_\_____\___/ // + // // + ////////////////////////// + + OPCODE_LUI: begin // Load Upper Immediate + alu_op_a_mux_sel_o = OP_A_IMM; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_a_mux_sel_o = IMMA_ZERO; + imm_b_mux_sel_o = IMMB_U; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + end + + OPCODE_AUIPC: begin // Add Upper Immediate to PC + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_U; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + end + + OPCODE_OPIMM: begin // Register-Immediate ALU Operations + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_I; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + + unique case (instr_rdata_i[14:12]) + 3'b000: alu_operator_o = ALU_ADD; // Add Immediate + 3'b010: alu_operator_o = ALU_SLTS; // Set to one if Lower Than Immediate + 3'b011: alu_operator_o = ALU_SLTU; // Set to one if Lower Than Immediate Unsigned + 3'b100: alu_operator_o = ALU_XOR; // Exclusive Or with Immediate + 3'b110: alu_operator_o = ALU_OR; // Or with Immediate + 3'b111: alu_operator_o = ALU_AND; // And with Immediate + + 3'b001: begin + alu_operator_o = ALU_SLL; // Shift Left Logical by Immediate + if (instr_rdata_i[31:25] != 7'b0) + illegal_insn_o = 1'b1; + end + + 3'b101: begin + if (instr_rdata_i[31:25] == 7'b0) + alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate + else if (instr_rdata_i[31:25] == 7'b010_0000) + alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate + else + illegal_insn_o = 1'b1; + end + + + endcase + end + + OPCODE_OP: begin // Register-Register ALU operation + + // PREFIX 11 + if (instr_rdata_i[31:30] == 2'b11) begin + if (PULP_XPULP) begin + ////////////////////////////// + // IMMEDIATE BIT-MANIPULATION + ////////////////////////////// + + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + + // bit-manipulation instructions + bmask_a_mux_o = BMASK_A_S3; + bmask_b_mux_o = BMASK_B_S2; + alu_op_b_mux_sel_o = OP_B_IMM; + + unique case (instr_rdata_i[14:12]) + 3'b000: begin + alu_operator_o = ALU_BEXT; + imm_b_mux_sel_o = IMMB_S2; + bmask_b_mux_o = BMASK_B_ZERO; + end + 3'b001: begin + alu_operator_o = ALU_BEXTU; + imm_b_mux_sel_o = IMMB_S2; + bmask_b_mux_o = BMASK_B_ZERO; + end + 3'b010: begin + alu_operator_o = ALU_BINS; + imm_b_mux_sel_o = IMMB_S2; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + end + 3'b011: begin + alu_operator_o = ALU_BCLR; + end + 3'b100: begin + alu_operator_o = ALU_BSET; + end + 3'b101: begin + alu_operator_o = ALU_BREV; + // Enable write back to RD + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + // Extract the source register on operand a + imm_b_mux_sel_o = IMMB_S2; + // Map the radix to bmask_a immediate + alu_bmask_a_mux_sel_o = BMASK_A_IMM; + end + default: illegal_insn_o = 1'b1; + endcase + end else begin + illegal_insn_o = 1'b1; + end + end + + // PREFIX 10 + else if (instr_rdata_i[31:30] == 2'b10) begin + ////////////////////////////// + // REGISTER BIT-MANIPULATION + ////////////////////////////// + if (instr_rdata_i[29:25]==5'b00000) begin + if (PULP_XPULP) begin + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + + bmask_a_mux_o = BMASK_A_S3; + bmask_b_mux_o = BMASK_B_S2; + alu_op_b_mux_sel_o = OP_B_IMM; + + unique case (instr_rdata_i[14:12]) + 3'b000: begin + alu_operator_o = ALU_BEXT; + imm_b_mux_sel_o = IMMB_S2; + bmask_b_mux_o = BMASK_B_ZERO; + //register variant + alu_op_b_mux_sel_o = OP_B_BMASK; + alu_bmask_a_mux_sel_o = BMASK_A_REG; + regb_used_o = 1'b1; + end + 3'b001: begin + alu_operator_o = ALU_BEXTU; + imm_b_mux_sel_o = IMMB_S2; + bmask_b_mux_o = BMASK_B_ZERO; + //register variant + alu_op_b_mux_sel_o = OP_B_BMASK; + alu_bmask_a_mux_sel_o = BMASK_A_REG; + regb_used_o = 1'b1; + end + 3'b010: begin + alu_operator_o = ALU_BINS; + imm_b_mux_sel_o = IMMB_S2; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + //register variant + alu_op_b_mux_sel_o = OP_B_BMASK; + alu_bmask_a_mux_sel_o = BMASK_A_REG; + alu_bmask_b_mux_sel_o = BMASK_B_REG; + regb_used_o = 1'b1; + end + 3'b011: begin + alu_operator_o = ALU_BCLR; + //register variant + regb_used_o = 1'b1; + alu_bmask_a_mux_sel_o = BMASK_A_REG; + alu_bmask_b_mux_sel_o = BMASK_B_REG; + end + 3'b100: begin + alu_operator_o = ALU_BSET; + //register variant + regb_used_o = 1'b1; + alu_bmask_a_mux_sel_o = BMASK_A_REG; + alu_bmask_b_mux_sel_o = BMASK_B_REG; + end + default: illegal_insn_o = 1'b1; + endcase + end else begin + illegal_insn_o = 1'b1; + end + + /////////////////////// + // VECTORIAL FLOAT OPS + /////////////////////// + end else begin + // Vectorial FP not available in 'old' shared FPU + if (FPU==1 && C_XFVEC) begin + + // using APU instead of ALU + apu_en = 1'b1; + alu_en = 1'b0; + // by default, set all registers to FP registers and use 2 + rega_used_o = 1'b1; + regb_used_o = 1'b1; + reg_fp_a_o = 1'b1; + reg_fp_b_o = 1'b1; + reg_fp_d_o = 1'b1; + fpu_vec_op = 1'b1; + // replication bit comes from instruction (can change for some ops) + scalar_replication_o = instr_rdata_i[14]; + // by default we need to verify rm is legal but assume it is for now + check_fprm = 1'b1; + fp_rnd_mode_o = frm_i; // all vectorial ops have rm from fcsr + + // Decode Formats + unique case (instr_rdata_i[13:12]) + // FP32 + 2'b00: begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; + alu_vec_mode_o = VEC_MODE32; + end + // FP16ALT + 2'b01: begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + alu_vec_mode_o = VEC_MODE16; + end + // FP16 + 2'b10: begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16; + alu_vec_mode_o = VEC_MODE16; + end + // FP8 + 2'b11: begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP8; + alu_vec_mode_o = VEC_MODE8; + end + endcase + + // By default, src=dst + fpu_src_fmt_o = fpu_dst_fmt_o; + + // decode vectorial FP instruction + unique case (instr_rdata_i[29:25]) inside + // vfadd.vfmt - Vectorial FP Addition + 5'b00001: begin + fpu_op = cv32e41p_fpu_pkg::ADD; + fp_op_group = ADDMUL; + // FPnew needs addition operands as operand B and C + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + scalar_replication_o = 1'b0; + scalar_replication_c_o = instr_rdata_i[14]; + end + // vfsub.vfmt - Vectorial FP Subtraction + 5'b00010: begin + fpu_op = cv32e41p_fpu_pkg::ADD; + fpu_op_mod = 1'b1; + fp_op_group = ADDMUL; + // FPnew needs addition operands as operand B and C + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + scalar_replication_o = 1'b0; + scalar_replication_c_o = instr_rdata_i[14]; + end + // vfmul.vfmt - Vectorial FP Multiplication + 5'b00011: begin + fpu_op = cv32e41p_fpu_pkg::MUL; + fp_op_group = ADDMUL; + end + // vfdiv.vfmt - Vectorial FP Division + 5'b00100: begin + fpu_op = cv32e41p_fpu_pkg::DIV; + fp_op_group = DIVSQRT; + end + // vfmin.vfmt - Vectorial FP Minimum + 5'b00101: begin + fpu_op = cv32e41p_fpu_pkg::MINMAX; + fp_rnd_mode_o = 3'b000; // min + fp_op_group = NONCOMP; + check_fprm = 1'b0; // instruction encoded in rm + end + // vfmax.vfmt - Vectorial FP Maximum + 5'b00110: begin + fpu_op = cv32e41p_fpu_pkg::MINMAX; + fp_rnd_mode_o = 3'b001; // max + fp_op_group = NONCOMP; + check_fprm = 1'b0; // instruction encoded in rm + end + // vfsqrt.vfmt - Vectorial FP Square Root + 5'b00111: begin + regb_used_o = 1'b0; + fpu_op = cv32e41p_fpu_pkg::SQRT; + fp_op_group = DIVSQRT; + // rs2 and R must be zero + if ((instr_rdata_i[24:20] != 5'b00000) || instr_rdata_i[14]) begin + illegal_insn_o = 1'b1; + end + end + // vfmac.vfmt - Vectorial FP Multiply-Accumulate + 5'b01000: begin + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; // third operand is rd + reg_fp_c_o = 1'b1; + fpu_op = cv32e41p_fpu_pkg::FMADD; + fp_op_group = ADDMUL; + end + // vfmre.vfmt - Vectorial FP Multiply-Reduce + 5'b01001: begin + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; // third operand is rd + reg_fp_c_o = 1'b1; + fpu_op = cv32e41p_fpu_pkg::FMADD; + fpu_op_mod = 1'b1; + fp_op_group = ADDMUL; + end + // Moves, Conversions, Classifications + 5'b01100: begin + regb_used_o = 1'b0; + scalar_replication_o = 1'b0; + // Decode Operation in rs2 + unique case (instr_rdata_i[24:20]) inside + // vfmv.{x.vfmt/vfmt.x} - Vectorial FP Reg <-> GP Reg Moves + 5'b00000: begin + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + fpu_op = cv32e41p_fpu_pkg::SGNJ; + fp_rnd_mode_o = 3'b011; // passthrough without checking nan-box + fp_op_group = NONCOMP; + check_fprm = 1'b0; + // GP reg to FP reg + if (instr_rdata_i[14]) begin + reg_fp_a_o = 1'b0; // go from integer regfile + fpu_op_mod = 1'b0; // nan-box result + end + // FP reg to GP reg + else begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op_mod = 1'b1; // sign-extend result + end + end + // vfclass.vfmt - Vectorial FP Classifications + 5'b00001: begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::CLASSIFY; + fp_rnd_mode_o = 3'b000; + fp_op_group = NONCOMP; + check_fprm = 1'b0; + // R must not be set + if (instr_rdata_i[14]) illegal_insn_o = 1'b1; + end + // vfcvt.{x.vfmt/vfmt.x} - Vectorial FP <-> Int Conversions + 5'b0001?: begin + fp_op_group = CONV; + fpu_op_mod = instr_rdata_i[14]; // signed/unsigned switch + // Integer width matches FP width + unique case (instr_rdata_i[13:12]) + // FP32 + 2'b00 : fpu_int_fmt_o = cv32e41p_fpu_pkg::INT32; + // FP16[ALT] + 2'b01, + 2'b10: fpu_int_fmt_o = cv32e41p_fpu_pkg::INT16; + // FP8 + 2'b11: fpu_int_fmt_o = cv32e41p_fpu_pkg::INT8; + endcase + // Int to FP conversion + if (instr_rdata_i[20]) begin + reg_fp_a_o = 1'b0; // go from integer regfile + fpu_op = cv32e41p_fpu_pkg::I2F; + end + // FP to Int conversion + else begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::F2I; + end + end + // vfcvt.vfmt.vfmt - Vectorial FP <-> FP Conversions + 5'b001??: begin + fpu_op = cv32e41p_fpu_pkg::F2F; + fp_op_group = CONV; + // check source format + unique case (instr_rdata_i[21:20]) + // Only process instruction if corresponding extension is active (static) + 2'b00: begin + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; + if (~C_RVF) illegal_insn_o = 1'b1; + end + 2'b01: begin + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + if (~C_XF16ALT) illegal_insn_o = 1'b1; + end + 2'b10: begin + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16; + if (~C_XF16) illegal_insn_o = 1'b1; + end + 2'b11: begin + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP8; + if (~C_XF8) illegal_insn_o = 1'b1; + end + endcase + // R must not be set + if (instr_rdata_i[14]) illegal_insn_o = 1'b1; + end + // others + default : illegal_insn_o = 1'b1; + endcase + end + // vfsgnj.vfmt - Vectorial FP Sign Injection + 5'b01101: begin + fpu_op = cv32e41p_fpu_pkg::SGNJ; + fp_rnd_mode_o = 3'b000; // sgnj + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfsgnjn.vfmt - Vectorial FP Negated Sign Injection + 5'b01110: begin + fpu_op = cv32e41p_fpu_pkg::SGNJ; + fp_rnd_mode_o = 3'b001; // sgnjn + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfsgnjx.vfmt - Vectorial FP Xored Sign Injection + 5'b01111: begin + fpu_op = cv32e41p_fpu_pkg::SGNJ; + fp_rnd_mode_o = 3'b010; // sgnjx + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfeq.vfmt - Vectorial FP Equals + 5'b10000: begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::CMP; + fp_rnd_mode_o = 3'b010; // eq + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfne.vfmt - Vectorial FP Not Equals + 5'b10001: begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::CMP; + fpu_op_mod = 1'b1; // invert output + fp_rnd_mode_o = 3'b010; // eq + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vflt.vfmt - Vectorial FP Less Than + 5'b10010: begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::CMP; + fp_rnd_mode_o = 3'b001; // lt + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfge.vfmt - Vectorial FP Greater Than or Equals + 5'b10011: begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::CMP; + fpu_op_mod = 1'b1; // invert output + fp_rnd_mode_o = 3'b001; // lt + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfle.vfmt - Vectorial FP Less Than or Equals + 5'b10100: begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::CMP; + fp_rnd_mode_o = 3'b000; // le + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfgt.vfmt - Vectorial FP Greater Than + 5'b10101: begin + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::CMP; + fpu_op_mod = 1'b1; // invert output + fp_rnd_mode_o = 3'b000; // le + fp_op_group = NONCOMP; + check_fprm = 1'b0; + end + // vfcpk{a-d}.vfmt.s/d + 5'b110??: begin + // vfcpk{{a/c}/{b/d}} selection in R bit + fpu_op_mod = instr_rdata_i[14]; + fp_op_group = CONV; + scalar_replication_o = 1'b0; + + if (instr_rdata_i[25]) fpu_op = cv32e41p_fpu_pkg::CPKCD; // vfcpk{c/d} + else fpu_op = cv32e41p_fpu_pkg::CPKAB; // vfcpk{a/b} + + // vfcpk{a-d}.vfmt.d - from double + if (instr_rdata_i[26]) begin + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP64; + if (~C_RVD) illegal_insn_o = 1'b1; + end + // vfcpk{a-d}.vfmt.s + else begin + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; + if (~C_RVF) illegal_insn_o = 1'b1; + end + // Resolve legal vfcpk / format combinations (mostly static) + if (fpu_op == cv32e41p_fpu_pkg::CPKCD) begin // vfcpk{c/d} not possible unless FP8 and FLEN>=64 + if (~C_XF8 || ~C_RVD) illegal_insn_o = 1'b1; + end else begin + if (instr_rdata_i[14]) begin // vfcpkb + // vfcpkb not possible for FP32 + if (fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; + // vfcpkb not possible for FP16[ALT] if not RVD + if (~C_RVD && (fpu_dst_fmt_o != cv32e41p_fpu_pkg::FP8)) illegal_insn_o = 1'b1; + end + end + end + // Rest are illegal instructions + default: begin + illegal_insn_o = 1'b1; + end + endcase + + // check enabled formats (static) + // need RVD for F vectors + if ((~C_RVF || ~C_RVD) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; + // need RVF for F16 vectors + if ((~C_XF16 || ~C_RVF) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16) illegal_insn_o = 1'b1; + // need RVF for F16 vectors + if ((~C_XF16ALT || ~C_RVF) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16ALT) begin + illegal_insn_o = 1'b1; + end + // need F16 for F8 vectors + if ((~C_XF8 || (~C_XF16 && ~C_XF16ALT)) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP8) begin + illegal_insn_o = 1'b1; + end + + // check rounding mode + if (check_fprm) begin + unique case (frm_i) inside + [3'b000:3'b100] : ; //legal rounding modes + default : illegal_insn_o = 1'b1; + endcase + end + + // Set latencies for FPnew from config. The C_LAT constants contain the number + // of pipeline registers. the APU takes the following values: + // 1 = single cycle (no latency), 2 = one pipestage, 3 = two or more pipestages + case (fp_op_group) + // ADDMUL has format dependent latency + ADDMUL : begin + unique case (fpu_dst_fmt_o) + cv32e41p_fpu_pkg::FP32 : apu_lat_o = (C_LAT_FP32<2) ? C_LAT_FP32+1 : 2'h3; + cv32e41p_fpu_pkg::FP16 : apu_lat_o = (C_LAT_FP16<2) ? C_LAT_FP16+1 : 2'h3; + cv32e41p_fpu_pkg::FP16ALT : apu_lat_o = (C_LAT_FP16ALT<2) ? C_LAT_FP16ALT+1 : 2'h3; + cv32e41p_fpu_pkg::FP8 : apu_lat_o = (C_LAT_FP8<2) ? C_LAT_FP8+1 : 2'h3; + default : ; + endcase + end + // DIVSQRT is iterative and takes more than 2 cycles + DIVSQRT : apu_lat_o = 2'h3; + // NONCOMP uses the same latency for all formats + NONCOMP : apu_lat_o = (C_LAT_NONCOMP<2) ? C_LAT_NONCOMP+1 : 2'h3; + // CONV uses the same latency for all formats + CONV : apu_lat_o = (C_LAT_CONV<2) ? C_LAT_CONV+1 : 2'h3; + endcase + + // Set FPnew OP and OPMOD as the APU op + apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; + end + // FPU!=1 or no Vectors or old shared unit + else begin + illegal_insn_o = 1'b1; + end + end // Vectorial Float Ops + + end // prefix 10 + + // PREFIX 00/01 + else begin + // non bit-manipulation instructions + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + + if (~instr_rdata_i[28]) regb_used_o = 1'b1; + + unique case ({instr_rdata_i[30:25], instr_rdata_i[14:12]}) + // RV32I ALU operations + {6'b00_0000, 3'b000}: alu_operator_o = ALU_ADD; // Add + {6'b10_0000, 3'b000}: alu_operator_o = ALU_SUB; // Sub + {6'b00_0000, 3'b010}: alu_operator_o = ALU_SLTS; // Set Lower Than + {6'b00_0000, 3'b011}: alu_operator_o = ALU_SLTU; // Set Lower Than Unsigned + {6'b00_0000, 3'b100}: alu_operator_o = ALU_XOR; // Xor + {6'b00_0000, 3'b110}: alu_operator_o = ALU_OR; // Or + {6'b00_0000, 3'b111}: alu_operator_o = ALU_AND; // And + {6'b00_0000, 3'b001}: alu_operator_o = ALU_SLL; // Shift Left Logical + {6'b00_0000, 3'b101}: alu_operator_o = ALU_SRL; // Shift Right Logical + {6'b10_0000, 3'b101}: alu_operator_o = ALU_SRA; // Shift Right Arithmetic + + // supported RV32M instructions + {6'b00_0001, 3'b000}: begin // mul + alu_en = 1'b0; + mult_int_en = 1'b1; + mult_operator_o = MUL_MAC32; + regc_mux_o = REGC_ZERO; + end + {6'b00_0001, 3'b001}: begin // mulh + alu_en = 1'b0; + regc_used_o = 1'b1; + regc_mux_o = REGC_ZERO; + mult_signed_mode_o = 2'b11; + mult_int_en = 1'b1; + mult_operator_o = MUL_H; + end + {6'b00_0001, 3'b010}: begin // mulhsu + alu_en = 1'b0; + regc_used_o = 1'b1; + regc_mux_o = REGC_ZERO; + mult_signed_mode_o = 2'b01; + mult_int_en = 1'b1; + mult_operator_o = MUL_H; + end + {6'b00_0001, 3'b011}: begin // mulhu + alu_en = 1'b0; + regc_used_o = 1'b1; + regc_mux_o = REGC_ZERO; + mult_signed_mode_o = 2'b00; + mult_int_en = 1'b1; + mult_operator_o = MUL_H; + end + {6'b00_0001, 3'b100}: begin // div + alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + regb_used_o = 1'b1; + alu_operator_o = ALU_DIV; + end + {6'b00_0001, 3'b101}: begin // divu + alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + regb_used_o = 1'b1; + alu_operator_o = ALU_DIVU; + end + {6'b00_0001, 3'b110}: begin // rem + alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + regb_used_o = 1'b1; + alu_operator_o = ALU_REM; + end + {6'b00_0001, 3'b111}: begin // remu + alu_op_a_mux_sel_o = OP_A_REGB_OR_FWD; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + regb_used_o = 1'b1; + alu_operator_o = ALU_REMU; + end + + // PULP specific instructions + {6'b10_0001, 3'b000}: begin // p.mac + if (PULP_XPULP) begin + alu_en = 1'b0; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + mult_int_en = 1'b1; + mult_operator_o = MUL_MAC32; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b10_0001, 3'b001}: begin // p.msu + if (PULP_XPULP) begin + alu_en = 1'b0; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + mult_int_en = 1'b1; + mult_operator_o = MUL_MSU32; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0010, 3'b010}: begin // Set Lower Equal Than - p.slet + if (PULP_XPULP) begin + alu_operator_o = ALU_SLETS; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0010, 3'b011}: begin // Set Lower Equal Than Unsigned; p.sletu + if (PULP_XPULP) begin + alu_operator_o = ALU_SLETU; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0010, 3'b100}: begin // Min - p.min + if (PULP_XPULP) begin + alu_operator_o = ALU_MIN; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0010, 3'b101}: begin // Min Unsigned - p.minu + if (PULP_XPULP) begin + alu_operator_o = ALU_MINU; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0010, 3'b110}: begin // Max - p.max + if (PULP_XPULP) begin + alu_operator_o = ALU_MAX; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0010, 3'b111}: begin // Max Unsigned - p.maxu + if (PULP_XPULP) begin + alu_operator_o = ALU_MAXU; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0100, 3'b101}: begin // Rotate Right - p.ror + if (PULP_XPULP) begin + alu_operator_o = ALU_ROR; + end else begin + illegal_insn_o = 1'b1; + end + end + + // PULP specific instructions using only one source register + + {6'b00_1000, 3'b000}: begin // Find First 1 - p.ff1 + if (PULP_XPULP) begin + alu_operator_o = ALU_FF1; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1000, 3'b001}: begin // Find Last 1 - p.fl1 + if (PULP_XPULP) begin + alu_operator_o = ALU_FL1; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1000, 3'b010}: begin // Count Leading Bits - p.clb + if (PULP_XPULP) begin + alu_operator_o = ALU_CLB; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1000, 3'b011}: begin // Count set bits (popcount) - p.cnt + if (PULP_XPULP) begin + alu_operator_o = ALU_CNT; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1000, 3'b100}: begin // Sign-extend Halfword - p.exths + if (PULP_XPULP) begin + alu_operator_o = ALU_EXTS; + alu_vec_mode_o = VEC_MODE16; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1000, 3'b101}: begin // Zero-extend Halfword - p.exthz + if (PULP_XPULP) begin + alu_operator_o = ALU_EXT; + alu_vec_mode_o = VEC_MODE16; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1000, 3'b110}: begin // Sign-extend Byte - p.extbs + if (PULP_XPULP) begin + alu_operator_o = ALU_EXTS; + alu_vec_mode_o = VEC_MODE8; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1000, 3'b111}: begin // Zero-extend Byte - p.extbz + if (PULP_XPULP) begin + alu_operator_o = ALU_EXT; + alu_vec_mode_o = VEC_MODE8; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_0010, 3'b000}: begin // p.abs + if (PULP_XPULP) begin + alu_operator_o = ALU_ABS; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1010, 3'b001}: begin // p.clip + if (PULP_XPULP) begin + alu_operator_o = ALU_CLIP; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CLIP; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1010, 3'b010}: begin // p.clipu + if (PULP_XPULP) begin + alu_operator_o = ALU_CLIPU; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CLIP; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1010, 3'b101}: begin // p.clipr + if (PULP_XPULP) begin + alu_operator_o = ALU_CLIP; + regb_used_o = 1'b1; + end else begin + illegal_insn_o = 1'b1; + end + end + {6'b00_1010, 3'b110}: begin // p.clipur + if (PULP_XPULP) begin + alu_operator_o = ALU_CLIPU; + regb_used_o = 1'b1; + end else begin + illegal_insn_o = 1'b1; + end + end + + default: begin + illegal_insn_o = 1'b1; + end + endcase + end + end + + //////////////////////////// + // ______ _____ _ _ // + // | ____| __ \| | | | // + // | |__ | |__) | | | | // + // | __| | ___/| | | | // + // | | | | | |__| | // + // |_| |_| \____/ // + // // + //////////////////////////// + + // floating point arithmetic + OPCODE_OP_FP: begin + if (FPU==1) begin + + // using APU instead of ALU + apu_en = 1'b1; + alu_en = 1'b0; + // by default, set all registers to FP registers and use 2 + rega_used_o = 1'b1; + regb_used_o = 1'b1; + reg_fp_a_o = 1'b1; + reg_fp_b_o = 1'b1; + reg_fp_d_o = 1'b1; + // by default we need to verify rm is legal but assume it is for now + check_fprm = 1'b1; + fp_rnd_mode_o = instr_rdata_i[14:12]; + + // Decode Formats (preliminary, can change for some ops) + unique case (instr_rdata_i[26:25]) + // FP32 + 2'b00: fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; + // FP64 + 2'b01: fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP64; + // FP16 or FP16ALT + 2'b10: begin + // FP16alt encoded in rm field + if (instr_rdata_i[14:12]==3'b101) fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + // this can still change to FP16ALT + else fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16; + end + // FP8 + 2'b11: fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP8; + endcase + + // By default, src=dst + fpu_src_fmt_o = fpu_dst_fmt_o; + + // decode FP instruction + unique case (instr_rdata_i[31:27]) + // fadd.fmt - FP Addition + 5'b00000: begin + fpu_op = cv32e41p_fpu_pkg::ADD; + fp_op_group = ADDMUL; + apu_op_o = 2'b0; + apu_lat_o = (PIPE_REG_ADDSUB==1) ? 2'h2 : 2'h1; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + end + // fsub.fmt - FP Subtraction + 5'b00001: begin + fpu_op = cv32e41p_fpu_pkg::ADD; + fpu_op_mod = 1'b1; + fp_op_group = ADDMUL; + apu_op_o = 2'b1; + apu_lat_o = (PIPE_REG_ADDSUB==1) ? 2'h2 : 2'h1; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + end + // fmul.fmt - FP Multiplication + 5'b00010: begin + fpu_op = cv32e41p_fpu_pkg::MUL; + fp_op_group = ADDMUL; + apu_lat_o = (PIPE_REG_MULT==1) ? 2'h2 : 2'h1; + end + // fdiv.fmt - FP Division + 5'b00011: begin + fpu_op = cv32e41p_fpu_pkg::DIV; + fp_op_group = DIVSQRT; + apu_lat_o = 2'h3; + end + // fsqrt.fmt - FP Square Root + 5'b01011: begin + regb_used_o = 1'b0; + fpu_op = cv32e41p_fpu_pkg::SQRT; + fp_op_group = DIVSQRT; + apu_op_o = 1'b1; + apu_lat_o = 2'h3; + // rs2 must be zero + if (instr_rdata_i[24:20] != 5'b00000) illegal_insn_o = 1'b1; + end + // fsgn{j[n]/jx}.fmt - FP Sign Injection + 5'b00100: begin + fpu_op = cv32e41p_fpu_pkg::SGNJ; + fp_op_group = NONCOMP; + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (C_XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010], [3'b100:3'b110]})) begin + illegal_insn_o = 1'b1; + end + // FP16ALT uses special encoding here + if (instr_rdata_i[14]) begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end else begin + fp_rnd_mode_o = {1'b0, instr_rdata_i[13:12]}; + end + end else begin + if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010]})) illegal_insn_o = 1'b1; + end + end + // fmin/fmax.fmt - FP Minimum / Maximum + 5'b00101: begin + fpu_op = cv32e41p_fpu_pkg::MINMAX; + fp_op_group = NONCOMP; + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (C_XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr_rdata_i[14:12] inside {[3'b000:3'b001], [3'b100:3'b101]})) begin + illegal_insn_o = 1'b1; + end + // FP16ALT uses special encoding here + if (instr_rdata_i[14]) begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end else begin + fp_rnd_mode_o = {1'b0, instr_rdata_i[13:12]}; + end + end else begin + if (!(instr_rdata_i[14:12] inside {[3'b000:3'b001]})) illegal_insn_o = 1'b1; + end + end + // fcvt.fmt.fmt - FP to FP Conversion + 5'b01000: begin + regb_used_o = 1'b0; + fpu_op = cv32e41p_fpu_pkg::F2F; + fp_op_group = CONV; + // bits [22:20] used, other bits must be 0 + if (instr_rdata_i[24:23]) illegal_insn_o = 1'b1; + // check source format + unique case (instr_rdata_i[22:20]) + // Only process instruction if corresponding extension is active (static) + 3'b000: begin + if (~C_RVF) illegal_insn_o = 1'b1; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; + end + 3'b001: begin + if (~C_RVD) illegal_insn_o = 1'b1; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP64; + end + 3'b010: begin + if (~C_XF16) illegal_insn_o = 1'b1; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16; + end + 3'b110: begin + if (~C_XF16ALT) illegal_insn_o = 1'b1; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end + 3'b011: begin + if (~C_XF8) illegal_insn_o = 1'b1; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP8; + end + default: illegal_insn_o = 1'b1; + endcase + end + // fmulex.s.fmt - FP Expanding Multiplication to FP32 + 5'b01001: begin + fpu_op = cv32e41p_fpu_pkg::MUL; + fp_op_group = ADDMUL; + apu_lat_o = (PIPE_REG_MULT==1) ? 2'h2 : 2'h1; + // set dst format to FP32 + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; + end + // fmacex.s.fmt - FP Expanding Multipy-Accumulate to FP32 + 5'b01010: begin + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; // third operand is rd + reg_fp_c_o = 1'b1; + fpu_op = cv32e41p_fpu_pkg::FMADD; + fp_op_group = ADDMUL; + apu_lat_o = (PIPE_REG_MULT==1) ? 2'h2 : 2'h1; + // set dst format to FP32 + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; + end + // feq/flt/fle.fmt - FP Comparisons + 5'b10100: begin + fpu_op = cv32e41p_fpu_pkg::CMP; + fp_op_group = NONCOMP; + reg_fp_d_o = 1'b0; // go to integer regfile + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (C_XF16ALT) begin // FP16ALT instructions encoded in rm separately (static) + if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010], [3'b100:3'b110]})) begin + illegal_insn_o = 1'b1; + end + // FP16ALT uses special encoding here + if (instr_rdata_i[14]) begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end else begin + fp_rnd_mode_o = {1'b0, instr_rdata_i[13:12]}; + end + end else begin + if (!(instr_rdata_i[14:12] inside {[3'b000:3'b010]})) illegal_insn_o = 1'b1; + end + end + // fcvt.ifmt.fmt - FP to Int Conversion + 5'b11000: begin + regb_used_o = 1'b0; + reg_fp_d_o = 1'b0; // go to integer regfile + fpu_op = cv32e41p_fpu_pkg::F2I; + fp_op_group = CONV; + fpu_op_mod = instr_rdata_i[20]; // signed/unsigned switch + apu_op_o = 2'b1; + apu_lat_o = (PIPE_REG_CAST==1) ? 2'h2 : 2'h1; + + unique case (instr_rdata_i[26:25]) //fix for casting to different formats other than FP32 + 2'b00: begin + if (~C_RVF) illegal_insn_o = 1; + else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP32; + end + 2'b01: begin + if (~C_RVD) illegal_insn_o = 1; + else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP64; + end + 2'b10: begin + if (instr_rdata_i[14:12] == 3'b101) begin + if (~C_XF16ALT) illegal_insn_o = 1; + else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end else if (~C_XF16) begin + illegal_insn_o = 1; + end else begin + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16; + end + end + 2'b11: begin + if (~C_XF8) illegal_insn_o = 1; + else fpu_src_fmt_o = cv32e41p_fpu_pkg::FP8; + end + endcase // unique case (instr_rdata_i[26:25]) + // bits [21:20] used, other bits must be 0 + if (instr_rdata_i[24:21]) illegal_insn_o = 1'b1; // in RV32, no casts to L allowed. + end + // fcvt.fmt.ifmt - Int to FP Conversion + 5'b11010: begin + regb_used_o = 1'b0; + reg_fp_a_o = 1'b0; // go from integer regfile + fpu_op = cv32e41p_fpu_pkg::I2F; + fp_op_group = CONV; + fpu_op_mod = instr_rdata_i[20]; // signed/unsigned switch + apu_op_o = 2'b0; + apu_lat_o = (PIPE_REG_CAST==1) ? 2'h2 : 2'h1; + // bits [21:20] used, other bits must be 0 + if (instr_rdata_i[24:21]) illegal_insn_o = 1'b1; // in RV32, no casts to L allowed. + end + // move and class + 5'b11100: begin + regb_used_o = 1'b0; + reg_fp_d_o = 1'b0; // go to integer regfile + fp_op_group = NONCOMP; + check_fprm = 1'b0; // instruction encoded in rm, do the check here + // fmv.x.fmt - FPR to GPR Move + if (instr_rdata_i[14:12] == 3'b000 || (C_XF16ALT && instr_rdata_i[14:12] == 3'b100)) begin + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + fpu_op = cv32e41p_fpu_pkg::SGNJ; // mapped to SGNJ-passthrough since no recoding + fpu_op_mod = 1'b1; // sign-extend result + fp_rnd_mode_o = 3'b011; // passthrough without checking nan-box + // FP16ALT uses special encoding here + if (instr_rdata_i[14]) begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end + // fclass.fmt - FP Classify + end else if (instr_rdata_i[14:12] == 3'b001 || (C_XF16ALT && instr_rdata_i[14:12] == 3'b101)) begin + fpu_op = cv32e41p_fpu_pkg::CLASSIFY; + fp_rnd_mode_o = 3'b000; + // FP16ALT uses special encoding here + if (instr_rdata_i[14]) begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end + end else begin + illegal_insn_o = 1'b1; + end + // rs2 must be zero + if (instr_rdata_i[24:20]) illegal_insn_o = 1'b1; + end + // fmv.fmt.x - GPR to FPR Move + 5'b11110: begin + regb_used_o = 1'b0; + reg_fp_a_o = 1'b0; // go from integer regfile + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; // set rs2 = rs1 so we can map FMV to SGNJ in the unit + fpu_op = cv32e41p_fpu_pkg::SGNJ; // mapped to SGNJ-passthrough since no recoding + fpu_op_mod = 1'b0; // nan-box result + fp_op_group = NONCOMP; + fp_rnd_mode_o = 3'b011; // passthrough without checking nan-box + check_fprm = 1'b0; // instruction encoded in rm, do the check here + if (instr_rdata_i[14:12] == 3'b000 || (C_XF16ALT && instr_rdata_i[14:12] == 3'b100)) begin + // FP16ALT uses special encoding here + if (instr_rdata_i[14]) begin + fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + fpu_src_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + end + end else begin + illegal_insn_o = 1'b1; + end + // rs2 must be zero + if (instr_rdata_i[24:20] != 5'b00000) illegal_insn_o = 1'b1; + end + // Rest are illegal instructions + default: begin + illegal_insn_o = 1'b1; + end + endcase + + // check enabled formats (static) + if (~C_RVF && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; + if ((~C_RVD) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP64) illegal_insn_o = 1'b1; + if ((~C_XF16) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16) illegal_insn_o = 1'b1; + if ((~C_XF16ALT) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16ALT) begin + illegal_insn_o = 1'b1; + end + if ((~C_XF8) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP8) illegal_insn_o = 1'b1; + + // check rounding mode + if (check_fprm) begin + unique case (instr_rdata_i[14:12]) inside + [3'b000:3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 + if (~C_XF16ALT || fpu_dst_fmt_o != cv32e41p_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; + // actual rounding mode from frm csr + unique case (frm_i) inside + [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; + endcase + end + default : illegal_insn_o = 1'b1; + endcase + end + + // Set latencies for FPnew from config. The C_LAT constants contain the number + // of pipeline registers. the APU takes the following values: + // 1 = single cycle (no latency), 2 = one pipestage, 3 = two or more pipestages + case (fp_op_group) + // ADDMUL has format dependent latency + ADDMUL : begin + unique case (fpu_dst_fmt_o) + cv32e41p_fpu_pkg::FP32 : apu_lat_o = (C_LAT_FP32<2) ? C_LAT_FP32+1 : 2'h3; + cv32e41p_fpu_pkg::FP64 : apu_lat_o = (C_LAT_FP64<2) ? C_LAT_FP64+1 : 2'h3; + cv32e41p_fpu_pkg::FP16 : apu_lat_o = (C_LAT_FP16<2) ? C_LAT_FP16+1 : 2'h3; + cv32e41p_fpu_pkg::FP16ALT : apu_lat_o = (C_LAT_FP16ALT<2) ? C_LAT_FP16ALT+1 : 2'h3; + cv32e41p_fpu_pkg::FP8 : apu_lat_o = (C_LAT_FP8<2) ? C_LAT_FP8+1 : 2'h3; + default : ; + endcase + end + // DIVSQRT is iterative and takes more than 2 cycles + DIVSQRT : apu_lat_o = 2'h3; + // NONCOMP uses the same latency for all formats + NONCOMP : apu_lat_o = (C_LAT_NONCOMP<2) ? C_LAT_NONCOMP+1 : 2'h3; + // CONV uses the same latency for all formats + CONV : apu_lat_o = (C_LAT_CONV<2) ? C_LAT_CONV+1 : 2'h3; + endcase + + // Set FPnew OP and OPMOD as the APU op + apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; + + end + // FPU!=1 + else + illegal_insn_o = 1'b1; + end + + // floating point fused arithmetic + OPCODE_OP_FMADD, + OPCODE_OP_FMSUB, + OPCODE_OP_FNMSUB, + OPCODE_OP_FNMADD : begin + if (FPU==1) begin + // using APU instead of ALU + apu_en = 1'b1; + alu_en = 1'b0; + apu_lat_o = (PIPE_REG_MAC>1) ? 2'h3 : 2'h2; + // all registers are FP registers and use three + rega_used_o = 1'b1; + regb_used_o = 1'b1; + regc_used_o = 1'b1; + regc_mux_o = REGC_S4; + reg_fp_a_o = 1'b1; + reg_fp_b_o = 1'b1; + reg_fp_c_o = 1'b1; + reg_fp_d_o = 1'b1; + fp_rnd_mode_o = instr_rdata_i[14:12]; + + // Decode Formats + unique case (instr_rdata_i[26:25]) + // FP32 + 2'b00 : fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP32; + // FP64 + 2'b01 : fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP64; + // FP16 or FP16ALT + 2'b10 : begin + // FP16alt encoded in rm field + if (instr_rdata_i[14:12]==3'b101) fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16ALT; + else fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP16; + end + // FP8 + 2'b11 : fpu_dst_fmt_o = cv32e41p_fpu_pkg::FP8; + endcase + + // By default, src=dst + fpu_src_fmt_o = fpu_dst_fmt_o; + + // decode FP intstruction + unique case (instr_rdata_i[6:0]) + // fmadd.fmt - FP Fused multiply-add + OPCODE_OP_FMADD : begin + fpu_op = cv32e41p_fpu_pkg::FMADD; + apu_op_o = 2'b00; + end + // fmsub.fmt - FP Fused multiply-subtract + OPCODE_OP_FMSUB : begin + fpu_op = cv32e41p_fpu_pkg::FMADD; + fpu_op_mod = 1'b1; + apu_op_o = 2'b01; + end + // fnmsub.fmt - FP Negated fused multiply-subtract + OPCODE_OP_FNMSUB : begin + fpu_op = cv32e41p_fpu_pkg::FNMSUB; + apu_op_o = 2'b10; + end + // fnmadd.fmt - FP Negated fused multiply-add + OPCODE_OP_FNMADD : begin + fpu_op = cv32e41p_fpu_pkg::FNMSUB; + fpu_op_mod = 1'b1; + apu_op_o = 2'b11; + end + endcase + + // check enabled formats (static) + if (~C_RVF && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP32) illegal_insn_o = 1'b1; + if ((~C_RVD) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP64) illegal_insn_o = 1'b1; + if ((~C_XF16) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16) illegal_insn_o = 1'b1; + if ((~C_XF16ALT) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP16ALT) begin + illegal_insn_o = 1'b1; + end + if ((~C_XF8) && fpu_dst_fmt_o == cv32e41p_fpu_pkg::FP8) illegal_insn_o = 1'b1; + + // check rounding mode + unique case (instr_rdata_i[14:12]) inside + [3'b000:3'b100]: ; //legal rounding modes + 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 + if (~C_XF16ALT || fpu_dst_fmt_o != cv32e41p_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; + // actual rounding mode from frm csr + unique case (frm_i) inside + [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; + endcase + end + 3'b111: begin + // rounding mode from frm csr + unique case (frm_i) inside + [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; + endcase + end + default : illegal_insn_o = 1'b1; + endcase + + // Set latencies for FPnew from config. The C_LAT constants contain the number + // of pipeline registers. the APU takes the following values: + // 1 = single cycle (no latency), 2 = one pipestage, 3 = two or more pipestages + // format dependent latency + unique case (fpu_dst_fmt_o) + cv32e41p_fpu_pkg::FP32 : apu_lat_o = (C_LAT_FP32<2) ? C_LAT_FP32+1 : 2'h3; + cv32e41p_fpu_pkg::FP64 : apu_lat_o = (C_LAT_FP64<2) ? C_LAT_FP64+1 : 2'h3; + cv32e41p_fpu_pkg::FP16 : apu_lat_o = (C_LAT_FP16<2) ? C_LAT_FP16+1 : 2'h3; + cv32e41p_fpu_pkg::FP16ALT : apu_lat_o = (C_LAT_FP16ALT<2) ? C_LAT_FP16ALT+1 : 2'h3; + cv32e41p_fpu_pkg::FP8 : apu_lat_o = (C_LAT_FP8<2) ? C_LAT_FP8+1 : 2'h3; + default : ; + endcase + + // Set FPnew OP and OPMOD as the APU op + apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; + end + // FPU!=1 + else begin + illegal_insn_o = 1'b1; + end + end + + OPCODE_STORE_FP: begin + if (FPU==1) begin + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + reg_fp_b_o = 1'b1; + + // offset from immediate + imm_b_mux_sel_o = IMMB_S; + alu_op_b_mux_sel_o = OP_B_IMM; + + // pass write data through ALU operand c + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + + // Decode data type + unique case (instr_rdata_i[14:12]) + // fsb - FP8 store + 3'b000 : if (C_XF8) data_type_o = 2'b10; + else illegal_insn_o = 1'b1; + // fsh - FP16 store + 3'b001 : if (C_XF16 | C_XF16ALT) data_type_o = 2'b01; + else illegal_insn_o = 1'b1; + // fsw - FP32 store + 3'b010 : if (C_RVF) data_type_o = 2'b00; + else illegal_insn_o = 1'b1; + // fsd - FP64 store + 3'b011 : if (C_RVD) data_type_o = 2'b00; // 64bit stores unsupported! + else illegal_insn_o = 1'b1; + default: illegal_insn_o = 1'b1; + endcase + + // sanitize memory bus signals for illegal instr (not sure if needed??) + if (illegal_insn_o) begin + data_req = 1'b0; + data_we_o = 1'b0; + end + end + // FPU!=1 + else + illegal_insn_o = 1'b1; + end + + OPCODE_LOAD_FP: begin + if (FPU==1) begin + data_req = 1'b1; + regfile_mem_we = 1'b1; + reg_fp_d_o = 1'b1; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + + // offset from immediate + imm_b_mux_sel_o = IMMB_I; + alu_op_b_mux_sel_o = OP_B_IMM; + + // NaN boxing + data_sign_extension_o = 2'b10; + + // Decode data type + unique case (instr_rdata_i[14:12]) + // flb - FP8 load + 3'b000 : if (C_XF8) data_type_o = 2'b10; + else illegal_insn_o = 1'b1; + // flh - FP16 load + 3'b001 : if (C_XF16 | C_XF16ALT) data_type_o = 2'b01; + else illegal_insn_o = 1'b1; + // flw - FP32 load + 3'b010 : if (C_RVF) data_type_o = 2'b00; + else illegal_insn_o = 1'b1; + // fld - FP64 load + 3'b011 : if (C_RVD) data_type_o = 2'b00; // 64bit loads unsupported! + else illegal_insn_o = 1'b1; + default: illegal_insn_o = 1'b1; + endcase + end + // FPU!=1 + else + illegal_insn_o = 1'b1; + end + + OPCODE_PULP_OP: begin // PULP specific ALU instructions with three source operands + if (PULP_XPULP) begin + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + + case (instr_rdata_i[13:12]) + 2'b00: begin // multiply with subword selection + alu_en = 1'b0; + + mult_sel_subword_o = instr_rdata_i[30]; + mult_signed_mode_o = {2{instr_rdata_i[31]}}; + + mult_imm_mux_o = MIMM_S3; + regc_mux_o = REGC_ZERO; + mult_int_en = 1'b1; + + if (instr_rdata_i[14]) + mult_operator_o = MUL_IR; + else + mult_operator_o = MUL_I; + end + + 2'b01: begin // MAC with subword selection + alu_en = 1'b0; + + mult_sel_subword_o = instr_rdata_i[30]; + mult_signed_mode_o = {2{instr_rdata_i[31]}}; + + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + mult_imm_mux_o = MIMM_S3; + mult_int_en = 1'b1; + + if (instr_rdata_i[14]) + mult_operator_o = MUL_IR; + else + mult_operator_o = MUL_I; + end + + 2'b10: begin // add with normalization and rounding + // decide between using unsigned and rounding, and combinations + // thereof + case ({instr_rdata_i[31],instr_rdata_i[14]}) + 2'b00: alu_operator_o = ALU_ADD; + 2'b01: alu_operator_o = ALU_ADDR; + 2'b10: alu_operator_o = ALU_ADDU; + 2'b11: alu_operator_o = ALU_ADDUR; + endcase + + bmask_a_mux_o = BMASK_A_ZERO; + bmask_b_mux_o = BMASK_B_S3; + + if (instr_rdata_i[30]) begin + //register variant + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + alu_bmask_b_mux_sel_o = BMASK_B_REG; + alu_op_a_mux_sel_o = OP_A_REGC_OR_FWD; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + end + + end + + 2'b11: begin // sub with normalization and rounding + // decide between using unsigned and rounding, and combinations + // thereof + case ({instr_rdata_i[31],instr_rdata_i[14]}) + 2'b00: alu_operator_o = ALU_SUB; + 2'b01: alu_operator_o = ALU_SUBR; + 2'b10: alu_operator_o = ALU_SUBU; + 2'b11: alu_operator_o = ALU_SUBUR; + endcase + + bmask_a_mux_o = BMASK_A_ZERO; + bmask_b_mux_o = BMASK_B_S3; + + if (instr_rdata_i[30]) begin + //register variant + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + alu_bmask_b_mux_sel_o = BMASK_B_REG; + alu_op_a_mux_sel_o = OP_A_REGC_OR_FWD; + alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; + end + + end + endcase + end else begin + illegal_insn_o = 1'b1; + end + end + + OPCODE_VECOP: begin + if (PULP_XPULP) begin + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + imm_b_mux_sel_o = IMMB_VS; + + // vector size + if (instr_rdata_i[12]) begin + alu_vec_mode_o = VEC_MODE8; + mult_operator_o = MUL_DOT8; + end else begin + alu_vec_mode_o = VEC_MODE16; + mult_operator_o = MUL_DOT16; + end + + // distinguish normal vector, sc and sci modes + if (instr_rdata_i[14]) begin + scalar_replication_o = 1'b1; + + if (instr_rdata_i[13]) begin + // immediate scalar replication, .sci + alu_op_b_mux_sel_o = OP_B_IMM; + end else begin + // register scalar replication, .sc + regb_used_o = 1'b1; + end + end else begin + // normal register use + regb_used_o = 1'b1; + end + + // now decode the instruction + unique case (instr_rdata_i[31:26]) + 6'b00000_0: begin alu_operator_o = ALU_ADD; imm_b_mux_sel_o = IMMB_VS; end // pv.add + 6'b00001_0: begin alu_operator_o = ALU_SUB; imm_b_mux_sel_o = IMMB_VS; end // pv.sub + 6'b00010_0: begin alu_operator_o = ALU_ADD; imm_b_mux_sel_o = IMMB_VS; bmask_b_mux_o = BMASK_B_ONE; end // pv.avg + 6'b00011_0: begin alu_operator_o = ALU_ADDU; imm_b_mux_sel_o = IMMB_VU; bmask_b_mux_o = BMASK_B_ONE; end // pv.avgu + 6'b00100_0: begin alu_operator_o = ALU_MIN; imm_b_mux_sel_o = IMMB_VS; end // pv.min + 6'b00101_0: begin alu_operator_o = ALU_MINU; imm_b_mux_sel_o = IMMB_VU; end // pv.minu + 6'b00110_0: begin alu_operator_o = ALU_MAX; imm_b_mux_sel_o = IMMB_VS; end // pv.max + 6'b00111_0: begin alu_operator_o = ALU_MAXU; imm_b_mux_sel_o = IMMB_VU; end // pv.maxu + 6'b01000_0: begin alu_operator_o = ALU_SRL; imm_b_mux_sel_o = IMMB_VS; end // pv.srl + 6'b01001_0: begin alu_operator_o = ALU_SRA; imm_b_mux_sel_o = IMMB_VS; end // pv.sra + 6'b01010_0: begin alu_operator_o = ALU_SLL; imm_b_mux_sel_o = IMMB_VS; end // pv.sll + 6'b01011_0: begin alu_operator_o = ALU_OR; imm_b_mux_sel_o = IMMB_VS; end // pv.or + 6'b01100_0: begin alu_operator_o = ALU_XOR; imm_b_mux_sel_o = IMMB_VS; end // pv.xor + 6'b01101_0: begin alu_operator_o = ALU_AND; imm_b_mux_sel_o = IMMB_VS; end // pv.and + 6'b01110_0: begin alu_operator_o = ALU_ABS; imm_b_mux_sel_o = IMMB_VS; end // pv.abs + + // shuffle/pack + 6'b11101_0, // pv.shuffleI1 + 6'b11110_0, // pv.shuffleI2 + 6'b11111_0, // pv.shuffleI3 + 6'b11000_0: begin // pv.shuffle, pv.shuffleI0 + alu_operator_o = ALU_SHUF; + imm_b_mux_sel_o = IMMB_SHUF; + regb_used_o = 1'b1; + scalar_replication_o = 1'b0; + end + 6'b11001_0: begin // pv.shuffle2 + alu_operator_o = ALU_SHUF2; + regb_used_o = 1'b1; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + scalar_replication_o = 1'b0; + end + 6'b11010_0: begin // pv.pack + alu_operator_o = instr_rdata_i[25] ? ALU_PCKHI : ALU_PCKLO; + regb_used_o = 1'b1; + end + 6'b11011_0: begin // pv.packhi + alu_operator_o = ALU_PCKHI; + regb_used_o = 1'b1; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + end + 6'b11100_0: begin // pv.packlo + alu_operator_o = ALU_PCKLO; + regb_used_o = 1'b1; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + end + 6'b01111_0: begin // pv.extract + alu_operator_o = ALU_EXTS; + end + 6'b10010_0: begin // pv.extractu + alu_operator_o = ALU_EXT; + end + 6'b10110_0: begin // pv.insert + alu_operator_o = ALU_INS; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + alu_op_b_mux_sel_o = OP_B_REGC_OR_FWD; + end + 6'b10000_0: begin // pv.dotup + alu_en = 1'b0; + mult_dot_en = 1'b1; + mult_dot_signed_o = 2'b00; + imm_b_mux_sel_o = IMMB_VU; + end + 6'b10001_0: begin // pv.dotusp + alu_en = 1'b0; + mult_dot_en = 1'b1; + mult_dot_signed_o = 2'b01; + end + 6'b10011_0: begin // pv.dotsp + alu_en = 1'b0; + mult_dot_en = 1'b1; + mult_dot_signed_o = 2'b11; + end + 6'b10100_0: begin // pv.sdotup + alu_en = 1'b0; + mult_dot_en = 1'b1; + mult_dot_signed_o = 2'b00; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + imm_b_mux_sel_o = IMMB_VU; + end + 6'b10101_0: begin // pv.sdotusp + alu_en = 1'b0; + mult_dot_en = 1'b1; + mult_dot_signed_o = 2'b01; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + end + 6'b10111_0: begin // pv.sdotsp + alu_en = 1'b0; + mult_dot_en = 1'b1; + mult_dot_signed_o = 2'b11; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + end + + /* COMPLEX INSTRUCTIONS */ + + 6'b01010_1: begin // pc.clpxmul.{r,i}.{/,div2,div4,div8} + alu_en = 1'b0; + mult_dot_en = 1'b1; + mult_dot_signed_o = 2'b11; + is_clpx_o = 1'b1; + regc_used_o = 1'b1; + regc_mux_o = REGC_RD; + scalar_replication_o = 1'b0; + alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; + regb_used_o = 1'b1; + illegal_insn_o = instr_rdata_i[12]; + end + + 6'b01101_1: begin // pv.subrotmj.{/,div2,div4,div8} + alu_operator_o = ALU_SUB; + is_clpx_o = 1'b1; + scalar_replication_o = 1'b0; + alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; + regb_used_o = 1'b1; + is_subrot_o = 1'b1; + illegal_insn_o = instr_rdata_i[12]; + end + + 6'b01011_1: begin // pv.cplxconj + alu_operator_o = ALU_ABS; + is_clpx_o = 1'b1; + scalar_replication_o = 1'b0; + regb_used_o = 1'b0; + illegal_insn_o = instr_rdata_i[12] || (instr_rdata_i[24:20]!='0); + end + + 6'b01110_1: begin // pv.add.{div2,div4,div8} + alu_operator_o = ALU_ADD; + is_clpx_o = 1'b1; + scalar_replication_o = 1'b0; + alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; + regb_used_o = 1'b1; + illegal_insn_o = instr_rdata_i[12]; + end + + 6'b01100_1: begin // pv.sub.{div2,div4,div8} + alu_operator_o = ALU_SUB; + is_clpx_o = 1'b1; + scalar_replication_o = 1'b0; + alu_op_b_mux_sel_o = OP_B_REGB_OR_FWD; + regb_used_o = 1'b1; + illegal_insn_o = instr_rdata_i[12]; + end + + // comparisons, always have bit 26 set + 6'b00000_1: begin alu_operator_o = ALU_EQ; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpeq + 6'b00001_1: begin alu_operator_o = ALU_NE; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpne + 6'b00010_1: begin alu_operator_o = ALU_GTS; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpgt + 6'b00011_1: begin alu_operator_o = ALU_GES; imm_b_mux_sel_o = IMMB_VS; end // pv.cmpge + 6'b00100_1: begin alu_operator_o = ALU_LTS; imm_b_mux_sel_o = IMMB_VS; end // pv.cmplt + 6'b00101_1: begin alu_operator_o = ALU_LES; imm_b_mux_sel_o = IMMB_VS; end // pv.cmple + 6'b00110_1: begin alu_operator_o = ALU_GTU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpgtu + 6'b00111_1: begin alu_operator_o = ALU_GEU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpgeu + 6'b01000_1: begin alu_operator_o = ALU_LTU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpltu + 6'b01001_1: begin alu_operator_o = ALU_LEU; imm_b_mux_sel_o = IMMB_VU; end // pv.cmpleu + + default: illegal_insn_o = 1'b1; + endcase + end else begin + illegal_insn_o = 1'b1; + end + end + + //////////////////////////////////////////////// + // ____ ____ _____ ____ ___ _ _ // + // / ___|| _ \| ____/ ___|_ _| / \ | | // + // \___ \| |_) | _|| | | | / _ \ | | // + // ___) | __/| |__| |___ | | / ___ \| |___ // + // |____/|_| |_____\____|___/_/ \_\_____| // + // // + //////////////////////////////////////////////// + + OPCODE_FENCE: begin + unique case (instr_rdata_i[14:12]) + 3'b000: begin // FENCE (FENCE.I instead, a bit more conservative) + // flush pipeline + fencei_insn_o = 1'b1; + end + + 3'b001: begin // FENCE.I + // flush prefetch buffer, flush pipeline + fencei_insn_o = 1'b1; + end + + default: begin + illegal_insn_o = 1'b1; + end + endcase + end + + OPCODE_SYSTEM: begin + if (instr_rdata_i[14:12] == 3'b000) + begin + // non CSR related SYSTEM instructions + if ( {instr_rdata_i[19:15], instr_rdata_i[11:7]} == '0) + begin + unique case (instr_rdata_i[31:20]) + 12'h000: // ECALL + begin + // environment (system) call + ecall_insn_o = 1'b1; + end + + 12'h001: // ebreak + begin + // debugger trap + ebrk_insn_o = 1'b1; + end + + 12'h302: // mret + begin + illegal_insn_o = (PULP_SECURE) ? current_priv_lvl_i != PRIV_LVL_M : 1'b0; + mret_insn_o = ~illegal_insn_o; + mret_dec_o = 1'b1; + end + + 12'h002: // uret + begin + illegal_insn_o = (PULP_SECURE) ? 1'b0 : 1'b1; + uret_insn_o = ~illegal_insn_o; + uret_dec_o = 1'b1; + end + + 12'h7b2: // dret + begin + illegal_insn_o = !debug_mode_i; + dret_insn_o = debug_mode_i; + dret_dec_o = 1'b1; + end + + 12'h105: // wfi + begin + wfi_o = 1'b1; + if (debug_wfi_no_sleep_i) begin + // Treat as NOP (do not cause sleep mode entry) + // Using decoding similar to ADDI, but without register reads/writes, i.e. + // keep regfile_alu_we = 0, rega_used_o = 0 + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_I; + alu_operator_o = ALU_ADD; + end + end + + default: + begin + illegal_insn_o = 1'b1; + end + endcase + end else illegal_insn_o = 1'b1; + end + else + begin + // instruction to read/modify CSR + csr_access_o = 1'b1; + regfile_alu_we = 1'b1; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_a_mux_sel_o = IMMA_Z; + imm_b_mux_sel_o = IMMB_I; // CSR address is encoded in I imm + + if (instr_rdata_i[14] == 1'b1) begin + // rs1 field is used as immediate + alu_op_a_mux_sel_o = OP_A_IMM; + end else begin + rega_used_o = 1'b1; + alu_op_a_mux_sel_o = OP_A_REGA_OR_FWD; + end + + // instr_rdata_i[19:14] = rs or immediate value + // if set or clear with rs==x0 or imm==0, + // then do not perform a write action + unique case (instr_rdata_i[13:12]) + 2'b01: csr_op = CSR_OP_WRITE; + 2'b10: csr_op = instr_rdata_i[19:15] == 5'b0 ? CSR_OP_READ : CSR_OP_SET; + 2'b11: csr_op = instr_rdata_i[19:15] == 5'b0 ? CSR_OP_READ : CSR_OP_CLEAR; + default: csr_illegal = 1'b1; + endcase + + if (instr_rdata_i[29:28] > current_priv_lvl_i) begin + // No access to higher privilege CSR + csr_illegal = 1'b1; + end + + // Determine if CSR access is illegal + case (instr_rdata_i[31:20]) + // Floating point + CSR_FFLAGS, + CSR_FRM, + CSR_FCSR : + if(!FPU) csr_illegal = 1'b1; + + // Writes to read only CSRs results in illegal instruction + CSR_MVENDORID, + CSR_MARCHID, + CSR_MIMPID, + CSR_MHARTID : + if(csr_op != CSR_OP_READ) csr_illegal = 1'b1; + + // These are valid CSR registers + CSR_MSTATUS, + CSR_MEPC, + CSR_MTVEC, + CSR_MCAUSE : + // Not illegal, but treat as status CSR for side effect handling + csr_status_o = 1'b1; + + // These are valid CSR registers + CSR_MISA, + CSR_MIE, + CSR_MSCRATCH, + CSR_MTVAL, + CSR_MIP : + ; // do nothing, not illegal + + // Hardware Performance Monitor + CSR_MCYCLE, + CSR_MINSTRET, + CSR_MHPMCOUNTER3, + CSR_MHPMCOUNTER4, CSR_MHPMCOUNTER5, CSR_MHPMCOUNTER6, CSR_MHPMCOUNTER7, + CSR_MHPMCOUNTER8, CSR_MHPMCOUNTER9, CSR_MHPMCOUNTER10, CSR_MHPMCOUNTER11, + CSR_MHPMCOUNTER12, CSR_MHPMCOUNTER13, CSR_MHPMCOUNTER14, CSR_MHPMCOUNTER15, + CSR_MHPMCOUNTER16, CSR_MHPMCOUNTER17, CSR_MHPMCOUNTER18, CSR_MHPMCOUNTER19, + CSR_MHPMCOUNTER20, CSR_MHPMCOUNTER21, CSR_MHPMCOUNTER22, CSR_MHPMCOUNTER23, + CSR_MHPMCOUNTER24, CSR_MHPMCOUNTER25, CSR_MHPMCOUNTER26, CSR_MHPMCOUNTER27, + CSR_MHPMCOUNTER28, CSR_MHPMCOUNTER29, CSR_MHPMCOUNTER30, CSR_MHPMCOUNTER31, + CSR_MCYCLEH, + CSR_MINSTRETH, + CSR_MHPMCOUNTER3H, + CSR_MHPMCOUNTER4H, CSR_MHPMCOUNTER5H, CSR_MHPMCOUNTER6H, CSR_MHPMCOUNTER7H, + CSR_MHPMCOUNTER8H, CSR_MHPMCOUNTER9H, CSR_MHPMCOUNTER10H, CSR_MHPMCOUNTER11H, + CSR_MHPMCOUNTER12H, CSR_MHPMCOUNTER13H, CSR_MHPMCOUNTER14H, CSR_MHPMCOUNTER15H, + CSR_MHPMCOUNTER16H, CSR_MHPMCOUNTER17H, CSR_MHPMCOUNTER18H, CSR_MHPMCOUNTER19H, + CSR_MHPMCOUNTER20H, CSR_MHPMCOUNTER21H, CSR_MHPMCOUNTER22H, CSR_MHPMCOUNTER23H, + CSR_MHPMCOUNTER24H, CSR_MHPMCOUNTER25H, CSR_MHPMCOUNTER26H, CSR_MHPMCOUNTER27H, + CSR_MHPMCOUNTER28H, CSR_MHPMCOUNTER29H, CSR_MHPMCOUNTER30H, CSR_MHPMCOUNTER31H, + CSR_MCOUNTINHIBIT, + CSR_MHPMEVENT3, + CSR_MHPMEVENT4, CSR_MHPMEVENT5, CSR_MHPMEVENT6, CSR_MHPMEVENT7, + CSR_MHPMEVENT8, CSR_MHPMEVENT9, CSR_MHPMEVENT10, CSR_MHPMEVENT11, + CSR_MHPMEVENT12, CSR_MHPMEVENT13, CSR_MHPMEVENT14, CSR_MHPMEVENT15, + CSR_MHPMEVENT16, CSR_MHPMEVENT17, CSR_MHPMEVENT18, CSR_MHPMEVENT19, + CSR_MHPMEVENT20, CSR_MHPMEVENT21, CSR_MHPMEVENT22, CSR_MHPMEVENT23, + CSR_MHPMEVENT24, CSR_MHPMEVENT25, CSR_MHPMEVENT26, CSR_MHPMEVENT27, + CSR_MHPMEVENT28, CSR_MHPMEVENT29, CSR_MHPMEVENT30, CSR_MHPMEVENT31 : + // Not illegal, but treat as status CSR to get accurate counts + csr_status_o = 1'b1; + + // Hardware Performance Monitor (unprivileged read-only mirror CSRs) + CSR_CYCLE, + CSR_INSTRET, + CSR_HPMCOUNTER3, + CSR_HPMCOUNTER4, CSR_HPMCOUNTER5, CSR_HPMCOUNTER6, CSR_HPMCOUNTER7, + CSR_HPMCOUNTER8, CSR_HPMCOUNTER9, CSR_HPMCOUNTER10, CSR_HPMCOUNTER11, + CSR_HPMCOUNTER12, CSR_HPMCOUNTER13, CSR_HPMCOUNTER14, CSR_HPMCOUNTER15, + CSR_HPMCOUNTER16, CSR_HPMCOUNTER17, CSR_HPMCOUNTER18, CSR_HPMCOUNTER19, + CSR_HPMCOUNTER20, CSR_HPMCOUNTER21, CSR_HPMCOUNTER22, CSR_HPMCOUNTER23, + CSR_HPMCOUNTER24, CSR_HPMCOUNTER25, CSR_HPMCOUNTER26, CSR_HPMCOUNTER27, + CSR_HPMCOUNTER28, CSR_HPMCOUNTER29, CSR_HPMCOUNTER30, CSR_HPMCOUNTER31, + CSR_CYCLEH, + CSR_INSTRETH, + CSR_HPMCOUNTER3H, + CSR_HPMCOUNTER4H, CSR_HPMCOUNTER5H, CSR_HPMCOUNTER6H, CSR_HPMCOUNTER7H, + CSR_HPMCOUNTER8H, CSR_HPMCOUNTER9H, CSR_HPMCOUNTER10H, CSR_HPMCOUNTER11H, + CSR_HPMCOUNTER12H, CSR_HPMCOUNTER13H, CSR_HPMCOUNTER14H, CSR_HPMCOUNTER15H, + CSR_HPMCOUNTER16H, CSR_HPMCOUNTER17H, CSR_HPMCOUNTER18H, CSR_HPMCOUNTER19H, + CSR_HPMCOUNTER20H, CSR_HPMCOUNTER21H, CSR_HPMCOUNTER22H, CSR_HPMCOUNTER23H, + CSR_HPMCOUNTER24H, CSR_HPMCOUNTER25H, CSR_HPMCOUNTER26H, CSR_HPMCOUNTER27H, + CSR_HPMCOUNTER28H, CSR_HPMCOUNTER29H, CSR_HPMCOUNTER30H, CSR_HPMCOUNTER31H : + // Read-only and readable from user mode only if the bit of mcounteren is set + if((csr_op != CSR_OP_READ) || (PULP_SECURE && (current_priv_lvl_i != PRIV_LVL_M) && !mcounteren_i[instr_rdata_i[24:20]])) begin + csr_illegal = 1'b1; + end else begin + csr_status_o = 1'b1; + end + + // This register only exists in user mode + CSR_MCOUNTEREN : + if(!PULP_SECURE) begin + csr_illegal = 1'b1; + end else begin + csr_status_o = 1'b1; + end + + // Debug register access + CSR_DCSR, + CSR_DPC, + CSR_DSCRATCH0, + CSR_DSCRATCH1 : + if(!debug_mode_i) begin + csr_illegal = 1'b1; + end else begin + csr_status_o = 1'b1; + end + + // Debug Trigger register access + CSR_TSELECT, + CSR_TDATA1, + CSR_TDATA2, + CSR_TDATA3, + CSR_TINFO, + CSR_MCONTEXT, + CSR_SCONTEXT : + if(DEBUG_TRIGGER_EN != 1) + csr_illegal = 1'b1; + + // Hardware Loop register, UHARTID access + CSR_LPSTART0, + CSR_LPEND0, + CSR_LPCOUNT0, + CSR_LPSTART1, + CSR_LPEND1, + CSR_LPCOUNT1, + CSR_UHARTID : + if(!PULP_XPULP) csr_illegal = 1'b1; + + // PRIVLV access + CSR_PRIVLV : + if(!PULP_XPULP) begin + csr_illegal = 1'b1; + end else begin + csr_status_o = 1'b1; + end + + // PMP register access + CSR_PMPCFG0, + CSR_PMPCFG1, + CSR_PMPCFG2, + CSR_PMPCFG3, + CSR_PMPADDR0, + CSR_PMPADDR1, + CSR_PMPADDR2, + CSR_PMPADDR3, + CSR_PMPADDR4, + CSR_PMPADDR5, + CSR_PMPADDR6, + CSR_PMPADDR7, + CSR_PMPADDR8, + CSR_PMPADDR9, + CSR_PMPADDR10, + CSR_PMPADDR11, + CSR_PMPADDR12, + CSR_PMPADDR13, + CSR_PMPADDR14, + CSR_PMPADDR15 : + if(!USE_PMP) csr_illegal = 1'b1; + + // User register access + CSR_USTATUS, + CSR_UEPC, + CSR_UTVEC, + CSR_UCAUSE : + if (!PULP_SECURE) begin + csr_illegal = 1'b1; + end else begin + csr_status_o = 1'b1; + end + + default : csr_illegal = 1'b1; + + endcase // case (instr_rdata_i[31:20]) + + illegal_insn_o = csr_illegal; + + end + + end + + + /////////////////////////////////////////////// + // _ ___ ___ ___ ___ ____ // + // | | | \ \ / / | / _ \ / _ \| _ \ // + // | |_| |\ \ /\ / /| | | | | | | | | |_) | // + // | _ | \ V V / | |__| |_| | |_| | __/ // + // |_| |_| \_/\_/ |_____\___/ \___/|_| // + // // + /////////////////////////////////////////////// + + OPCODE_HWLOOP: begin + if(PULP_XPULP) begin : HWLOOP_FEATURE_ENABLED + hwlp_target_mux_sel_o = 1'b0; + + unique case (instr_rdata_i[14:12]) + 3'b000: begin + // lp.starti: set start address to PC + I-type immediate + hwlp_we[0] = 1'b1; + hwlp_start_mux_sel_o = 1'b0; + end + + 3'b001: begin + // lp.endi: set end address to PC + I-type immediate + hwlp_we[1] = 1'b1; + end + + 3'b010: begin + // lp.count: initialize counter from rs1 + hwlp_we[2] = 1'b1; + hwlp_cnt_mux_sel_o = 1'b1; + rega_used_o = 1'b1; + end + + 3'b011: begin + // lp.counti: initialize counter from I-type immediate + hwlp_we[2] = 1'b1; + hwlp_cnt_mux_sel_o = 1'b0; + end + + 3'b100: begin + // lp.setup: initialize counter from rs1, set start address to + // next instruction and end address to PC + I-type immediate + hwlp_we = 3'b111; + hwlp_start_mux_sel_o = 1'b1; + hwlp_cnt_mux_sel_o = 1'b1; + rega_used_o = 1'b1; + end + + 3'b101: begin + // lp.setupi: initialize counter from immediate, set start address to + // next instruction and end address to PC + I-type immediate + hwlp_we = 3'b111; + hwlp_target_mux_sel_o = 1'b1; + hwlp_start_mux_sel_o = 1'b1; + hwlp_cnt_mux_sel_o = 1'b0; + end + + default: begin + illegal_insn_o = 1'b1; + end + endcase // case (instr_rdata_i[14:12]) + + end else begin // block: HWLOOP_FEATURE_ENABLED + illegal_insn_o = 1'b1; + end + end // case: OPCODE_HWLOOP + + default: begin + illegal_insn_o = 1'b1; + end + + endcase + end + else if (is_compressed_o) + // Compressed decoder ! + illegal_c_insn_o = 1'b0; + unique case (instr_rdata_i[1:0]) + // C0 + 2'b00: begin + unique case (instr_rdata_i[15:13]) + // c.addi4spn -> addi rd', x2, imm + 3'b000: begin + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CSPN; + addr_ra_id_o = 5'd2; + waddr_id_o = {2'b01,instr_rdata_i[4:2]}; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + if (instr_rdata_i[12:5] == 8'b0) illegal_c_insn_o = 1'b1; + end + // c.fld -> fld rd', imm(rs1') + 3'b001: begin + if (FPU == 1'b1 && C_RVD) begin + data_req = 1'b1; + regfile_mem_we = 1'b1; + reg_fp_d_o = 1'b1; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + imm_b_mux_sel_o = IMMB_CLD; + alu_op_b_mux_sel_o = OP_B_IMM; + + addr_ra_id_o = {2'b01,instr_rdata_i[4:2]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + // NaN boxing + data_sign_extension_o = 2'b10; + // The orginal decoder mapped into 32bit loads, 64bit loads unsupported + data_type_o = 2'b00; + end + else + illegal_c_insn_o = 1'b1; + end + // c.lw -> lw rd', imm(rs1') + 3'b010: begin + data_req = 1'b1; + regfile_mem_we = 1'b1; + rega_used_o = 1'b1; + data_type_o = 2'b00; + // offset from immediate + alu_operator_o = ALU_ADD; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CLW; + + waddr_id_o = {2'b01,instr_rdata_i[4:2]}; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + + data_sign_extension_o = 2'b01; + + data_type_o = 2'b00; // LW + end + // c.flw -> flw rd', imm(rs1') + 3'b011: begin + if (FPU == 1'b1) begin + data_req = 1'b1; + regfile_mem_we = 1'b1; + reg_fp_d_o = 1'b1; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + imm_b_mux_sel_o = IMMB_CLW; + alu_op_b_mux_sel_o = OP_B_IMM; + + waddr_id_o = {2'b01,instr_rdata_i[4:2]}; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + // NaN boxing + data_sign_extension_o = 2'b10; + // The orginal decoder mapped into 32bit loads, 64bit loads unsupported + data_type_o = 2'b00; + end + else + illegal_c_insn_o = 1'b1; + end + // c.fsd -> fsd rs2', imm(rs1') + 3'b101: begin + if (FPU==1) begin + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + reg_fp_b_o = 1'b1; + imm_b_mux_sel_o = IMMB_CLD; + alu_op_b_mux_sel_o = OP_B_IMM; + + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = {2'b01,instr_rdata_i[4:2]}; + // pass write data through ALU operand c + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + end + else + illegal_c_insn_o = 1'b1; + end + // c.sw -> sw rs2', imm(rs1') + 3'b110: begin + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + data_type_o = 2'b00; // SW + imm_b_mux_sel_o = IMMB_CLW; + alu_op_b_mux_sel_o = OP_B_IMM; + + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = {2'b01,instr_rdata_i[4:2]}; + + end + // c.fsw -> fsw rs2', imm(rs1') + 3'b111: begin + if (FPU==1 && C_RVF) begin + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + reg_fp_b_o = 1'b1; + imm_b_mux_sel_o = IMMB_CLW; + alu_op_b_mux_sel_o = OP_B_IMM; + + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = {2'b01,instr_rdata_i[4:2]}; + // pass write data through ALU operand c + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + data_type_o = 2'b00; + end + else + illegal_c_insn_o = 1'b1; + end + default: begin + illegal_c_insn_o = 1'b1; + end + endcase + end + // C1 + 2'b01: begin + illegal_c_insn_o = 1'b0; + unique case (instr_rdata_i[15:13]) + 3'b000: begin + // c.addi -> addi rd, rd, nzimm + // c.nop + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CANDI; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + + addr_ra_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + end + + 3'b001, 3'b101: begin + // 001: c.jal -> jal x1, imm + // 101: c.j -> jal x0, imm + ctrl_transfer_target_mux_sel_o = JT_CJAL; + ctrl_transfer_insn = BRANCH_JAL; + // Calculate and store PC+4 + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_PCINCR; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + waddr_id_o = instr_rdata_i[15] ? 5'd0 : 5'd1; + + end + + 3'b010: begin + // c.li + alu_op_b_mux_sel_o = OP_B_IMM; + // { {26{instr[12]}},instr[12:12],instr[6:2] } + imm_b_mux_sel_o = IMMB_CANDI; + regfile_alu_we = 1'b1; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + end + + 3'b011: begin + if ({instr_rdata_i[12], instr_rdata_i[6:2]} == 6'b0) begin + illegal_c_insn_o = 1'b1; + end else begin + if (instr_rdata_i[11:7] == 5'h02) begin + // c.addi16sp -> addi x2, x2, nzimm + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CADDI; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + waddr_id_o = 5'd2; + addr_ra_id_o = 5'd2; + alu_operator_o = ALU_ADD; + + end else if (instr_rdata_i[11:7] == 5'b0) begin + // Hint -> lui x0, imm + alu_op_a_mux_sel_o = OP_A_IMM; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_a_mux_sel_o = IMMA_ZERO; + imm_b_mux_sel_o = IMMB_CLUI; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + end else begin + // c.lui -> lui rd, imm + alu_op_a_mux_sel_o = OP_A_IMM; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_a_mux_sel_o = IMMA_ZERO; + imm_b_mux_sel_o = IMMB_CLUI; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + end + end + end + + 3'b100: begin + unique case (instr_rdata_i[11:10]) + // 00: c.srli -> srli rd, rd, shamt + 2'b00: begin + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CSRLI; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + alu_operator_o = ALU_SRL; + if (instr_rdata_i[12] == 1'b1) begin + illegal_c_insn_o = 1'b1; + end + end + // 01: c.srai -> srai rd, rd, shamt + 2'b01: begin + alu_op_b_mux_sel_o = OP_B_IMM; + // IMMEDIATE { 26'b0,instr[12:12],instr[6:2] } + imm_b_mux_sel_o = IMMB_CSRLI; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + alu_operator_o = ALU_SRA; + if (instr_rdata_i[12] == 1'b1) begin + illegal_c_insn_o = 1'b1; + end + end + // c.andi -> andi rd, rd, imm + 2'b10: begin + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CANDI; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + alu_operator_o = ALU_AND; + end + 2'b11: begin + + unique case ({instr_rdata_i[12], instr_rdata_i[6:5]}) + 3'b000: begin + // c.sub -> sub rd', rd', rs2' + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = {2'b01,instr_rdata_i[4:2]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + alu_operator_o = ALU_SUB; + end + + 3'b001: begin + // c.xor -> xor rd', rd', rs2' + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = {2'b01,instr_rdata_i[4:2]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + alu_operator_o = ALU_XOR; + end + + 3'b010: begin + // c.or -> or rd', rd', rs2' + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = {2'b01,instr_rdata_i[4:2]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + alu_operator_o = ALU_OR; + end + + 3'b011: begin + // c.and -> and rd', rd', rs2' + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = {2'b01,instr_rdata_i[4:2]}; + waddr_id_o = {2'b01,instr_rdata_i[9:7]}; + alu_operator_o = ALU_AND; + end + + 3'b100, 3'b101, 3'b110, 3'b111: begin + // 100: c.subw + // 101: c.addw + illegal_c_insn_o = 1'b1; + end + endcase + end + endcase + end + + 3'b110: begin + // 0: c.beqz -> beq rs1', x0, imm + ctrl_transfer_target_mux_sel_o = JT_CCOND; + ctrl_transfer_insn = BRANCH_COND; + alu_op_c_mux_sel_o = OP_C_JT; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = 5'd0; + + alu_operator_o = ALU_EQ; + end + + 3'b111: begin + // 1: c.bnez -> bne rs1', x0, imm + ctrl_transfer_target_mux_sel_o = JT_CCOND; + ctrl_transfer_insn = BRANCH_COND; + alu_op_c_mux_sel_o = OP_C_JT; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + addr_ra_id_o = {2'b01,instr_rdata_i[9:7]}; + addr_rb_id_o = 5'd0; + alu_operator_o = ALU_NE; + + + end + endcase + end + // C2 + 2'b10: begin + unique case (instr_rdata_i[15:13]) + 3'b000: begin + if (instr_rdata_i[12] == 1'b1) begin + /* Reserved for future extensions (instr_o don't care) + SLLI64 (Generate Illegal Instruction Exception but + it tries extend it non the less ?? ) */ + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_I; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + addr_ra_id_o = instr_rdata_i[11:7]; + waddr_id_o = instr_rdata_i[11:7]; + alu_operator_o = ALU_SLL; + illegal_c_insn_o = 1'b1; + end else begin + // SLLI,the immediate format is the same, thats why its called like that ! + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CSRLI; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + addr_ra_id_o = instr_rdata_i[11:7]; + waddr_id_o = instr_rdata_i[11:7]; + alu_operator_o = ALU_SLL; + end + end + + 3'b001: begin + // c.fldsp -> fld rd, imm(x2) + if (FPU==1 && C_RVD) + begin // instr_i[6:5] -> offset[4:3], instr_i[4:2] -> offset[8:6], instr_i[12] -> offset[5] + data_req = 1'b1; + regfile_mem_we = 1'b1; + reg_fp_d_o = 1'b1; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + // offset from immediate + imm_b_mux_sel_o = IMMB_CFLDSP; + alu_op_b_mux_sel_o = OP_B_IMM; + + addr_ra_id_o = 5'd2; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + // NaN boxing + data_sign_extension_o = 2'b10; + data_type_o = 2'b00; + end + else illegal_c_insn_o = 1'b1; + end + + 3'b010: begin + // c.lwsp -> lw rd, imm(x2) + data_req = 1'b1; + regfile_mem_we = 1'b1; + rega_used_o = 1'b1; + data_type_o = 2'b00; + + addr_ra_id_o = 5'd2; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + // offset from immediate + alu_operator_o = ALU_ADD; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_CLWSP; + data_sign_extension_o = 2'b01; + data_type_o = 2'b00; + + if (instr_rdata_i[11:7] == 5'b0) illegal_c_insn_o = 1'b1; + end + + 3'b011: begin + // c.flwsp -> flw rd, imm(x2) + if (FPU == 1 && C_RVF) begin + data_req = 1'b1; + regfile_mem_we = 1'b1; + reg_fp_d_o = 1'b1; + rega_used_o = 1'b1; + alu_operator_o = ALU_ADD; + // offset from immediate + // IMMEDIATE { 24'b0,instr[3:2],instr[12:12],instr[6:4],2'b0}, + imm_b_mux_sel_o = IMMB_CLWSP; + alu_op_b_mux_sel_o = OP_B_IMM; + + addr_ra_id_o = 5'd2; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + // NaN boxing + data_sign_extension_o = 2'b10; + data_type_o = 2'b00; + end + else illegal_c_insn_o = 1'b1; + end + + 3'b100: begin + if (instr_rdata_i[12] == 1'b0) begin + if (instr_rdata_i[6:2] == 5'b0) begin + // c.jr -> jalr x0, rd/rs1, 0 + ctrl_transfer_target_mux_sel_o = JT_CJALR; + ctrl_transfer_insn = BRANCH_JALR; + // Calculate and store PC+4 + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_PCINCR; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + + addr_ra_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + waddr_id_o = 5'd0; + // c.jr with rs1 = 0 is reserved + if (instr_rdata_i[11:7] == 5'b0) illegal_c_insn_o = 1'b1; + end else begin + // if (instr_rdata_i[11:7] == 5'b0) begin + // // Hint -> add x0, x0, rs2 + + // end else begin + // // c.mv -> add rd, x0, rs2 + + // end + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + addr_ra_id_o = 5'd0; + addr_rb_id_o = instr_rdata_i[6:2]; + + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + + alu_operator_o = ALU_ADD; + end + end else begin + if (instr_rdata_i[6:2] == 5'b0) begin + if (instr_rdata_i[11:7] == 5'b0) begin + // c.ebreak -> ebreak + ebrk_insn_o = 1'b1; + end else begin + // c.jalr -> jalr x1, rs1, 0 + ctrl_transfer_target_mux_sel_o = JT_CJALR; + ctrl_transfer_insn = BRANCH_JALR; + // Calculate and store PC+4 + alu_op_a_mux_sel_o = OP_A_CURRPC; + alu_op_b_mux_sel_o = OP_B_IMM; + imm_b_mux_sel_o = IMMB_PCINCR; + alu_operator_o = ALU_ADD; + regfile_alu_we = 1'b1; + // Calculate jump target (= RS1 + I imm) + rega_used_o = 1'b1; + addr_ra_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + waddr_id_o = 5'd1; + + end + end else begin + regfile_alu_we = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + + addr_ra_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + addr_rb_id_o = instr_rdata_i[6:2]; + waddr_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB];; + + + alu_operator_o = ALU_ADD; + // if (instr_rdata_i[11:7] == 5'b0) begin + // // Hint -> add x0, x0, rs2 + + // end else begin + // // c.add -> add rd, rd, rs2 + + // end + end + end + end + + 3'b101: begin + // c.fsdsp -> fsd rs2, imm(x2) + // instr_i[12:10] -> offset[5:3], instr_i[9:7] -> offset[8:6] + if (FPU == 1 && C_RVD) begin + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + reg_fp_b_o = 1'b1; + + // offset from immediate + imm_b_mux_sel_o = IMMB_FSDP; + alu_op_b_mux_sel_o = OP_B_IMM; + + addr_ra_id_o = 5'd2; + addr_rb_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + // pass write data through ALU operand c + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + data_type_o = 2'b00; + end + else illegal_c_insn_o = 1'b1; + end + 3'b110: begin + // c.swsp -> sw rs2, imm(x2) + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + + addr_ra_id_o = 5'd2; + addr_rb_id_o = instr_rdata_i[6:2]; + // pass write data through ALU operand c + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + // IMMEDIATE { 24'b0,instr[8:7],instr[12:9],2'b0 } + imm_b_mux_sel_o = IMMB_CSWSP; + alu_op_b_mux_sel_o = OP_B_IMM; + end + + 3'b111: begin + // c.fswsp -> fsw rs2, imm(x2) + if (FPU == 1 && C_RVF) begin + data_req = 1'b1; + data_we_o = 1'b1; + rega_used_o = 1'b1; + regb_used_o = 1'b1; + alu_operator_o = ALU_ADD; + reg_fp_b_o = 1'b1; + + addr_ra_id_o = 5'd2; + addr_rb_id_o = instr_rdata_i[REG_D_MSB:REG_D_LSB]; + // offset from immediate + imm_b_mux_sel_o = IMMB_CSWSP; + alu_op_b_mux_sel_o = OP_B_IMM; + + // pass write data through ALU operand c + alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; + + data_type_o = 2'b00; + end + else illegal_c_insn_o = 1'b1; + end + endcase + end + default: begin + illegal_c_insn_o = 1'b0; + end + endcase + + //16 bit instructions + + // make sure invalid compressed instruction causes an exception + if (illegal_c_insn_o) begin + illegal_insn_o = 1'b1; + end + + end + + // deassert we signals (in case of stalls) + assign alu_en_o = (deassert_we_i) ? 1'b0 : alu_en; + assign apu_en_o = (deassert_we_i) ? 1'b0 : apu_en; + assign mult_int_en_o = (deassert_we_i) ? 1'b0 : mult_int_en; + assign mult_dot_en_o = (deassert_we_i) ? 1'b0 : mult_dot_en; + assign regfile_mem_we_o = (deassert_we_i) ? 1'b0 : regfile_mem_we; + assign regfile_alu_we_o = (deassert_we_i) ? 1'b0 : regfile_alu_we; + assign data_req_o = (deassert_we_i) ? 1'b0 : data_req; + assign hwlp_we_o = (deassert_we_i) ? 3'b0 : hwlp_we; + assign csr_op_o = (deassert_we_i) ? CSR_OP_READ : csr_op; + assign ctrl_transfer_insn_in_id_o = (deassert_we_i) ? BRANCH_NONE : ctrl_transfer_insn; + + assign ctrl_transfer_insn_in_dec_o = ctrl_transfer_insn; + assign regfile_alu_we_dec_o = regfile_alu_we; + +// endmodule // cv32e41p_decoder + + +endmodule + diff --git a/rtl/cv32e41p_mult.sv b/rtl/cv32e41p_mult.sv index 98d78c1..34c0c16 100644 --- a/rtl/cv32e41p_mult.sv +++ b/rtl/cv32e41p_mult.sv @@ -370,9 +370,7 @@ module cv32e41p_mult |-> (result_o == (($signed( {{32{op_a_i[31]}}, op_a_i} - ) * { - 32'b0, op_b_i - }) >> 32))); + ) * {32'b0, op_b_i}) >> 32))); // check multiplication result for mulhu assert property ( diff --git a/rtl/cv32e41p_prefetch_buffer.sv b/rtl/cv32e41p_prefetch_buffer.sv index 29a9c9f..bb14bb3 100644 --- a/rtl/cv32e41p_prefetch_buffer.sv +++ b/rtl/cv32e41p_prefetch_buffer.sv @@ -48,8 +48,8 @@ module cv32e41p_prefetch_buffer #( output logic [31:0] instr_addr_o, input logic [31:0] instr_rdata_i, input logic instr_rvalid_i, - input logic instr_err_i, // Not used yet (future addition) - input logic instr_err_pmp_i, // Not used yet (future addition) + input logic instr_err_i, // Not used yet (future addition) + input logic instr_err_pmp_i, // Not used yet (future addition) // Prefetch Buffer Status output logic busy_o @@ -167,15 +167,15 @@ module cv32e41p_prefetch_buffer #( .resp_valid_o(resp_valid), .resp_rdata_o(resp_rdata), - .resp_err_o (resp_err), // Unused for now + .resp_err_o (resp_err), // Unused for now .obi_req_o (instr_req_o), .obi_gnt_i (instr_gnt_i), .obi_addr_o (instr_addr_o), - .obi_we_o (), // Left unconnected on purpose - .obi_be_o (), // Left unconnected on purpose - .obi_wdata_o (), // Left unconnected on purpose - .obi_atop_o (), // Left unconnected on purpose + .obi_we_o (), // Left unconnected on purpose + .obi_be_o (), // Left unconnected on purpose + .obi_wdata_o (), // Left unconnected on purpose + .obi_atop_o (), // Left unconnected on purpose .obi_rdata_i (instr_rdata_i), .obi_rvalid_i(instr_rvalid_i), .obi_err_i (instr_err_i) diff --git a/rtl/cv32e41p_prefetch_controller.sv b/rtl/cv32e41p_prefetch_controller.sv index 9d92b36..367fb35 100644 --- a/rtl/cv32e41p_prefetch_controller.sv +++ b/rtl/cv32e41p_prefetch_controller.sv @@ -47,10 +47,10 @@ module cv32e41p_prefetch_controller #( input logic rst_n, // Fetch stage interface - input logic req_i, // Fetch stage requests instructions - input logic branch_i, // Taken branch + input logic req_i, // Fetch stage requests instructions + input logic branch_i, // Taken branch input logic [31:0] branch_addr_i, // Taken branch address (only valid when branch_i = 1) - output logic busy_o, // Prefetcher busy + output logic busy_o, // Prefetcher busy // HW loop signals input logic hwlp_jump_i, diff --git a/rtl/cv32e41p_sleep_unit.sv b/rtl/cv32e41p_sleep_unit.sv index 5242931..943205c 100644 --- a/rtl/cv32e41p_sleep_unit.sv +++ b/rtl/cv32e41p_sleep_unit.sv @@ -59,8 +59,8 @@ module cv32e41p_sleep_unit #( // Clock, reset interface input logic clk_ungated_i, // Free running clock input logic rst_n, - output logic clk_gated_o, // Gated clock - input logic scan_cg_en_i, // Enable all clock gates for testing + output logic clk_gated_o, // Gated clock + input logic scan_cg_en_i, // Enable all clock gates for testing // Core sleep output logic core_sleep_o, diff --git a/rtl/include/cv32e41p_pkg.sv b/rtl/include/cv32e41p_pkg.sv index a81a16c..8104dc6 100644 --- a/rtl/include/cv32e41p_pkg.sv +++ b/rtl/include/cv32e41p_pkg.sv @@ -70,6 +70,19 @@ package cv32e41p_pkg; parameter REGC_RD = 2'b01; parameter REGC_ZERO = 2'b11; + // Source/Destination register instruction index + parameter REG_S1_MSB = 19; + parameter REG_S1_LSB = 15; + + parameter REG_S2_MSB = 24; + parameter REG_S2_LSB = 20; + + parameter REG_S4_MSB = 31; + parameter REG_S4_LSB = 27; + + parameter REG_D_MSB = 11; + parameter REG_D_LSB = 7; + ////////////////////////////////////////////////////////////////////////////// // _ _ _ _ ___ _ _ // @@ -550,7 +563,8 @@ package cv32e41p_pkg; parameter MVENDORID_BANK = 25'hC; // Number of continuation codes // Machine Architecture ID (https://github.com/riscv/riscv-isa-manual/blob/master/marchid.md) - parameter MARCHID = 32'h4; + // https://github.com/riscv/riscv-isa-manual/pull/818/files + parameter MARCHID = 32'h28; /////////////////////////////////////////////// // ___ ____ ____ _ // @@ -585,17 +599,29 @@ package cv32e41p_pkg; parameter OP_B_BMASK = 3'b100; // immediate b selection - parameter IMMB_I = 4'b0000; - parameter IMMB_S = 4'b0001; - parameter IMMB_U = 4'b0010; - parameter IMMB_PCINCR = 4'b0011; - parameter IMMB_S2 = 4'b0100; - parameter IMMB_S3 = 4'b0101; - parameter IMMB_VS = 4'b0110; - parameter IMMB_VU = 4'b0111; - parameter IMMB_SHUF = 4'b1000; - parameter IMMB_CLIP = 4'b1001; - parameter IMMB_BI = 4'b1011; + parameter IMMB_I = 5'b00000; + parameter IMMB_S = 5'b00001; + parameter IMMB_U = 5'b00010; + parameter IMMB_PCINCR = 5'b00011; + parameter IMMB_S2 = 5'b00100; + parameter IMMB_S3 = 5'b00101; + parameter IMMB_VS = 5'b00110; + parameter IMMB_VU = 5'b00111; + parameter IMMB_SHUF = 5'b01000; + parameter IMMB_CLIP = 5'b01001; + parameter IMMB_BI = 5'b01010; + parameter IMMB_CJAL = 5'b01011; + parameter IMMB_CSPN = 5'b01100; + parameter IMMB_CFLDSP = 5'b01101; + parameter IMMB_CADDI = 5'b01110; + parameter IMMB_CLWSP = 5'b01111; + parameter IMMB_CLD = 5'b10000; + parameter IMMB_CSWSP = 5'b10001; + parameter IMMB_FSDP = 5'b10010; + parameter IMMB_CLW = 5'b10011; + parameter IMMB_CSRLI = 5'b10100; + parameter IMMB_CANDI = 5'b10101; + parameter IMMB_CLUI = 5'b10110; // bit mask selection parameter BMASK_A_ZERO = 1'b0; @@ -628,9 +654,12 @@ package cv32e41p_pkg; parameter BRANCH_COND = 2'b11; // conditional branches // jump target mux - parameter JT_JAL = 2'b01; - parameter JT_JALR = 2'b10; - parameter JT_COND = 2'b11; + parameter JT_JAL = 3'b001; + parameter JT_JALR = 3'b010; + parameter JT_COND = 3'b011; + parameter JT_CJAL = 3'b100; + parameter JT_CCOND = 3'b101; + parameter JT_CJALR = 3'b110; // Atomic operations parameter AMO_LR = 5'b00010;