refactor(frontend): prune the least significant bit of PC #3949

Open · wants to merge 4 commits into base: master
3 changes: 2 additions & 1 deletion src/main/scala/xiangshan/Bundle.scala
@@ -94,6 +94,7 @@ class PredictorAnswer(implicit p: Parameters) extends XSBundle {

class CfiUpdateInfo(implicit p: Parameters) extends XSBundle with HasBPUParameter {
// from backend
+ // TODO: This should be PrunedAddr. It remains UInt for now because modifications are needed in the backend.
val pc = UInt(VAddrBits.W)
// frontend -> backend -> frontend
val pd = new PreDecodeInfo
@@ -133,7 +134,7 @@ class CfiUpdateInfo(implicit p: Parameters) extends XSBundle with HasBPUParameter {
this.TOSW := entry.TOSW
this.TOSR := entry.TOSR
this.NOS := entry.NOS
- this.topAddr := entry.topAddr
+ this.topAddr := entry.topAddr.toUInt
this
}

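Note: the PrunedAddr type and the PrunedAddrInit / .toUInt helpers used throughout this diff are defined in xiangshan.frontend and are not shown here. The idea is that at instruction granularity the lowest bit of a PC is always zero (2-byte alignment with RVC), so it need not be stored; call sites that still need a conventional UInt, such as pcMem reads, XSDebug formatting, or the backend interface, convert back with .toUInt. As a rough orientation only, a minimal sketch of the interface these changes appear to assume might look like the following; the names addrWidth and instOffsetBits, the dropped-bit count, and the operator set are assumptions, not the actual XiangShan definition.

```scala
// Sketch only: the real PrunedAddr lives in xiangshan.frontend and may differ.
import chisel3._
import chisel3.util._

// An address whose low instOffsetBits bit(s) are always zero and therefore not stored.
class PrunedAddr(val addrWidth: Int, val instOffsetBits: Int = 1) extends Bundle {
  val addr: UInt = UInt((addrWidth - instOffsetBits).W)

  // Recover the full-width address by re-appending the pruned zero bits.
  def toUInt: UInt = Cat(addr, 0.U(instOffsetBits.W))

  // Bit extraction and shifts operate on the full-width view, so call sites
  // such as get_idx(addr) keep their original slicing.
  def apply(hi: Int, lo: Int): UInt = toUInt(hi, lo)
  def >>(n: Int): UInt = toUInt >> n
}

object PrunedAddr {
  def apply(addrWidth: Int): PrunedAddr = new PrunedAddr(addrWidth)
}

object PrunedAddrInit {
  // Build a PrunedAddr from a full-width UInt, dropping the always-zero low bit(s).
  def apply(full: UInt): PrunedAddr = {
    val out = Wire(new PrunedAddr(full.getWidth))
    out.addr := full >> out.instOffsetBits
    out
  }
}
```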
3 changes: 2 additions & 1 deletion src/main/scala/xiangshan/XSCore.scala
@@ -231,7 +231,8 @@ class XSCoreImp(outer: XSCoreBase) extends LazyModuleImp(outer)

// top-down info
memBlock.io.debugTopDown.robHeadVaddr := backend.io.debugTopDown.fromRob.robHeadVaddr
- frontend.io.debugTopDown.robHeadVaddr := backend.io.debugTopDown.fromRob.robHeadVaddr
+ frontend.io.debugTopDown.robHeadVaddr.bits := backend.io.debugTopDown.fromRob.robHeadVaddr.bits
+ frontend.io.debugTopDown.robHeadVaddr.valid := backend.io.debugTopDown.fromRob.robHeadVaddr.valid
io.debugTopDown.robHeadPaddr := backend.io.debugTopDown.fromRob.robHeadPaddr
io.debugTopDown.robTrueCommit := backend.io.debugRolling.robTrueCommit
backend.io.debugTopDown.fromCore.l2MissMatch := io.debugTopDown.l2MissMatch
18 changes: 9 additions & 9 deletions src/main/scala/xiangshan/backend/CtrlBlock.scala
@@ -110,7 +110,7 @@ class CtrlBlockImp(

pcMem.io.ren.get(pcMemRdIndexes("robFlush").head) := s0_robFlushRedirect.valid
pcMem.io.raddr(pcMemRdIndexes("robFlush").head) := s0_robFlushRedirect.bits.ftqIdx.value
- private val s1_robFlushPc = pcMem.io.rdata(pcMemRdIndexes("robFlush").head).startAddr + (RegEnable(s0_robFlushRedirect.bits.ftqOffset, s0_robFlushRedirect.valid) << instOffsetBits)
+ private val s1_robFlushPc = pcMem.io.rdata(pcMemRdIndexes("robFlush").head).startAddr.toUInt + (RegEnable(s0_robFlushRedirect.bits.ftqOffset, s0_robFlushRedirect.valid) << instOffsetBits)
private val s3_redirectGen = redirectGen.io.stage2Redirect
private val s1_s3_redirect = Mux(s1_robFlushRedirect.valid, s1_robFlushRedirect, s3_redirectGen)
private val s2_s4_pendingRedirectValid = RegInit(false.B)
@@ -217,15 +217,15 @@ class CtrlBlockImp(
pcMem.io.raddr(pcMemRdIndexes("redirect").head) := memViolation.bits.ftqIdx.value
pcMem.io.ren.get(pcMemRdIndexes("memPred").head) := memViolation.valid
pcMem.io.raddr(pcMemRdIndexes("memPred").head) := memViolation.bits.stFtqIdx.value
- redirectGen.io.memPredPcRead.data := pcMem.io.rdata(pcMemRdIndexes("memPred").head).startAddr + (RegEnable(memViolation.bits.stFtqOffset, memViolation.valid) << instOffsetBits)
+ redirectGen.io.memPredPcRead.data := pcMem.io.rdata(pcMemRdIndexes("memPred").head).startAddr.toUInt + (RegEnable(memViolation.bits.stFtqOffset, memViolation.valid) << instOffsetBits)

for ((pcMemIdx, i) <- pcMemRdIndexes("bjuPc").zipWithIndex) {
val ren = io.toDataPath.pcToDataPathIO.fromDataPathValid(i)
val raddr = io.toDataPath.pcToDataPathIO.fromDataPathFtqPtr(i).value
val roffset = io.toDataPath.pcToDataPathIO.fromDataPathFtqOffset(i)
pcMem.io.ren.get(pcMemIdx) := ren
pcMem.io.raddr(pcMemIdx) := raddr
- io.toDataPath.pcToDataPathIO.toDataPathPC(i) := pcMem.io.rdata(pcMemIdx).startAddr
+ io.toDataPath.pcToDataPathIO.toDataPathPC(i) := pcMem.io.rdata(pcMemIdx).startAddr.toUInt
}

val newestEn = RegNext(io.frontend.fromFtq.newest_entry_en)
@@ -239,7 +239,7 @@
pcMem.io.ren.get(pcMemIdx) := ren
pcMem.io.raddr(pcMemIdx) := raddr
val needNewest = RegNext(baseAddr === newestPtr.value)
- io.toDataPath.pcToDataPathIO.toDataPathTargetPC(i) := Mux(needNewest, newestTargetNext, pcMem.io.rdata(pcMemIdx).startAddr)
+ io.toDataPath.pcToDataPathIO.toDataPathTargetPC(i) := Mux(needNewest, newestTargetNext, pcMem.io.rdata(pcMemIdx).startAddr.toUInt)
}

val baseIdx = params.BrhCnt
@@ -250,21 +250,21 @@
val roffset = io.toDataPath.pcToDataPathIO.fromDataPathFtqOffset(baseIdx+i)
pcMem.io.ren.get(pcMemIdx) := ren
pcMem.io.raddr(pcMemIdx) := raddr
- io.toDataPath.pcToDataPathIO.toDataPathPC(baseIdx+i) := pcMem.io.rdata(pcMemIdx).startAddr
+ io.toDataPath.pcToDataPathIO.toDataPathPC(baseIdx+i) := pcMem.io.rdata(pcMemIdx).startAddr.toUInt
}

for ((pcMemIdx, i) <- pcMemRdIndexes("hybrid").zipWithIndex) {
// load read pcMem (s0) -> get rdata (s1) -> reg next in Memblock (s2) -> reg next in Memblock (s3) -> consumed by pf (s3)
pcMem.io.ren.get(pcMemIdx) := io.memHyPcRead(i).valid
pcMem.io.raddr(pcMemIdx) := io.memHyPcRead(i).ptr.value
- io.memHyPcRead(i).data := pcMem.io.rdata(pcMemIdx).startAddr + (RegEnable(io.memHyPcRead(i).offset, io.memHyPcRead(i).valid) << instOffsetBits)
+ io.memHyPcRead(i).data := pcMem.io.rdata(pcMemIdx).startAddr.toUInt + (RegEnable(io.memHyPcRead(i).offset, io.memHyPcRead(i).valid) << instOffsetBits)
}

if (EnableStorePrefetchSMS) {
for ((pcMemIdx, i) <- pcMemRdIndexes("store").zipWithIndex) {
pcMem.io.ren.get(pcMemIdx) := io.memStPcRead(i).valid
pcMem.io.raddr(pcMemIdx) := io.memStPcRead(i).ptr.value
- io.memStPcRead(i).data := pcMem.io.rdata(pcMemIdx).startAddr + (RegEnable(io.memStPcRead(i).offset, io.memStPcRead(i).valid) << instOffsetBits)
+ io.memStPcRead(i).data := pcMem.io.rdata(pcMemIdx).startAddr.toUInt + (RegEnable(io.memStPcRead(i).offset, io.memStPcRead(i).valid) << instOffsetBits)
}
} else {
io.memStPcRead.foreach(_.data := 0.U)
@@ -283,7 +283,7 @@ class CtrlBlockImp(
val traceValid = trace.toPcMem.blocks(i).valid
pcMem.io.ren.get(pcMemIdx) := traceValid
pcMem.io.raddr(pcMemIdx) := trace.toPcMem.blocks(i).bits.ftqIdx.get.value
- tracePcStart(i) := pcMem.io.rdata(pcMemIdx).startAddr
+ tracePcStart(i) := pcMem.io.rdata(pcMemIdx).startAddr.toUInt
}

// Trap/Xret only occur in block(0).
@@ -316,7 +316,7 @@ class CtrlBlockImp(
redirectGen.io.loadReplay <> loadReplay
val loadRedirectOffset = Mux(memViolation.bits.flushItself(), 0.U, Mux(memViolation.bits.isRVC, 2.U, 4.U))
val loadRedirectPcFtqOffset = RegEnable((memViolation.bits.ftqOffset << instOffsetBits).asUInt +& loadRedirectOffset, memViolation.valid)
- val loadRedirectPcRead = pcMem.io.rdata(pcMemRdIndexes("redirect").head).startAddr + loadRedirectPcFtqOffset
+ val loadRedirectPcRead = pcMem.io.rdata(pcMemRdIndexes("redirect").head).startAddr.toUInt + loadRedirectPcFtqOffset

redirectGen.io.loadReplay.bits.cfiUpdate.pc := loadRedirectPcRead
val load_target = loadRedirectPcRead
3 changes: 3 additions & 0 deletions src/main/scala/xiangshan/cache/L1Cache.scala
@@ -22,6 +22,7 @@ import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import xiangshan.{HasXSParameter, XSBundle, XSModule}
+ import xiangshan.frontend.PrunedAddr

// this file contains common building blocks that can be shared by ICache and DCache
// this is the common parameter base for L1 ICache and L1 DCache
@@ -79,9 +80,11 @@ trait HasL1CacheParameters extends HasXSParameter
def refillWords = refillBytes / wordBytes

def get_phy_tag(paddr: UInt) = (paddr >> pgUntagBits).asUInt
+ def get_phy_tag(paddr: PrunedAddr): UInt = (paddr >> pgUntagBits).asUInt
def get_vir_tag(vaddr: UInt) = (vaddr >> untagBits).asUInt
def get_tag(addr: UInt) = get_phy_tag(addr)
def get_idx(addr: UInt) = addr(untagBits-1, blockOffBits)
+ def get_idx(addr: PrunedAddr) = addr(untagBits-1, blockOffBits)
def get_untag(addr: UInt) = addr(pgUntagBits-1, 0)
def get_block(addr: UInt) = addr >> blockOffBits
def get_block_addr(addr: UInt) = (addr >> blockOffBits) << blockOffBits
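Assuming PrunedAddr's apply(hi, lo) slices the reconstructed full-width address, the new get_idx overload above should produce the same index bits as the existing UInt overload, because the pruned bit lies below blockOffBits. A standalone sketch of that equivalence, using illustrative widths rather than the real XiangShan parameters:

```scala
import chisel3._
import chisel3.util._

class GetIdxEquivalence extends Module {
  // Illustrative parameters, not the real XiangShan values.
  val untagBits    = 12 // log2(page size)
  val blockOffBits = 6  // log2(cache-line size)

  val io = IO(new Bundle {
    val vaddr     = Input(UInt(39.W))
    val idxFull   = Output(UInt((untagBits - blockOffBits).W))
    val idxPruned = Output(UInt((untagBits - blockOffBits).W))
  })

  // UInt path: exactly what get_idx(addr: UInt) does.
  io.idxFull := io.vaddr(untagBits - 1, blockOffBits)

  // Pruned path: store the address without its low bit, rebuild the full-width
  // view (toUInt), then take the same slice; the result is identical because
  // the dropped bit sits below blockOffBits.
  val pruned  = io.vaddr >> 1
  val rebuilt = Cat(pruned, 0.U(1.W))
  io.idxPruned := rebuilt(untagBits - 1, blockOffBits)
}
```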
52 changes: 28 additions & 24 deletions src/main/scala/xiangshan/frontend/BPU.scala
@@ -107,9 +107,9 @@ trait BPUUtils extends HasXSParameter {
)
}

- def getFallThroughAddr(start: UInt, carry: Bool, pft: UInt) = {
- val higher = start.head(VAddrBits - log2Ceil(PredictWidth) - instOffsetBits)
- Cat(Mux(carry, higher + 1.U, higher), pft, 0.U(instOffsetBits.W))
+ def getFallThroughAddr(start: PrunedAddr, carry: Bool, pft: UInt): PrunedAddr = {
+ val higher = start(VAddrBits - 1, log2Ceil(PredictWidth) + instOffsetBits)
+ PrunedAddrInit(Cat(Mux(carry, higher + 1.U, higher), pft, 0.U(instOffsetBits.W)))
}

def foldTag(tag: UInt, l: Int): UInt = {
@@ -122,7 +122,7 @@ trait BPUUtils extends HasXSParameter {
class BasePredictorInput(implicit p: Parameters) extends XSBundle with HasBPUConst {
def nInputs = 1

- val s0_pc = Vec(numDup, UInt(VAddrBits.W))
+ val s0_pc = Vec(numDup, PrunedAddr(VAddrBits))

val folded_hist = Vec(numDup, new AllFoldedHistories(foldedGHistInfos))
val s1_folded_hist = Vec(numDup, new AllFoldedHistories(foldedGHistInfos))
@@ -139,11 +139,11 @@ class BasePredictorInput(implicit p: Parameters) extends XSBundle with HasBPUConst {
class BasePredictorOutput(implicit p: Parameters) extends BranchPredictionResp {}

class BasePredictorIO(implicit p: Parameters) extends XSBundle with HasBPUConst {
- val reset_vector = Input(UInt(PAddrBits.W))
+ val reset_vector = Input(PrunedAddr(PAddrBits))
val in = Flipped(DecoupledIO(new BasePredictorInput)) // TODO: Remove DecoupledIO
// val out = DecoupledIO(new BasePredictorOutput)
val out = Output(new BasePredictorOutput)
- // val flush_out = Valid(UInt(VAddrBits.W))
+ // val flush_out = Valid(PrunedAddr(VAddrBits))

val fauftb_entry_in = Input(new FTBEntry)
val fauftb_entry_hit_in = Input(Bool())
@@ -191,20 +191,24 @@ abstract class BasePredictor(implicit p: Parameters) extends XSModule

val s0_pc_dup = WireInit(io.in.bits.s0_pc) // fetchIdx(io.f0_pc)
val s1_pc_dup = s0_pc_dup.zip(io.s0_fire).map { case (s0_pc, s0_fire) => RegEnable(s0_pc, s0_fire) }
+ // FIXME: Potential problems here. Ideally, we should modify SegmentedAddrNext to support PrunedAddr.
+ // However, we are not doing this now because SegmentedAddrNext is in a submodule.
+ // This should be fixed after PrunedAddr is merged to master.
+ // TODO: Check the generated Verilog to see whether this works or not.
val s2_pc_dup = s1_pc_dup.zip(io.s1_fire).map { case (s1_pc, s1_fire) =>
- SegmentedAddrNext(s1_pc, pcSegments, s1_fire, Some("s2_pc"))
+ PrunedAddrInit(SegmentedAddrNext(s1_pc.toUInt, pcSegments, s1_fire, Some("s2_pc")).getAddr())
}
val s3_pc_dup = s2_pc_dup.zip(io.s2_fire).map { case (s2_pc, s2_fire) =>
- SegmentedAddrNext(s2_pc, s2_fire, Some("s3_pc"))
+ PrunedAddrInit(SegmentedAddrNext(s2_pc.toUInt, pcSegments, s2_fire, Some("s3_pc")).getAddr())
}

when(RegNext(RegNext(reset.asBool) && !reset.asBool)) {
s1_pc_dup.map { case s1_pc => s1_pc := io.reset_vector }
}

io.out.s1.pc := s1_pc_dup
- io.out.s2.pc := s2_pc_dup.map(_.getAddr())
- io.out.s3.pc := s3_pc_dup.map(_.getAddr())
+ io.out.s2.pc := s2_pc_dup
+ io.out.s3.pc := s3_pc_dup

val perfEvents: Seq[(String, UInt)] = Seq()

@@ -225,7 +229,7 @@ class PredictorIO(implicit p: Parameters) extends XSBundle {
val bpu_to_ftq = new BpuToFtqIO()
val ftq_to_bpu = Flipped(new FtqToBpuIO)
val ctrl = Input(new BPUCtrl)
- val reset_vector = Input(UInt(PAddrBits.W))
+ val reset_vector = Input(PrunedAddr(PAddrBits))
}

class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents
@@ -279,7 +283,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents
val s1_ready_dup, s2_ready_dup, s3_ready_dup = dup_wire(Bool())
val s1_components_ready_dup, s2_components_ready_dup, s3_components_ready_dup = dup_wire(Bool())

- val s0_pc_dup = dup(WireInit(0.U.asTypeOf(UInt(VAddrBits.W))))
+ val s0_pc_dup = dup(WireInit(0.U.asTypeOf(PrunedAddr(VAddrBits))))
val s0_pc_reg_dup = s0_pc_dup.zip(s0_stall_dup).map { case (s0_pc, s0_stall) => RegEnable(s0_pc, !s0_stall) }
when(RegNext(RegNext(reset.asBool) && !reset.asBool)) {
s0_pc_reg_dup.map { case s0_pc => s0_pc := io.reset_vector }
@@ -315,7 +319,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents
val s3_ahead_fh_oldest_bits_dup =
RegEnable(s2_ahead_fh_oldest_bits_dup, 0.U.asTypeOf(s0_ahead_fh_oldest_bits_dup), s2_fire_dup(1))

- val npcGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[UInt])
+ val npcGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[PrunedAddr])
val foldedGhGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[AllFoldedHistories])
val ghistPtrGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[CGHPtr])
val lastBrNumOHGen_dup = Seq.tabulate(numDup)(n => new PhyPriorityMuxGenerator[UInt])
@@ -587,7 +591,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents

class PreviousPredInfo extends Bundle {
val hit = Vec(numDup, Bool())
- val target = Vec(numDup, UInt(VAddrBits.W))
+ val target = Vec(numDup, PrunedAddr(VAddrBits))
val lastBrPosOH = Vec(numDup, Vec(numBr + 1, Bool()))
val taken = Vec(numDup, Bool())
val takenMask = Vec(numDup, Vec(numBr, Bool()))
@@ -896,12 +900,12 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents
predictors.io.update := io.ftq_to_bpu.update
predictors.io.update.bits.ghist := getHist(io.ftq_to_bpu.update.bits.spec_info.histPtr)
// Move the update pc registers out of predictors.
- predictors.io.update.bits.pc := SegmentedAddrNext(
- io.ftq_to_bpu.update.bits.pc,
+ predictors.io.update.bits.pc := PrunedAddrInit(SegmentedAddrNext(
+ io.ftq_to_bpu.update.bits.pc.toUInt,
pcSegments,
io.ftq_to_bpu.update.valid,
Some("predictors_io_update_pc")
- ).getAddr()
+ ).getAddr())

val redirect_dup = do_redirect_dup.map(_.bits)
predictors.io.redirect := do_redirect_dup(0)
@@ -1027,7 +1031,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents

// val updatedGh = oldGh.update(shift, taken && addIntoHist)
for ((npcGen, do_redirect) <- npcGen_dup zip do_redirect_dup)
- npcGen.register(do_redirect.valid, do_redirect.bits.cfiUpdate.target, Some("redirect_target"), 2)
+ npcGen.register(do_redirect.valid, PrunedAddrInit(do_redirect.bits.cfiUpdate.target), Some("redirect_target"), 2)
for (((foldedGhGen, do_redirect), updated_fh) <- foldedGhGen_dup zip do_redirect_dup zip updated_fh_dup)
foldedGhGen.register(do_redirect.valid, updated_fh, Some("redirect_FGHT"), 2)
for (((ghistPtrGen, do_redirect), updated_ptr) <- ghistPtrGen_dup zip do_redirect_dup zip updated_ptr_dup)
@@ -1163,15 +1167,15 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents
XSDebug(io.ftq_to_bpu.update.valid, p"Update from ftq\n")
XSDebug(io.ftq_to_bpu.redirect.valid, p"Redirect from ftq\n")

XSDebug("[BP0] fire=%d pc=%x\n", s0_fire_dup(0), s0_pc_dup(0))
XSDebug("[BP0] fire=%d pc=%x\n", s0_fire_dup(0), s0_pc_dup(0).toUInt)
XSDebug(
"[BP1] v=%d r=%d cr=%d fire=%d flush=%d pc=%x\n",
s1_valid_dup(0),
s1_ready_dup(0),
s1_components_ready_dup(0),
s1_fire_dup(0),
s1_flush_dup(0),
- s1_pc
+ s1_pc.toUInt
)
XSDebug(
"[BP2] v=%d r=%d cr=%d fire=%d redirect=%d flush=%d pc=%x\n",
@@ -1181,7 +1185,7 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents
s2_fire_dup(0),
s2_redirect_dup(0),
s2_flush_dup(0),
- s2_pc
+ s2_pc.toUInt
)
XSDebug(
"[BP3] v=%d r=%d cr=%d fire=%d redirect=%d flush=%d pc=%x\n",
@@ -1191,11 +1195,11 @@ class Predictor(implicit p: Parameters) extends XSModule with HasBPUConst with HasPerfEvents
s3_fire_dup(0),
s3_redirect_dup(0),
s3_flush_dup(0),
- s3_pc
+ s3_pc.toUInt
)
XSDebug("[FTQ] ready=%d\n", io.bpu_to_ftq.resp.ready)
XSDebug("resp.s1.target=%x\n", resp.s1.getTarget(0))
XSDebug("resp.s2.target=%x\n", resp.s2.getTarget(0))
XSDebug("resp.s1.target=%x\n", resp.s1.getTarget(0).toUInt)
XSDebug("resp.s2.target=%x\n", resp.s2.getTarget(0).toUInt)
// XSDebug("s0_ghist: %b\n", s0_ghist.predHist)
// XSDebug("s1_ghist: %b\n", s1_ghist.predHist)
// XSDebug("s2_ghist: %b\n", s2_ghist.predHist)