diff --git a/src/src/ComplexCoreCpuTop.scala b/src/src/ComplexCoreCpuTop.scala index 77a7039c..85ed758f 100644 --- a/src/src/ComplexCoreCpuTop.scala +++ b/src/src/ComplexCoreCpuTop.scala @@ -179,6 +179,8 @@ class ComplexCoreCpuTop extends Module { case (dst, src) => dst := src } + frontend.io.commitFixBranch := false.B + frontend.io.commitFixId := 0.U // Instruction queue instQueue.io.enqueuePort <> frontend.io.instDequeuePort diff --git a/src/src/SimpleCoreCpuTop.scala b/src/src/SimpleCoreCpuTop.scala index a1d92a52..bf1ee20f 100644 --- a/src/src/SimpleCoreCpuTop.scala +++ b/src/src/SimpleCoreCpuTop.scala @@ -1,5 +1,4 @@ import pipeline.simple.MainExeStage -import pipeline.simple.id.IssueStage import axi.Axi3x1Crossbar import axi.bundles.AxiMasterInterface import chisel3._ @@ -189,8 +188,13 @@ class SimpleCoreCpuTop extends Module { // TODO: Connect frontend frontend.io.exeFtqPort.queryPcBundle <> issueQueue.io.queryPcPort - frontend.io.exeFtqPort.commitBundle := mainExeStage.io.peer.get.feedbackFtq - frontend.io.commitFtqTrainPort := addrTransStage.io.peer.get.commitFtqPort + frontend.io.exeFtqPort.feedBack := mainExeStage.io.peer.get.feedbackFtq + val commitFtqPort = + if (isNoPrivilege) mainExeStage.io.peer.get.commitFtqPort + else addrTransStage.io.peer.get.commitFtqPort + frontend.io.commitFtqTrainPort := commitFtqPort + frontend.io.commitFixBranch := false.B + frontend.io.commitFixId := 0.U connectVec(frontend.io.commitBitMask, cu.io.commitBitMask) // Instruction queue diff --git a/src/src/frontend/FetchTargetQueue.scala b/src/src/frontend/FetchTargetQueue.scala index 815ead19..b0f76ae4 100644 --- a/src/src/frontend/FetchTargetQueue.scala +++ b/src/src/frontend/FetchTargetQueue.scala @@ -17,10 +17,11 @@ class FetchTargetQueue( val io = IO(new Bundle { // <-> Frontend flush control - val backendFlush = Input(Bool()) - val backendFlushFtqId = Input(UInt(ptrWidth.W)) - val instFetchFlush = Input(Bool()) - val instFetchFtqId = Input(UInt(ptrWidth.W)) + val backendFlush = Input(Bool()) + val backendFlushFtqId = Input(UInt(ptrWidth.W)) + val preDecoderFlush = Input(Bool()) + val preDecoderFtqId = Input(UInt(ptrWidth.W)) + val preDecoderBranchTaken = Input(Bool()) // <-> BPU val bpuFtqPort = new BpuFtqPort @@ -116,9 +117,9 @@ class FetchTargetQueue( } // if IF predecoder found a redirect - when(io.instFetchFlush) { - nextIfPtr := io.instFetchFtqId + 1.U - bpuPtr := io.instFetchFtqId + 1.U + when(io.preDecoderFlush) { + nextIfPtr := io.preDecoderFtqId + 1.U + bpuPtr := io.preDecoderFtqId + 1.U } // if backend redirect triggered,back to the next block of the redirect block // backend may continue to commit older block (flush before exeStage inst;commit after exeStage inst) @@ -187,7 +188,16 @@ class FetchTargetQueue( io.bpuFtqPort.ftqFull := queueFull // training meta to BPU - io.bpuFtqPort.ftqBpuTrainMeta := FtqBpuMetaPort.default + io.bpuFtqPort.ftqBpuTrainMeta := FtqBpuMetaPort.default + io.bpuFtqPort.ftqBpuTrainMeta.ghrUpdateSignalBundle.exeFixBundle := io.exeFtqPort.feedBack.fixGhrBundle + io.bpuFtqPort.ftqBpuTrainMeta.ghrUpdateSignalBundle.isPredecoderFixGhr := io.preDecoderFlush + io.bpuFtqPort.ftqBpuTrainMeta.ghrUpdateSignalBundle.isPredecoderBranchTaken := io.preDecoderBranchTaken + io.bpuFtqPort.ftqBpuTrainMeta.tageGhrInfo := + Mux( + io.backendFlush, + ftqBpuMetaRegs(io.backendFlushFtqId).tageQueryMeta.tageGhrInfo, + ftqBpuMetaRegs(io.preDecoderFtqId).tageQueryMeta.tageGhrInfo + ) // when( // io.cuCommitFtqPort.blockBitmask(0) & io.cuCommitFtqPort.meta.isBranch // ) { @@ -204,7 +214,7 @@ class FetchTargetQueue( io.bpuFtqPort.ftqBpuTrainMeta.branchAddrBundle.startPc := ftqVecReg(commitFtqId).startPc io.bpuFtqPort.ftqBpuTrainMeta.isCrossCacheline := ftqVecReg(commitFtqId).isCrossCacheline - io.bpuFtqPort.ftqBpuTrainMeta.tageOriginMeta := ftqBpuMetaRegs(commitFtqId).tageQueryMeta + io.bpuFtqPort.ftqBpuTrainMeta.tageOriginMeta := ftqBpuMetaRegs(commitFtqId).tageQueryMeta io.bpuFtqPort.ftqBpuTrainMeta.branchAddrBundle.jumpTargetAddr := ftqBranchMetaRegs(commitFtqId).jumpTargetAddr io.bpuFtqPort.ftqBpuTrainMeta.branchAddrBundle.fallThroughAddr := ftqBranchMetaRegs(commitFtqId).fallThroughAddr @@ -231,6 +241,7 @@ class FetchTargetQueue( io.bpuFtqPort.bpuQueryMeta, BpuFtqMetaNdPort.default ) + bpuMetaWriteEntry.tageQueryMeta.tageGhrInfo := io.bpuFtqPort.bpuQueryMeta.tageQueryMeta.tageGhrInfo // when(io.bpuFtqPort.ftqP1.isValid & ~mainBpuRedirectDelay) { // bpuMetaWriteValid := true.B // bpuMetaWritePtr := bpuPtr - 1.U @@ -259,16 +270,19 @@ class FetchTargetQueue( when(bpuMetaWriteValid) { ftqBpuMetaRegs(bpuMetaWritePtr) := bpuMetaWriteEntry } + when(io.preDecoderFlush) { + ftqBpuMetaRegs(io.preDecoderFtqId).tageQueryMeta.tageGhrInfo := io.bpuFtqPort.bpuQueryMeta.tageQueryMeta.tageGhrInfo + } // update pc from backend - when(io.exeFtqPort.commitBundle.ftqMetaUpdateValid) { - val ftqUpdateMetaId = WireDefault(io.exeFtqPort.commitBundle.ftqUpdateMetaId) + when(io.exeFtqPort.feedBack.commitBundle.ftqMetaUpdateValid) { + val ftqUpdateMetaId = WireDefault(io.exeFtqPort.feedBack.commitBundle.ftqUpdateMetaId) ftqBranchMetaRegs( ftqUpdateMetaId - ).jumpTargetAddr := io.exeFtqPort.commitBundle.ftqMetaUpdateJumpTarget + ).jumpTargetAddr := io.exeFtqPort.feedBack.commitBundle.ftqMetaUpdateJumpTarget ftqBranchMetaRegs( ftqUpdateMetaId - ).fallThroughAddr := io.exeFtqPort.commitBundle.ftqMetaUpdateFallThrough - ftqBranchMetaRegs(ftqUpdateMetaId).ftbDirty := io.exeFtqPort.commitBundle.ftqMetaUpdateFtbDirty + ).fallThroughAddr := io.exeFtqPort.feedBack.commitBundle.ftqMetaUpdateFallThrough + ftqBranchMetaRegs(ftqUpdateMetaId).ftbDirty := io.exeFtqPort.feedBack.commitBundle.ftqMetaUpdateFtbDirty } } diff --git a/src/src/frontend/Frontend.scala b/src/src/frontend/Frontend.scala index d28b5bfb..107f343a 100644 --- a/src/src/frontend/Frontend.scala +++ b/src/src/frontend/Frontend.scala @@ -23,6 +23,8 @@ class Frontend extends Module { // ftq <-> cu val commitFtqTrainPort = Input(new CommitFtqTrainNdPort) val commitBitMask = Input(Vec(Param.commitNum, Bool())) + val commitFixBranch = Input(Bool()) + val commitFixId = Input(UInt(Param.BPU.ftqPtrWidth.W)) // instFetch <-> ICache val accessPort = Flipped(new ICacheAccessPort) @@ -66,10 +68,11 @@ class Frontend extends Module { // fetch Target Pc queue; // stage 1 // act as a fetch buffer - ftq.io.backendFlush := io.isFlush - ftq.io.backendFlushFtqId := io.ftqFlushId - ftq.io.instFetchFlush := instFetch.io.preDecodeRedirectPort.predecodeRedirect // TODO add predecoder stage - ftq.io.instFetchFtqId := instFetch.io.preDecodeRedirectPort.redirectFtqId + ftq.io.backendFlush := io.isFlush + ftq.io.backendFlushFtqId := io.ftqFlushId + ftq.io.preDecoderFlush := instFetch.io.preDecodeRedirectPort.predecodeRedirect // TODO add predecoder stage + ftq.io.preDecoderFtqId := instFetch.io.preDecodeRedirectPort.redirectFtqId + ftq.io.preDecoderBranchTaken := instFetch.io.preDecodeRedirectPort.predecoderBranch instFetch.io.preDecodeRedirectPort.commitRasPort := ftq.io.ftqRasPort ftq.io.commitFtqTrainPort := io.commitFtqTrainPort ftq.io.exeFtqPort <> io.exeFtqPort diff --git a/src/src/frontend/bpu/BPU.scala b/src/src/frontend/bpu/BPU.scala index 076ea2ce..79c507ce 100644 --- a/src/src/frontend/bpu/BPU.scala +++ b/src/src/frontend/bpu/BPU.scala @@ -5,8 +5,8 @@ import chisel3.util._ import frontend.bpu.bundles._ import frontend.bpu.components.Bundles.{FtbEntryNdPort, TageMetaPort} import frontend.bpu.components.FTB -import frontend.bundles.{BpuFtqPort, FtqBlockBundle} -import spec.Param.BPU.BranchType +import frontend.bundles.{BpuFtqPort, FtqBlockBundle, GhrUpdateSignalBundle} +import spec.Param.BPU.{BranchType, GhrFixType} import spec._ // BPU is the Branch Predicting Unit @@ -144,7 +144,8 @@ class BPU( io.bpuRedirectPc.bits := ftbEntry.jumpTargetAddr } is(Param.BPU.BranchType.ret) { - // return inst is predict in pre decode Stage + // return inst is predict in preDecode Stage; + // when preDecode predict error,use ftb to predict } } @@ -200,6 +201,28 @@ class BPU( ftbUpdateEntry.jumpTargetAddr := io.bpuFtqPort.ftqBpuTrainMeta.branchAddrBundle.jumpTargetAddr ftbUpdateEntry.fallThroughAddr := io.bpuFtqPort.ftqBpuTrainMeta.branchAddrBundle.fallThroughAddr + // global branch history update logic + val ghrFixBundle = Wire(new GhrFixNdBundle) + val ghrUpdateSignalBundle = WireDefault( + io.bpuFtqPort.ftqBpuTrainMeta.ghrUpdateSignalBundle + ) + ghrFixBundle.isFixGhrValid := ghrUpdateSignalBundle.isPredecoderFixGhr || io.backendFlush + ghrFixBundle.isFixBranchTaken := ghrUpdateSignalBundle.exeFixBundle.exeFixIsTaken +// ghrUpdateSignalBundle.isCommitFixGhr + ghrFixBundle.ghrFixType := Mux( + io.backendFlush && !ghrUpdateSignalBundle.exeFixBundle.isExeFixValid && !ghrUpdateSignalBundle.isPredecoderFixGhr, + GhrFixType.commitRecover, + Mux( + ghrUpdateSignalBundle.exeFixBundle.isExeFixValid, + Mux( + ghrUpdateSignalBundle.exeFixBundle.exeFixFirstBrTaken, + GhrFixType.exeUpdateJump, + Mux(ghrUpdateSignalBundle.exeFixBundle.exeFixJumpError, GhrFixType.exeFixJumpError, GhrFixType.exeRecover) + ), + Mux(ghrUpdateSignalBundle.isPredecoderBranchTaken, GhrFixType.decodeUpdateJump, GhrFixType.decodeBrExcp) + ) + ) + // connect fetch target buffer module // assign ftbHit = 0 val ftbModule = Module(new FTB) @@ -220,12 +243,16 @@ class BPU( // connect tage Predictor module val tagePredictorModule = Module(new TagePredictor) - tagePredictorModule.io.pc := io.pc - tageQueryMeta := tagePredictorModule.io.tageQueryMeta - predictTaken := tagePredictorModule.io.predictBranchTaken - predictValid := tagePredictorModule.io.predictValid - tagePredictorModule.io.updatePc := io.bpuFtqPort.ftqBpuTrainMeta.branchAddrBundle.startPc - tagePredictorModule.io.updateInfoPort := tageUpdateInfo + tagePredictorModule.io.pc := io.pc + tageQueryMeta := tagePredictorModule.io.tageQueryMeta + predictTaken := tagePredictorModule.io.predictBranchTaken + predictValid := tagePredictorModule.io.predictValid + tagePredictorModule.io.updatePc := io.bpuFtqPort.ftqBpuTrainMeta.branchAddrBundle.startPc + tagePredictorModule.io.updateInfoPort := tageUpdateInfo + tagePredictorModule.io.ghrUpdateNdBundle.bpuSpecTaken := io.bpuFtqPort.ftqP1.predictTaken // bpu predict info + tagePredictorModule.io.ghrUpdateNdBundle.bpuSpecValid := mainRedirectValid + tagePredictorModule.io.ghrUpdateNdBundle.fixBundle := ghrFixBundle + tagePredictorModule.io.ghrUpdateNdBundle.tageGhrInfo := io.bpuFtqPort.ftqBpuTrainMeta.tageGhrInfo // tagePredictorModule.io.perfTagHitCounters <> DontCare } diff --git a/src/src/frontend/bpu/TagePredictor.scala b/src/src/frontend/bpu/TagePredictor.scala index 27224c1b..7f1cbfea 100644 --- a/src/src/frontend/bpu/TagePredictor.scala +++ b/src/src/frontend/bpu/TagePredictor.scala @@ -4,10 +4,24 @@ import chisel3._ import chisel3.util._ import chisel3.util.random.LFSR import frontend.bpu.bundles.{BpuFtqMetaNdPort, TagePredictorUpdateInfoPort} -import frontend.bpu.components.Bundles.TageMetaPort +import frontend.bpu.components.Bundles.{TageGhrInfo, TageMetaPort} import frontend.bpu.components._ +import spec.Param.BPU.GhrFixType import spec._ - +class GhrUpdateNdBundle extends Bundle { + val bpuSpecValid = Bool() // speculative update + val bpuSpecTaken = Bool() + val fixBundle = new GhrFixNdBundle + val tageGhrInfo = new TageGhrInfo() +} +class GhrFixNdBundle extends Bundle { + val isFixGhrValid = Bool() + val ghrFixType = UInt(Param.BPU.GhrFixType.width.W) + val isFixBranchTaken = Bool() +} +object GhrUpdateNdBundle { + def default = 0.U.asTypeOf(new GhrUpdateNdBundle) +} // TAGE predictor // This is the main predictor class TagePredictor( @@ -24,6 +38,7 @@ class TagePredictor( val addrWidth = log2Ceil(addr) val pointerWidth = log2Ceil(entryNum) val tagComPtrWidth = log2Ceil(tagComponentNum + 1) + val phtAddrWidth = log2Ceil(phtDepths(1)) val io = IO(new Bundle { // Query signal val pc = Input(UInt(Width.Reg.data)) @@ -35,6 +50,9 @@ class TagePredictor( val updatePc = Input(UInt(Width.Reg.data)) val updateInfoPort = Input(new TagePredictorUpdateInfoPort) + // globalHistory update info + val ghrUpdateNdBundle = Input(new GhrUpdateNdBundle) + // TODO PMU // val perfTagHitCounters = Output(Vec(32, UInt((tagComponentNum + 1).W))) @@ -69,11 +87,14 @@ class TagePredictor( val queryNewEntryFlag = WireDefault(false.B) // Indicates the provider is new // Meta - val tagCtrbits = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(3.W)))) - val tagUsefulbits = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(componentUsefulWidth(1).W)))) - val tagQueryTags = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(tagComponentTagWidth.W)))) - val tagOriginTags = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(tagComponentTagWidth.W)))) - val tagHitIndexs = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(10.W)))) + val tagCtrbits = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(3.W)))) + val tagUsefulbits = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(componentUsefulWidth(1).W)))) + val tagQueryTags = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(tagComponentTagWidth.W)))) + val tagOriginTags = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(tagComponentTagWidth.W)))) + val tagHitIndexs = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(10.W)))) + val tagGhtHashs = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(phtAddrWidth.W)))) + val tagTagHashCsr1s = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U(tagComponentTagWidth.W)))) + val tagTagHashCsr2s = WireDefault(VecInit(Seq.fill(tagComponentNum)(0.U((tagComponentTagWidth - 1).W)))) // update val updatePc = WireDefault(0.U(Width.Reg.data)) @@ -114,13 +135,104 @@ class TagePredictor( //////////////////////////////////////////////////////////////////////////////////////////// // END of Defines //////////////////////////////////////////////////////////////////////////////////////////// + // Global History Register speculative update logic + //////////////////////////////////////////////////////////////////////////////////////////// + // predict queue + val specPtr = RegInit(0.U(Param.BPU.TagePredictor.ghrPtrWidth.W)) + val nextSpecPtr = Wire(UInt(Param.BPU.TagePredictor.ghrPtrWidth.W)) + val commitPtr = dontTouch(RegInit(0.U(Param.BPU.TagePredictor.ghrPtrWidth.W))) + val checkPtr = WireDefault(0.U(Param.BPU.TagePredictor.ghrPtrWidth.W)) + val checkDepth = Wire(UInt(Param.BPU.TagePredictor.ghrPtrWidth.W)) + checkPtr := io.ghrUpdateNdBundle.tageGhrInfo.checkPtr + checkDepth := checkPtr - commitPtr // calculate the location of the correct old history // Global History Register - val ghr = RegInit(0.U(ghrDepth.W)) + val speculativeHistoryReg = RegInit(VecInit(Seq.fill(ghrDepth)(false.B))) + val nextGlobalHistory = Wire(Vec(ghrDepth, Bool())) + val shiftedGlobalHistory = dontTouch(Wire(UInt(ghrDepth.W))) + // default nextGhr keep value; ghr = RegNext(nextGhr) (assign in the next clock) + nextGlobalHistory := speculativeHistoryReg + speculativeHistoryReg := nextGlobalHistory + nextSpecPtr := specPtr + specPtr := nextSpecPtr + + // signal that indicates how to fix globalHistory Hash value + val isExeFixCsr = WireDefault(false.B) + val isPredecodeFixCsr = WireDefault(false.B) + val isRecoverCsr = WireDefault(false.B) +// val originHash = Input(UInt(outputLength.W)) + + isGlobalHistoryUpdateReg := isUpdateValid + + // speculative Global history update + // 1. when ftb hit and predict ,update ghr with predictBranchTaken, specPtr + 1 + // 2. when inst commit, commitPtr + 1 + // fix error global history + // 1. when check a direction error, Cat the correct old history and actually taken as fixed history,specPtr keep as checkPtr + // 2. when check a branch inst actually is taken without prediction, update the history with checkPoint,specPtr = 1 + checkPtr + // 3. when detect a non branch inst predict taken or an exception that discards a predicted inst,recover the history, specPtr = checkPtr - 1 + // ghr update priority: + // 1.commitStage : recover + // 2.exeStage : fix or update + // 3.preDecoderStage :recover or update + // 4.spec update + + when(isUpdateValid) { + commitPtr := commitPtr - 1.U + } + when(io.ghrUpdateNdBundle.fixBundle.isFixGhrValid) { + switch(io.ghrUpdateNdBundle.fixBundle.ghrFixType) { + is(GhrFixType.commitRecover) { + // recover to the old history + nextSpecPtr := checkPtr + isRecoverCsr := true.B + } + is(GhrFixType.decodeBrExcp) { + // recover to the old history,quit predict info + nextSpecPtr := checkPtr + 1.U + isRecoverCsr := true.B + } + is(GhrFixType.exeFixJumpError, GhrFixType.exeRecover) { + // fix error predict + nextSpecPtr := checkPtr + nextGlobalHistory(nextSpecPtr) := io.ghrUpdateNdBundle.fixBundle.isFixBranchTaken + isExeFixCsr := true.B + } + is(GhrFixType.exeUpdateJump) { + // update the branch that has not been predicted + nextSpecPtr := checkPtr - 1.U + nextGlobalHistory(nextSpecPtr) := true.B + isExeFixCsr := true.B + } + is(GhrFixType.decodeUpdateJump) { + // update the branch that has not been predicted + nextSpecPtr := checkPtr - 1.U + nextGlobalHistory(nextSpecPtr) := true.B + isPredecodeFixCsr := true.B + } + } + }.elsewhen(io.ghrUpdateNdBundle.bpuSpecValid) { + nextSpecPtr := specPtr - 1.U + nextGlobalHistory(nextSpecPtr) := io.ghrUpdateNdBundle.bpuSpecTaken + } + val ghr = dontTouch(RegInit((0.U(ghrDepth.W)))) when(isUpdateValid) { ghr := Cat(ghr(ghrDepth - 2, 0), updateBranchTaken) } - isGlobalHistoryUpdateReg := isUpdateValid + if (Param.isSpeculativeGlobalHistory) { + shiftedGlobalHistory := Cat(nextGlobalHistory.asUInt, nextGlobalHistory.asUInt) >> nextSpecPtr + } else { + shiftedGlobalHistory := ghr + } + // shiftedGlobalHistory := (nextGlobalHistory.asUInt >> nextSpecPtr).asUInt | (nextGlobalHistory.asUInt << (Param.BPU.TagePredictor.ghrLength.U - nextSpecPtr)).asUInt + // TODO use the correct Cat history +// shiftedGlobalHistory := Cat( +// nextGlobalHistory.asUInt(nextSpecPtr, 0), +// nextGlobalHistory.asUInt(Param.BPU.TagePredictor.ghrLength-1, nextSpecPtr) +// ) + + // output + io.tageQueryMeta.tageGhrInfo.checkPtr := nextSpecPtr //////////////////////////////////////////////////////////////////////////////////////////// // Query Logic @@ -136,6 +248,12 @@ class TagePredictor( basePredictor.io.isCtrInc := updateBranchTaken basePredictor.io.updateCtr := updateMetaBundle.providerCtrBits(0) + val isGlobalHistoryUpdate = WireDefault(false.B) + if (Param.isSpeculativeGlobalHistory) { + isGlobalHistoryUpdate := io.ghrUpdateNdBundle.bpuSpecValid + } else { + isGlobalHistoryUpdate := isUpdateValid + } // Tagged Predictor Generate val taggedPreditors = Seq.range(0, tagComponentNum).map { providerId => { @@ -148,8 +266,8 @@ class TagePredictor( ) ) // Query - taggedPreditor.io.isGlobalHistoryUpdate := isUpdateValid - taggedPreditor.io.globalHistory := ghr(historyLengths(providerId + 1) - 1, 0) + taggedPreditor.io.isGlobalHistoryUpdate := isGlobalHistoryUpdate + taggedPreditor.io.globalHistory := shiftedGlobalHistory(historyLengths(providerId + 1) - 1, 0) taggedPreditor.io.pc := io.pc tagUsefulbits(providerId) := taggedPreditor.io.usefulBits tagCtrbits(providerId) := taggedPreditor.io.ctrBits @@ -158,19 +276,28 @@ class TagePredictor( tagHitIndexs(providerId) := taggedPreditor.io.hitIndex tagIsTakens(providerId) := taggedPreditor.io.taken tagIsHits(providerId) := taggedPreditor.io.tagHit + tagGhtHashs(providerId) := taggedPreditor.io.queryGhtHash + tagTagHashCsr1s(providerId) := taggedPreditor.io.queryTagHashCsr1 + tagTagHashCsr2s(providerId) := taggedPreditor.io.queryTagHashCsr2 // update - taggedPreditor.io.updatePc := io.updatePc - taggedPreditor.io.updateValid := isUpdateValid && updateIsConditional - taggedPreditor.io.incUseful := tagUpdateIsIncUsefuls(providerId) - taggedPreditor.io.updateUseful := tagIsUpdateUsefuls(providerId) - taggedPreditor.io.updateUsefulBits := updateMetaBundle.tagPredictorUsefulBits(providerId) - taggedPreditor.io.updateCtr := tagIsUpdateCtrs.asBools(providerId) - taggedPreditor.io.incCtr := updateBranchTaken - taggedPreditor.io.updateCtrBits := updateMetaBundle.providerCtrBits(providerId + 1) - taggedPreditor.io.reallocEntry := tagUpdateisReallocEntrys(providerId) - taggedPreditor.io.updateTag := tagUpdateNewTags(providerId) - taggedPreditor.io.updateIndex := updateMetaBundle.tagPredictorHitIndexs(providerId) + taggedPreditor.io.updatePc := io.updatePc + taggedPreditor.io.updateValid := isUpdateValid && updateIsConditional + taggedPreditor.io.incUseful := tagUpdateIsIncUsefuls(providerId) + taggedPreditor.io.updateUseful := tagIsUpdateUsefuls(providerId) + taggedPreditor.io.updateUsefulBits := updateMetaBundle.tagPredictorUsefulBits(providerId) + taggedPreditor.io.updateCtr := tagIsUpdateCtrs.asBools(providerId) + taggedPreditor.io.incCtr := updateBranchTaken + taggedPreditor.io.updateCtrBits := updateMetaBundle.providerCtrBits(providerId + 1) + taggedPreditor.io.reallocEntry := tagUpdateisReallocEntrys(providerId) + taggedPreditor.io.updateTag := tagUpdateNewTags(providerId) + taggedPreditor.io.updateIndex := updateMetaBundle.tagPredictorHitIndexs(providerId) + taggedPreditor.io.isRecoverCsr := isRecoverCsr + taggedPreditor.io.isExeFixCsr := isExeFixCsr + taggedPreditor.io.isPredecodeFixCsr := isPredecodeFixCsr + taggedPreditor.io.originGhtHash := updateMetaBundle.tageGhrInfo.tagGhtHashs(providerId) + taggedPreditor.io.originTagHashCsr1 := updateMetaBundle.tageGhrInfo.tagTagHashCsr1s(providerId) + taggedPreditor.io.originTagHashCsr2 := updateMetaBundle.tageGhrInfo.tagTagHashCsr2s(providerId) taggedPreditor } @@ -224,6 +351,10 @@ class TagePredictor( vecAssign(queryMetaBundle.tagPredictorHitIndexs, tagHitIndexs) vecAssign(queryMetaBundle.tagPredictorQueryTags, tagQueryTags) vecAssign(queryMetaBundle.tagPredictorOriginTags, tagOriginTags) + vecAssign(queryMetaBundle.tageGhrInfo.tagGhtHashs, tagGhtHashs) + vecAssign(queryMetaBundle.tageGhrInfo.tagTagHashCsr1s, tagTagHashCsr1s) + vecAssign(queryMetaBundle.tageGhrInfo.tagTagHashCsr2s, tagTagHashCsr2s) + queryMetaBundle.tageGhrInfo.checkPtr := nextSpecPtr queryMetaBundle.isUseful := takens(predPredictionId) =/= takens( altPredPredctionId ) // Indicates whether the pred component is useful diff --git a/src/src/frontend/bpu/bundles/TagePredictorUpdateInfoPort.scala b/src/src/frontend/bpu/bundles/TagePredictorUpdateInfoPort.scala index 521ebae2..6b739ba8 100644 --- a/src/src/frontend/bpu/bundles/TagePredictorUpdateInfoPort.scala +++ b/src/src/frontend/bpu/bundles/TagePredictorUpdateInfoPort.scala @@ -1,6 +1,6 @@ package frontend.bpu.bundles import chisel3._ -import frontend.bpu.components.Bundles.TageMetaPort +import frontend.bpu.components.Bundles.{TageGhrInfo, TageMetaPort} class TagePredictorUpdateInfoPort extends Bundle { val valid = Bool() diff --git a/src/src/frontend/bpu/components/Bundles/TageMetaPort.scala b/src/src/frontend/bpu/components/Bundles/TageMetaPort.scala index a8033587..43dd0ce8 100644 --- a/src/src/frontend/bpu/components/Bundles/TageMetaPort.scala +++ b/src/src/frontend/bpu/components/Bundles/TageMetaPort.scala @@ -4,9 +4,21 @@ import chisel3._ import chisel3.util._ import spec._ +class TageGhrInfo( + tagComponentNum: Int = Param.BPU.TagePredictor.tagComponentNum, + tagComponentTagWidth: Int = Param.BPU.TagePredictor.tagComponentTagWidth, + phtAddrWidth: Int = log2Ceil(Param.BPU.TagePredictor.componentTableDepth(1))) + extends Bundle { + val checkPtr = UInt(Param.BPU.TagePredictor.ghrPtrWidth.W) + val tagGhtHashs = Vec(tagComponentNum, UInt(phtAddrWidth.W)) + val tagTagHashCsr1s = Vec(tagComponentNum, UInt(tagComponentTagWidth.W)) + val tagTagHashCsr2s = Vec(tagComponentNum, UInt((tagComponentTagWidth - 1).W)) +} + class TageMetaPort( tagComponentNum: Int = Param.BPU.TagePredictor.tagComponentNum, - tagComponentTagWidth: Int = Param.BPU.TagePredictor.tagComponentTagWidth) + tagComponentTagWidth: Int = Param.BPU.TagePredictor.tagComponentTagWidth, + phtAddrWidth: Int = log2Ceil(Param.BPU.TagePredictor.componentTableDepth(1))) extends Bundle { val providerId = UInt(log2Ceil(tagComponentNum + 1).W) val altProviderId = UInt(log2Ceil(tagComponentNum + 1).W) @@ -16,10 +28,10 @@ class TageMetaPort( val tagPredictorOriginTags = Vec(tagComponentNum, UInt(tagComponentTagWidth.W)) val tagPredictorHitIndexs = Vec(tagComponentNum, UInt(10.W)) val tagPredictorUsefulBits = Vec(tagComponentNum, UInt(3.W)) + // global history Hash info + val tageGhrInfo = new TageGhrInfo() } object TageMetaPort { - val tagComponentNum = Param.BPU.TagePredictor.tagComponentNum - val tagComponentTagWidth = Param.BPU.TagePredictor.tagComponentTagWidth - def default = 0.U.asTypeOf(new TageMetaPort) + def default = 0.U.asTypeOf(new TageMetaPort) } diff --git a/src/src/frontend/bpu/components/TaggedPreditor.scala b/src/src/frontend/bpu/components/TaggedPreditor.scala index 72864619..7256634b 100644 --- a/src/src/frontend/bpu/components/TaggedPreditor.scala +++ b/src/src/frontend/bpu/components/TaggedPreditor.scala @@ -40,11 +40,14 @@ class TaggedPreditor( val pc = Input(UInt(spec.Width.Mem.addr)) // Meta - val usefulBits = Output(UInt(phtUsefulWidth.W)) - val ctrBits = Output(UInt(phtCtrWidth.W)) - val queryTag = Output(UInt(phtTagWidth.W)) - val originTag = Output(UInt(phtTagWidth.W)) - val hitIndex = Output(UInt(log2Ceil(phtDepth).W)) + val usefulBits = Output(UInt(phtUsefulWidth.W)) + val ctrBits = Output(UInt(phtCtrWidth.W)) + val queryTag = Output(UInt(phtTagWidth.W)) + val originTag = Output(UInt(phtTagWidth.W)) + val hitIndex = Output(UInt(log2Ceil(phtDepth).W)) + val queryGhtHash = Output(UInt(phtAddrWidth.W)) + val queryTagHashCsr1 = Output(UInt(phtTagWidth.W)) + val queryTagHashCsr2 = Output(UInt((phtTagWidth - 1).W)) // Query result val taken = Output(Bool()) @@ -62,6 +65,14 @@ class TaggedPreditor( val reallocEntry = Input(Bool()) val updateTag = Input(UInt(phtTagWidth.W)) val updateIndex = Input(UInt(log2Ceil(phtDepth).W)) + + // speculative update hash value + val isExeFixCsr = Input(Bool()) + val isPredecodeFixCsr = Input(Bool()) + val isRecoverCsr = Input(Bool()) + val originGhtHash = Input(UInt(phtAddrWidth.W)) + val originTagHashCsr1 = Input(UInt(phtTagWidth.W)) + val originTagHashCsr2 = Input(UInt((phtTagWidth - 1).W)) }) def toPhtLine(line: UInt) = { @@ -166,25 +177,39 @@ class TaggedPreditor( // to do connect CSR hash val ghtHashCsrHash = Module(new CsrHash(ghrLength, phtAddrWidth)) - ghtHashCsrHash.io.data := io.globalHistory - ghtHashCsrHash.io.dataUpdate := io.isGlobalHistoryUpdate - hashedGhtInput := ghtHashCsrHash.io.hash + ghtHashCsrHash.io.data := io.globalHistory + ghtHashCsrHash.io.dataUpdate := io.isGlobalHistoryUpdate + hashedGhtInput := ghtHashCsrHash.io.hash + ghtHashCsrHash.io.originHash := io.originGhtHash + ghtHashCsrHash.io.isRecoverCsr := io.isRecoverCsr + ghtHashCsrHash.io.isExeFixCsr := io.isExeFixCsr + ghtHashCsrHash.io.isPredecodeFixCsr := io.isPredecodeFixCsr + io.queryGhtHash := ghtHashCsrHash.io.hash val pcHashCsrHash1 = Module(new CsrHash(ghrLength, phtTagWidth)) - pcHashCsrHash1.io.data := io.globalHistory - pcHashCsrHash1.io.dataUpdate := io.isGlobalHistoryUpdate - tagHashCsr1 := pcHashCsrHash1.io.hash + pcHashCsrHash1.io.data := io.globalHistory + pcHashCsrHash1.io.dataUpdate := io.isGlobalHistoryUpdate + tagHashCsr1 := pcHashCsrHash1.io.hash + pcHashCsrHash1.io.originHash := io.originTagHashCsr1 + pcHashCsrHash1.io.isRecoverCsr := io.isRecoverCsr + pcHashCsrHash1.io.isExeFixCsr := io.isExeFixCsr + pcHashCsrHash1.io.isPredecodeFixCsr := io.isPredecodeFixCsr + io.queryTagHashCsr1 := pcHashCsrHash1.io.hash val pcHashCsrHash2 = Module(new CsrHash(ghrLength, phtTagWidth - 1)) - pcHashCsrHash2.io.data := io.globalHistory - pcHashCsrHash2.io.dataUpdate := io.isGlobalHistoryUpdate - tagHashCsr2 := pcHashCsrHash2.io.hash + pcHashCsrHash2.io.data := io.globalHistory + pcHashCsrHash2.io.dataUpdate := io.isGlobalHistoryUpdate + tagHashCsr2 := pcHashCsrHash2.io.hash + pcHashCsrHash2.io.originHash := io.originTagHashCsr2 + pcHashCsrHash2.io.isRecoverCsr := io.isRecoverCsr + pcHashCsrHash2.io.isExeFixCsr := io.isExeFixCsr + pcHashCsrHash2.io.isPredecodeFixCsr := io.isPredecodeFixCsr + io.queryTagHashCsr2 := pcHashCsrHash2.io.hash val phtRam = Module( new VSimpleDualBRam( phtDepth, // size PhtEntey.width // dataWidth - ) ) phtRam.io.readAddr := queryIndex diff --git a/src/src/frontend/bpu/utils/Bram.scala b/src/src/frontend/bpu/utils/Bram.scala deleted file mode 100644 index ff4c4f83..00000000 --- a/src/src/frontend/bpu/utils/Bram.scala +++ /dev/null @@ -1,54 +0,0 @@ -package frontend.bpu.utils -import chisel3._ -// simulate BRAM IP in simulation without Vivado -// data read latency is 1 cycle -class Bram( - dataWidth: Int = 128, - dataDepthExp2: Int = 8) - extends Module { - // parameter - val addrWidth = dataDepthExp2 - val dataDepth = 1 << dataDepthExp2 - val io = IO(new Bundle { - val ena = Input(Bool()) // Chip enable A - val enb = Input(Bool()) // Chip enable B - val wea = Input(Bool()) // Write enable A - val web = Input(Bool()) // Write enable B - - val dina = Input(UInt(dataWidth.W)) - val addra = Input(UInt(addrWidth.W)) - val douta = Output(UInt(dataWidth.W)) - - val dinb = Input(UInt(dataWidth.W)) - val addrb = Input(UInt(addrWidth.W)) - val doutb = Output(UInt(dataWidth.W)) - - }) - val datas = RegInit(VecInit(Seq.fill(dataWidth)(0.U(dataDepth.W)))) - - // Read logic - when(io.ena & io.wea) { - io.douta := RegNext(io.dina, 0.U) - }.elsewhen(io.ena) { - io.douta := RegNext(datas(io.addra), 0.U) - }.otherwise { - io.douta := RegNext(0.U(dataDepth.W), 0.U) - } - - when(io.enb & io.web) { - io.doutb := RegNext(io.dinb, 0.U) - }.elsewhen(io.enb) { - io.doutb := RegNext(datas(io.addrb), 0.U) - }.otherwise { - io.doutb := RegNext(0.U(dataDepth.W), 0.U) - } - - // Write logic - when(io.enb & io.web) { - datas(io.addrb) := RegNext(io.dinb, 0.U) - } - - when(io.ena & io.wea) { - datas(io.addra) := RegNext(io.dinb, 0.U) - } -} diff --git a/src/src/frontend/bpu/utils/CsrHash.scala b/src/src/frontend/bpu/utils/CsrHash.scala index 9fe806c4..55a85a87 100644 --- a/src/src/frontend/bpu/utils/CsrHash.scala +++ b/src/src/frontend/bpu/utils/CsrHash.scala @@ -1,6 +1,7 @@ package frontend.bpu.utils import chisel3._ import chisel3.util._ +import spec.Param // Implement GHR hash using a CSR (Circular Shifted Register) class CsrHash( @@ -8,9 +9,13 @@ class CsrHash( outputLength: Int = 10) extends Module { val io = IO(new Bundle { - val dataUpdate = Input(Bool()) - val data = Input(UInt(inputLength.W)) - val hash = Output(UInt(outputLength.W)) + val dataUpdate = Input(Bool()) + val data = Input(UInt(inputLength.W)) + val hash = Output(UInt(outputLength.W)) + val isExeFixCsr = Input(Bool()) + val isPredecodeFixCsr = Input(Bool()) + val isRecoverCsr = Input(Bool()) + val originHash = Input(UInt(outputLength.W)) }) val csr = RegInit(0.U(outputLength.W)) @@ -20,14 +25,40 @@ class CsrHash( // nextCSR := Cat(csr(outputLength - 2, 0), csr(outputLength - 1) ^ io.data(0)) // nextCSR(residual) := nextCSR(residual, residual) ^ io.data(inputLength - 1) - nextCSR := Cat(csr(outputLength - 2, 0), csr(outputLength - 1) ^ io.data(0)) ^ (io.data( - inputLength - 1 - ) << residual).asUInt - - when(io.dataUpdate) { - csr := nextCSR + if (Param.isSpeculativeGlobalHistory) { + nextCSR := Mux( + io.isRecoverCsr, + io.originHash, + Mux( + io.isExeFixCsr || io.isPredecodeFixCsr, + Cat(io.originHash(outputLength - 2, 0), io.originHash(outputLength - 1) ^ io.data(0)) ^ (io.data( + inputLength - 1 + ) << residual).asUInt, + Mux( + io.dataUpdate, + Cat(csr(outputLength - 2, 0), csr(outputLength - 1) ^ io.data(0)) ^ (io.data( + inputLength - 1 + ) << residual).asUInt, + csr + ) + ) + ) + } else { + // commit update + nextCSR := Mux( + io.dataUpdate, + Cat(csr(outputLength - 2, 0), csr(outputLength - 1) ^ io.data(0)) ^ (io.data( + inputLength - 1 + ) << residual).asUInt, + csr + ) } - io.hash := nextCSR +// when(io.dataUpdate) { +// csr := nextCSR +// } + csr := nextCSR + + io.hash := csr } diff --git a/src/src/frontend/bpu/utils/Lsfr.scala b/src/src/frontend/bpu/utils/Lsfr.scala deleted file mode 100644 index a551bfc9..00000000 --- a/src/src/frontend/bpu/utils/Lsfr.scala +++ /dev/null @@ -1,66 +0,0 @@ -package frontend.bpu.utils - -import chisel3._ -import chisel3.util._ - -//translate lfsr system verilog code from https://github.com/openhwgroup/cva5/blob/master/core/lfsr.sv -//3-16 bit LFSRs with additional feedback to support full 2^N range -// Linear-feedback shift register -//lfsr is used to generate random number -class Lfsr( - width: Int = 3, - NeedsReset: Int = 1) - extends Module { - // XNOR taps for LFSR from 3-16 bits wide (source: Xilinx xapp052) - val tapNums = Seq(1, 1, 1, // Dummy entries for widths 0-2 - 2, 2, 2, 2, 2, // Number of taps and indicies[3:0] for LFSRs width 3 to 16 - 4, 2, 2, 2, // 8 - 4, 4, 4, 2, 4) - val tapIndiciess = Seq( - Seq(0, 0, 0, 0), - Seq(0, 0, 0, 0), - Seq(0, 0, 0, 0), - Seq(0, 0, 1, 2), - Seq(0, 0, 2, 3), - Seq(0, 0, 2, 4), - Seq(0, 0, 4, 5), - Seq(0, 0, 5, 6), - Seq(3, 4, 5, 7), - Seq(0, 0, 4, 8), - Seq(0, 0, 8, 10), - Seq(0, 3, 5, 11), - Seq(0, 2, 3, 12), - Seq(0, 2, 4, 13), - Seq(0, 0, 13, 14), - Seq(3, 12, 14, 15) - ) - - val io = IO(new Bundle { - val en = Input(Bool()) - val value = Output(UInt(width.W)) - }) - - val num = tapNums(width) - val indicies = tapIndiciess(width) - - val feedbackInput = WireDefault(0.U(num.W)) - val feedback = WireDefault(false.B) - val value = RegInit(0.U(width.W)) - //////////////////////////////////////////////////// - // Implementation - if (width == 2) { - feedback := ~value(width - 1) - } else { - for (i <- 0 to num) { - feedbackInput(i) := value(indicies(i)) - } - // XNOR of taps and range extension to include all ones - feedback := (!(feedbackInput.xorR)) ^ (value(width - 2, 0).orR) - } - - when(io.en) { - value := RegNext(Cat(value(width - 2, 0), feedback), 0.U) - } - io.value := value - -} diff --git a/src/src/frontend/bundles/ExeFtqPort.scala b/src/src/frontend/bundles/ExeFtqPort.scala index 616c2acd..e52d0e85 100644 --- a/src/src/frontend/bundles/ExeFtqPort.scala +++ b/src/src/frontend/bundles/ExeFtqPort.scala @@ -3,7 +3,7 @@ import chisel3._ import spec.Param class ExeFtqPort extends Bundle { val queryPcBundle = new QueryPcBundle - val commitBundle = Input(new ExeCommitFtqNdPort) + val feedBack = Input(new ExeFtqFeedBackNdPort) } class QueryPcBundle extends Bundle { @@ -18,3 +18,7 @@ class ExeCommitFtqNdPort extends Bundle { val ftqMetaUpdateFallThrough = UInt(spec.Width.Mem.addr) val ftqUpdateMetaId = UInt(Param.BPU.Width.id) } +class ExeFtqFeedBackNdPort extends Bundle { + val commitBundle = new ExeCommitFtqNdPort + val fixGhrBundle = new ExeFtqFixGhrBundle +} diff --git a/src/src/frontend/bundles/FtqBpuMetaPort.scala b/src/src/frontend/bundles/FtqBpuMetaPort.scala index 849a7d75..57bfcd23 100644 --- a/src/src/frontend/bundles/FtqBpuMetaPort.scala +++ b/src/src/frontend/bundles/FtqBpuMetaPort.scala @@ -1,7 +1,7 @@ package frontend.bundles import chisel3._ import chisel3.util._ -import frontend.bpu.components.Bundles.TageMetaPort +import frontend.bpu.components.Bundles.{TageGhrInfo, TageMetaPort} import spec._ class BranchAddrBundle extends Bundle { @@ -9,6 +9,19 @@ class BranchAddrBundle extends Bundle { val jumpTargetAddr = UInt(spec.Width.Mem.addr) val fallThroughAddr = UInt(spec.Width.Mem.addr) } + +class ExeFtqFixGhrBundle extends Bundle { + val isExeFixValid = Bool() + val exeFixFirstBrTaken = Bool() + val exeFixJumpError = Bool() + val exeFixIsTaken = Bool() +} +class GhrUpdateSignalBundle extends Bundle { + val isCommitFixGhr = Bool() + val exeFixBundle = new ExeFtqFixGhrBundle + val isPredecoderFixGhr = Bool() + val isPredecoderBranchTaken = Bool() +} class FtqBpuMetaPort( ftbNway: Int = Param.BPU.FTB.nway, addr: Int = wordLength) @@ -26,6 +39,10 @@ class FtqBpuMetaPort( // FTB train meta val branchAddrBundle = new BranchAddrBundle + + // train meta + val ghrUpdateSignalBundle = new GhrUpdateSignalBundle // global history register + val tageGhrInfo = new TageGhrInfo() } object FtqBpuMetaPort { diff --git a/src/src/frontend/fetch/InstPreDecodeStage.scala b/src/src/frontend/fetch/InstPreDecodeStage.scala index c09c1490..5ccbf346 100644 --- a/src/src/frontend/fetch/InstPreDecodeStage.scala +++ b/src/src/frontend/fetch/InstPreDecodeStage.scala @@ -31,6 +31,7 @@ class ftqPreDecodeFixRasNdPort extends Bundle { } class InstPreDecodePeerPort extends Bundle { val predecodeRedirect = Output(Bool()) + val predecoderBranch = Output(Bool()) val redirectFtqId = Output(UInt(Param.BPU.ftqPtrWidth.W)) val redirectPc = Output(UInt(spec.Width.Mem.addr)) val commitRasPort = Input(Valid(new ftqPreDecodeFixRasNdPort)) @@ -108,6 +109,8 @@ class InstPreDecodeStage val isPredecoderRedirect = WireDefault(false.B) isPredecoderRedirect := isDataValid && ((isJump && canJump) || isErrorPredict) val isPredecoderRedirectReg = RegNext(isPredecoderRedirect, false.B) +// peer.predecoderBranch := RegNext(isDataValid && (isJump && canJump), false.B) + peer.predecoderBranch := true.B // decrease net delay ,do not care non branch inst jump effect branch history // connect return address stack module val rasModule = Module(new RAS) @@ -166,14 +169,20 @@ class InstPreDecodeStage } when((index + 1).U === selectBlockLength) { infoBundle.bits.ftqInfo.predictBranch := selectedIn - .enqInfos(selectBlockLength - 1.U) + .enqInfos(index) .bits .ftqInfo .predictBranch || isPredecoderRedirect + infoBundle.bits.ftqInfo.isPredictValid := selectedIn + .enqInfos(index) + .bits + .ftqInfo + .isPredictValid || isPredecoderRedirect infoBundle.bits.ftqInfo.isLastInBlock := true.B }.otherwise { - infoBundle.bits.ftqInfo.predictBranch := false.B - infoBundle.bits.ftqInfo.isLastInBlock := false.B + infoBundle.bits.ftqInfo.isPredictValid := false.B + infoBundle.bits.ftqInfo.predictBranch := false.B + infoBundle.bits.ftqInfo.isLastInBlock := false.B } } diff --git a/src/src/frontend/fetch/InstResStage.scala b/src/src/frontend/fetch/InstResStage.scala index aa0bc159..8481918f 100644 --- a/src/src/frontend/fetch/InstResStage.scala +++ b/src/src/frontend/fetch/InstResStage.scala @@ -54,11 +54,13 @@ class InstResStage infoBundle.bits.exception := selectedIn.exception.bits infoBundle.bits.ftqInfo.ftqId := selectedIn.ftqId when((index + 1).U === selectedIn.ftqBlock.length) { - infoBundle.bits.ftqInfo.predictBranch := selectedIn.ftqBlock.predictTaken - infoBundle.bits.ftqInfo.isLastInBlock := true.B + infoBundle.bits.ftqInfo.predictBranch := selectedIn.ftqBlock.predictTaken + infoBundle.bits.ftqInfo.isPredictValid := selectedIn.ftqBlock.predictValid + infoBundle.bits.ftqInfo.isLastInBlock := true.B }.otherwise { - infoBundle.bits.ftqInfo.predictBranch := false.B - infoBundle.bits.ftqInfo.isLastInBlock := false.B + infoBundle.bits.ftqInfo.predictBranch := false.B + infoBundle.bits.ftqInfo.isPredictValid := false.B + infoBundle.bits.ftqInfo.isLastInBlock := false.B } } diff --git a/src/src/pipeline/common/bundles/FtqInfoBundle.scala b/src/src/pipeline/common/bundles/FtqInfoBundle.scala index 292f20be..0e93b661 100644 --- a/src/src/pipeline/common/bundles/FtqInfoBundle.scala +++ b/src/src/pipeline/common/bundles/FtqInfoBundle.scala @@ -7,7 +7,8 @@ class FtqInfoBundle extends Bundle { val isLastInBlock = Bool() val ftqId = UInt(Param.BPU.Width.id) // val idxInBlock = UInt(log2Ceil(Param.fetchInstMaxNum).W) - val predictBranch = Bool() + val predictBranch = Bool() + val isPredictValid = Bool() } object FtqInfoBundle extends Bundle { diff --git a/src/src/execution/Alu.scala b/src/src/pipeline/complex/execution/Alu.scala similarity index 82% rename from src/src/execution/Alu.scala rename to src/src/pipeline/complex/execution/Alu.scala index 179040ed..1a6235be 100644 --- a/src/src/execution/Alu.scala +++ b/src/src/pipeline/complex/execution/Alu.scala @@ -1,4 +1,4 @@ -package execution +package pipeline.complex.execution import chisel3._ import chisel3.util._ @@ -6,6 +6,8 @@ import execution.bundles.{AluInstNdPort, AluResultNdPort} import pipeline.complex.execution.bundles.JumpBranchInfoNdPort import spec.ExeInst.Op import spec._ +import execution.Mul +import execution.Div class Alu extends Module { val io = IO(new Bundle { @@ -19,7 +21,7 @@ class Alu extends Module { }) io.outputValid := true.B - io.result := AluResultNdPort.default + io.result := DontCare def lop = io.aluInst.leftOperand @@ -28,20 +30,14 @@ class Alu extends Module { /** Result definition */ - val logic = WireDefault(zeroWord) + val logic = io.result.logic - val shift = WireDefault(zeroWord) + val shift = io.result.shift - val jumpBranchInfo = WireDefault(JumpBranchInfoNdPort.default) + val jumpBranchInfo = io.result.jumpBranchInfo // computed with one cycle - val arithmetic = WireDefault(zeroWord) - - // Fallback - io.result.arithmetic := arithmetic - io.result.logic := logic - io.result.jumpBranchInfo := jumpBranchInfo - io.result.shift := shift + val arithmetic = io.result.arithmetic // Logic computation @@ -117,14 +113,13 @@ class Alu extends Module { val mulStage = Module(new Mul) - val useSignedMul = WireDefault( + val useSignedMul = VecInit( ExeInst.Op.mul, ExeInst.Op.mulh ).contains(io.aluInst.op) - ) - val useUnsignedMul = WireDefault(io.aluInst.op === ExeInst.Op.mulhu) + val useUnsignedMul = io.aluInst.op === ExeInst.Op.mulhu val useMul = WireDefault(useSignedMul || useUnsignedMul) @@ -136,7 +131,7 @@ class Alu extends Module { mulStage.io.mulInst.bits.leftOperand := lop mulStage.io.mulInst.bits.rightOperand := rop - val mulResult = WireDefault(mulStage.io.mulResult.bits) + val mulResult = mulStage.io.mulResult.bits // Div @@ -151,9 +146,9 @@ class Alu extends Module { val divStage = Module(new Div) - val divisorValid = WireDefault(rop =/= 0.U) + val divisorValid = rop =/= 0.U - val divStart = WireDefault(useDiv && divisorValid) + val divStart = useDiv && divisorValid divStage.io.isFlush := io.isFlush divStage.io.divInst.valid := divStart @@ -164,8 +159,8 @@ class Alu extends Module { divStage.io.divInst.bits.leftOperand := lop divStage.io.divInst.bits.rightOperand := rop - val quotient = WireDefault(divStage.io.divResult.bits.quotient) - val remainder = WireDefault(divStage.io.divResult.bits.remainder) + val quotient = divStage.io.divResult.bits.quotient + val remainder = divStage.io.divResult.bits.remainder io.outputValid := !(mulStart && !mulStage.io.mulResult.valid) && ( @@ -201,6 +196,5 @@ class Alu extends Module { when(io.isFlush) { io.outputValid := false.B - mulResult := 0.U } } diff --git a/src/src/pipeline/complex/execution/ExePassWbStage.scala b/src/src/pipeline/complex/execution/ExePassWbStage.scala index b5024ef1..607913d6 100644 --- a/src/src/pipeline/complex/execution/ExePassWbStage.scala +++ b/src/src/pipeline/complex/execution/ExePassWbStage.scala @@ -7,7 +7,7 @@ import common.bundles._ import control.bundles.{CsrReadPort, CsrWriteNdPort, StableCounterReadPort} import control.csrBundles.{EraBundle, LlbctlBundle} import control.enums.ExceptionPos -import execution.Alu +import pipeline.complex.execution.Alu import frontend.bundles.ExeFtqPort import pipeline.common.bundles.RobQueryPcPort import pipeline.complex.bundles.InstInfoNdPort @@ -233,8 +233,12 @@ class ExePassWbStage(supportBranchCsr: Boolean = true) resultOutReg.bits.instInfo.ftqCommitInfo.targetMispredict.get := branchTargetMispredict && branchEnableFlag && isBranchInst } - branchSetPort.en := isRedirect - branchSetPort.ftqId := selectedIn.instInfo.ftqInfo.ftqId + branchSetPort.en := isRedirect + branchSetPort.ftqId := selectedIn.instInfo.ftqInfo.ftqId + feedbackFtq.feedBack.fixGhrBundle.isExeFixValid := isRedirect + feedbackFtq.feedBack.fixGhrBundle.exeFixFirstBrTaken := jumpBranchInfo.en && !inFtqInfo.isPredictValid && branchEnableFlag && isBranchInst // TODO predictValid + feedbackFtq.feedBack.fixGhrBundle.exeFixIsTaken := jumpBranchInfo.en + feedbackFtq.feedBack.fixGhrBundle.exeFixJumpError := branchTargetMispredict && branchEnableFlag && isBranchInst branchSetPort.pcAddr := Mux( jumpBranchInfo.en, @@ -244,25 +248,25 @@ class ExePassWbStage(supportBranchCsr: Boolean = true) if (Param.exeFeedBackFtqDelay) { - feedbackFtq.commitBundle.ftqMetaUpdateValid := (RegNext(isBranchInst, false.B) || + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateValid := (RegNext(isBranchInst, false.B) || (RegNext(!isBranchInst, false.B) && RegNext(inFtqInfo.predictBranch, false.B))) && RegNext( branchEnableFlag, false.B ) - feedbackFtq.commitBundle.ftqMetaUpdateFtbDirty := RegNext(branchTargetMispredict, false.B) || + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateFtbDirty := RegNext(branchTargetMispredict, false.B) || (RegNext(jumpBranchInfo.en, false.B) && !RegNext(inFtqInfo.isLastInBlock, false.B)) || (RegNext(!isBranchInst, false.B) && RegNext(inFtqInfo.predictBranch, false.B)) - feedbackFtq.commitBundle.ftqUpdateMetaId := RegNext(inFtqInfo.ftqId, 0.U) - feedbackFtq.commitBundle.ftqMetaUpdateJumpTarget := RegNext(jumpBranchInfo.pcAddr, 0.U) - feedbackFtq.commitBundle.ftqMetaUpdateFallThrough := RegNext(fallThroughPc, 0.U) + feedbackFtq.feedBack.commitBundle.ftqUpdateMetaId := RegNext(inFtqInfo.ftqId, 0.U) + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateJumpTarget := RegNext(jumpBranchInfo.pcAddr, 0.U) + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateFallThrough := RegNext(fallThroughPc, 0.U) } else { - feedbackFtq.commitBundle.ftqMetaUpdateValid := (isBranchInst || (!isBranchInst && inFtqInfo.predictBranch)) && branchEnableFlag - feedbackFtq.commitBundle.ftqMetaUpdateFtbDirty := branchTargetMispredict || + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateValid := (isBranchInst || (!isBranchInst && inFtqInfo.predictBranch)) && branchEnableFlag + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateFtbDirty := branchTargetMispredict || (jumpBranchInfo.en && !inFtqInfo.isLastInBlock) || (!isBranchInst && inFtqInfo.predictBranch) - feedbackFtq.commitBundle.ftqUpdateMetaId := inFtqInfo.ftqId - feedbackFtq.commitBundle.ftqMetaUpdateJumpTarget := jumpBranchInfo.pcAddr - feedbackFtq.commitBundle.ftqMetaUpdateFallThrough := fallThroughPc + feedbackFtq.feedBack.commitBundle.ftqUpdateMetaId := inFtqInfo.ftqId + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateJumpTarget := jumpBranchInfo.pcAddr + feedbackFtq.feedBack.commitBundle.ftqMetaUpdateFallThrough := fallThroughPc } resultOutReg.bits.instInfo.ftqCommitInfo.isBranchSuccess := jumpBranchInfo.en diff --git a/src/src/pipeline/simple/Alu.scala b/src/src/pipeline/simple/Alu.scala new file mode 100644 index 00000000..cb647bc5 --- /dev/null +++ b/src/src/pipeline/simple/Alu.scala @@ -0,0 +1,200 @@ +package pipeline.simple.execution + +import chisel3._ +import chisel3.util._ +import execution.bundles.{AluInstNdPort, AluResultNdPort} +import pipeline.complex.execution.bundles.JumpBranchInfoNdPort +import spec.ExeInst.Op +import spec._ +import execution.Mul +import execution.Div + +class Alu extends Module { + val io = IO(new Bundle { + val inputValid = Input(Bool()) + val aluInst = Input(new AluInstNdPort) + + val outputValid = Output(Bool()) + val result = Output(new AluResultNdPort) + + val isFlush = Input(Bool()) + }) + + io.outputValid := true.B + io.result := DontCare + + def lop = io.aluInst.leftOperand + + def rop = io.aluInst.rightOperand + + /** Result definition + */ + + val logic = io.result.logic + + val shift = io.result.shift + + val jumpBranchInfo = io.result.jumpBranchInfo + + // computed with one cycle + val arithmetic = io.result.arithmetic + + // Logic computation + + switch(io.aluInst.op) { + is(Op.nor) { + logic := ~(lop | rop) + } + is(Op.and) { + logic := lop & rop + } + is(Op.or) { + logic := lop | rop + } + is(Op.xor) { + logic := lop ^ rop + } + } + + // shift computation + + switch(io.aluInst.op) { + is(Op.sll) { + shift := lop << rop(4, 0) + } + is(Op.srl) { + shift := lop >> rop(4, 0) + } + is(Op.sra) { + shift := (lop.asSInt >> rop(4, 0)).asUInt + } + } + + // jump and branch computation + + switch(io.aluInst.op) { + is(Op.b, Op.bl) { + jumpBranchInfo.en := true.B + // jumpBranchInfo.pcAddr := io.aluInst.jumpBranchAddr + } + is(Op.jirl) { + jumpBranchInfo.en := true.B + // jumpBranchInfo.pcAddr := lop + io.aluInst.jumpBranchAddr + } + is(Op.beq) { + jumpBranchInfo.en := lop === rop + // jumpBranchInfo.pcAddr := io.aluInst.jumpBranchAddr + } + is(Op.bne) { + jumpBranchInfo.en := lop =/= rop + // jumpBranchInfo.pcAddr := io.aluInst.jumpBranchAddr + } + is(Op.blt) { + jumpBranchInfo.en := lop.asSInt < rop.asSInt + // jumpBranchInfo.pcAddr := io.aluInst.jumpBranchAddr + } + is(Op.bge) { + jumpBranchInfo.en := lop.asSInt >= rop.asSInt + // jumpBranchInfo.pcAddr := io.aluInst.jumpBranchAddr + } + is(Op.bltu) { + jumpBranchInfo.en := lop < rop + // jumpBranchInfo.pcAddr := io.aluInst.jumpBranchAddr + } + is(Op.bgeu) { + jumpBranchInfo.en := lop >= rop + // jumpBranchInfo.pcAddr := io.aluInst.jumpBranchAddr + } + } + + // arithmetic computation + + // mul + + val mulStage = Module(new Mul) + + val useSignedMul = + VecInit( + ExeInst.Op.mul, + ExeInst.Op.mulh + ).contains(io.aluInst.op) + + val useUnsignedMul = io.aluInst.op === ExeInst.Op.mulhu + + val useMul = WireDefault(useSignedMul || useUnsignedMul) + + val mulStart = useMul + + mulStage.io.isFlush := io.isFlush + mulStage.io.mulInst.valid := mulStart + mulStage.io.mulInst.bits.isSigned := useSignedMul + mulStage.io.mulInst.bits.leftOperand := lop + mulStage.io.mulInst.bits.rightOperand := rop + + val mulResult = mulStage.io.mulResult.bits + + // Div + + val useDiv = WireDefault( + VecInit( + ExeInst.Op.div, + ExeInst.Op.divu, + ExeInst.Op.mod, + ExeInst.Op.modu + ).contains(io.aluInst.op) + ) + + val divStage = Module(new Div) + + val divisorValid = rop =/= 0.U + + val divStart = useDiv && divisorValid + + divStage.io.isFlush := io.isFlush + divStage.io.divInst.valid := divStart + divStage.io.divInst.bits.isSigned := VecInit( + ExeInst.Op.div, + ExeInst.Op.mod + ).contains(io.aluInst.op) + divStage.io.divInst.bits.leftOperand := lop + divStage.io.divInst.bits.rightOperand := rop + + val quotient = divStage.io.divResult.bits.quotient + val remainder = divStage.io.divResult.bits.remainder + + io.outputValid := + !(mulStart && !mulStage.io.mulResult.valid) && ( + !(divStart && !divStage.io.divResult.valid) + ) + + switch(io.aluInst.op) { + is(Op.add) { + arithmetic := (lop.asSInt + rop.asSInt).asUInt + } + is(Op.sub) { + arithmetic := (lop.asSInt - rop.asSInt).asUInt + } + is(Op.slt) { + arithmetic := (lop.asSInt < rop.asSInt).asUInt + } + is(Op.sltu) { + arithmetic := (lop < rop).asUInt + } + is(Op.mul) { + arithmetic := mulResult(wordLength - 1, 0) + } + is(Op.mulh, Op.mulhu) { + arithmetic := mulResult(doubleWordLength - 1, wordLength) + } + is(Op.div, Op.divu) { + arithmetic := divStage.io.divResult.bits.quotient + } + is(Op.mod, Op.modu) { + arithmetic := divStage.io.divResult.bits.remainder + } + } + + when(io.isFlush) { + io.outputValid := false.B + } +} diff --git a/src/src/pipeline/simple/MainExeStage.scala b/src/src/pipeline/simple/MainExeStage.scala index 1476d6e8..4c0dbba7 100644 --- a/src/src/pipeline/simple/MainExeStage.scala +++ b/src/src/pipeline/simple/MainExeStage.scala @@ -7,7 +7,7 @@ import common.BaseStage import common.bundles._ import control.bundles._ import control.csrBundles._ -import execution.Alu +import pipeline.simple.execution.Alu import frontend.bundles._ import pipeline.common.bundles.{CacheMaintenanceInstNdPort, RobQueryPcPort} import pipeline.simple.bundles.InstInfoNdPort @@ -72,11 +72,14 @@ class ExePeerPort extends Bundle { val stableCounterReadPort = Flipped(new StableCounterReadPort) val csrReadPort = Flipped(new CsrReadPort) - val feedbackFtq = Output(new ExeCommitFtqNdPort) + val feedbackFtq = Output(new ExeFtqFeedBackNdPort) // val robQueryPcPort = Flipped(new RobQueryPcPort) val regWakeUpPort = Output(new RegWakeUpNdPort) + + val commitFtqPort = new CommitFtqTrainNdPort + } class MainExeStage @@ -92,6 +95,9 @@ class MainExeStage resultOutReg.bits := out val peer = io.peer.get + peer.commitFtqPort := (if (Param.exeFeedBackFtqDelay) RegNext(RegNext(out.commitFtqPort)) + else RegNext(out.commitFtqPort)) + // Fallback // ALU module val alu = Module(new Alu) @@ -308,7 +314,7 @@ class MainExeStage isDbarBlockingReg := false.B } - val fallThroughPc = selectedIn.branchInfo.pc + 4.U + val fallThroughPc = selectedIn.branchInfo.fallThroughPc switch(selectedIn.exeSel) { is(Sel.logic) { @@ -413,25 +419,35 @@ class MainExeStage val branchSetPort = io.peer.get.branchSetPort // branch set - branchSetPort := BackendRedirectPcNdPort.default + branchSetPort := DontCare + branchSetPort.en := false.B val feedbackFtq = io.peer.get.feedbackFtq - val jumpBranchInfo = WireDefault(alu.io.result.jumpBranchInfo) - val inFtqInfo = WireDefault(selectedIn.instInfo.ftqInfo) + val jumpBranchInfo = alu.io.result.jumpBranchInfo + val inFtqInfo = selectedIn.instInfo.ftqInfo val ftqQueryPc = selectedIn.branchInfo.predictJumpAddr // mis predict val branchDirectionMispredict = jumpBranchInfo.en ^ inFtqInfo.predictBranch + // val jumpAddr = Mux( + // selectedIn.exeOp === ExeInst.Op.jirl, + // selectedIn.leftOperand , + // DontCare + // ) val branchTargetMispredict = ( jumpBranchInfo.en && inFtqInfo.predictBranch && - jumpBranchInfo.pcAddr =/= ftqQueryPc + Mux( + selectedIn.exeOp === ExeInst.Op.jirl, + selectedIn.leftOperand =/= selectedIn.branchInfo.predictSubImm, + !selectedIn.branchInfo.immPredictCorrect + ) ) || ( !jumpBranchInfo.en && !inFtqInfo.predictBranch && inFtqInfo.isLastInBlock && - fallThroughPc =/= ftqQueryPc + !selectedIn.branchInfo.fallThroughPredictCorrect ) // is branch @@ -448,36 +464,58 @@ class MainExeStage out.wb.instInfo.ftqCommitInfo.targetMispredict.get := branchTargetMispredict && isBranchInst } - branchSetPort.en := isRedirect && !branchBlockingReg && !isDbarBlockingReg + val isBlocking = branchBlockingReg || isDbarBlockingReg + + branchSetPort.en := isRedirect && !isBlocking branchSetPort.ftqId := selectedIn.instInfo.ftqInfo.ftqId + val jumpAddr = Mux( + selectedIn.exeOp === ExeInst.Op.jirl, + selectedIn.leftOperand + selectedIn.jumpBranchAddr, + selectedIn.jumpBranchAddr + ) + branchSetPort.pcAddr := Mux( jumpBranchInfo.en, - jumpBranchInfo.pcAddr, + jumpAddr, fallThroughPc ) if (Param.exeFeedBackFtqDelay) { - feedbackFtq.ftqMetaUpdateValid := (RegNext(isBranchInst, false.B) || + feedbackFtq.commitBundle.ftqMetaUpdateValid := (RegNext(isBranchInst, false.B) || (RegNext(!isBranchInst, false.B) && RegNext(inFtqInfo.predictBranch, false.B))) && RegNext( - !branchBlockingReg, + !isBlocking, false.B ) - feedbackFtq.ftqMetaUpdateFtbDirty := RegNext(branchTargetMispredict, false.B) || + feedbackFtq.commitBundle.ftqMetaUpdateFtbDirty := RegNext(branchTargetMispredict, false.B) || (RegNext(jumpBranchInfo.en, false.B) && !RegNext(inFtqInfo.isLastInBlock, false.B)) || (RegNext(!isBranchInst, false.B) && RegNext(inFtqInfo.predictBranch, false.B)) - feedbackFtq.ftqUpdateMetaId := RegNext(inFtqInfo.ftqId, 0.U) - feedbackFtq.ftqMetaUpdateJumpTarget := RegNext(jumpBranchInfo.pcAddr, 0.U) - feedbackFtq.ftqMetaUpdateFallThrough := RegNext(fallThroughPc, 0.U) + feedbackFtq.commitBundle.ftqUpdateMetaId := RegNext(inFtqInfo.ftqId, 0.U) + feedbackFtq.commitBundle.ftqMetaUpdateJumpTarget := RegNext(jumpAddr, 0.U) + feedbackFtq.commitBundle.ftqMetaUpdateFallThrough := RegNext(fallThroughPc, 0.U) + + feedbackFtq.fixGhrBundle.isExeFixValid := RegNext(isRedirect && !isBlocking, false.B) + feedbackFtq.fixGhrBundle.exeFixFirstBrTaken := RegNext( + jumpBranchInfo.en && !inFtqInfo.isPredictValid && !isBlocking && isBranchInst, + false.B + ) // TODO predictValid + feedbackFtq.fixGhrBundle.exeFixJumpError := RegNext(isRedirect && !isBlocking, false.B) + feedbackFtq.fixGhrBundle.exeFixIsTaken := RegNext(jumpBranchInfo.en, false.B) } else { - feedbackFtq.ftqMetaUpdateValid := (isBranchInst || (!isBranchInst && inFtqInfo.predictBranch)) && !branchBlockingReg - feedbackFtq.ftqMetaUpdateFtbDirty := branchTargetMispredict || + feedbackFtq.commitBundle.ftqMetaUpdateValid := (isBranchInst || (!isBranchInst && inFtqInfo.predictBranch)) && !branchBlockingReg + feedbackFtq.commitBundle.ftqMetaUpdateFtbDirty := branchTargetMispredict || (jumpBranchInfo.en && !inFtqInfo.isLastInBlock) || (!isBranchInst && inFtqInfo.predictBranch) - feedbackFtq.ftqUpdateMetaId := inFtqInfo.ftqId - feedbackFtq.ftqMetaUpdateJumpTarget := jumpBranchInfo.pcAddr - feedbackFtq.ftqMetaUpdateFallThrough := fallThroughPc + feedbackFtq.commitBundle.ftqUpdateMetaId := inFtqInfo.ftqId + feedbackFtq.commitBundle.ftqMetaUpdateJumpTarget := jumpAddr + feedbackFtq.commitBundle.ftqMetaUpdateFallThrough := fallThroughPc + + feedbackFtq.fixGhrBundle.isExeFixValid := isRedirect && !isBlocking + feedbackFtq.fixGhrBundle.exeFixFirstBrTaken := + jumpBranchInfo.en && !inFtqInfo.isPredictValid && !isBlocking && isBranchInst + feedbackFtq.fixGhrBundle.exeFixIsTaken := jumpBranchInfo.en + feedbackFtq.fixGhrBundle.exeFixJumpError := isRedirect && !isBlocking } // out.wb.instInfo.ftqCommitInfo.isBranchSuccess := jumpBranchInfo.en diff --git a/src/src/pipeline/simple/SimpleExeStage.scala b/src/src/pipeline/simple/SimpleExeStage.scala index bffcf54b..125978ee 100644 --- a/src/src/pipeline/simple/SimpleExeStage.scala +++ b/src/src/pipeline/simple/SimpleExeStage.scala @@ -4,7 +4,7 @@ import chisel3._ import chisel3.util._ import spec.ExeInst.Sel import common.BaseStage -import execution.Alu +import pipeline.simple.execution.Alu import pipeline.simple.ExeNdPort import pipeline.simple.bundles.WbNdPort import pipeline.simple.bundles.RegWakeUpNdPort diff --git a/src/src/pipeline/simple/bundles/MainExeBranchInfoBundle.scala b/src/src/pipeline/simple/bundles/MainExeBranchInfoBundle.scala index 383b08ba..0d23571d 100644 --- a/src/src/pipeline/simple/bundles/MainExeBranchInfoBundle.scala +++ b/src/src/pipeline/simple/bundles/MainExeBranchInfoBundle.scala @@ -4,8 +4,11 @@ import chisel3._ import spec._ class MainExeBranchInfoBundle extends Bundle { - val isBranch = Bool() - val branchType = UInt(Param.BPU.BranchType.width.W) - val pc = UInt(Width.Reg.data) - val predictJumpAddr = UInt(Width.Reg.data) + val isBranch = Bool() + val branchType = UInt(Param.BPU.BranchType.width.W) + val fallThroughPc = UInt(Width.Reg.data) + val predictSubImm = UInt(Width.Reg.data) + val predictJumpAddr = UInt(Width.Reg.data) + val fallThroughPredictCorrect = Bool() + val immPredictCorrect = Bool() } diff --git a/src/src/pipeline/simple/id/IssueQueue.scala b/src/src/pipeline/simple/id/IssueQueue.scala index 1def528d..59c97cd9 100644 --- a/src/src/pipeline/simple/id/IssueQueue.scala +++ b/src/src/pipeline/simple/id/IssueQueue.scala @@ -133,11 +133,16 @@ class IssueQueue( } // reservation station enqueue bits - mainRSEnqPort.bits.mainExeBranchInfo.pc := io.ins.head.bits.pc - io.queryPcPort.ftqId := io.ins.head.bits.instInfo.ftqInfo.ftqId + 1.U - mainRSEnqPort.bits.mainExeBranchInfo.predictJumpAddr := io.queryPcPort.pc - mainRSEnqPort.bits.mainExeBranchInfo.isBranch := io.ins.head.bits.decode.info.isBranch - mainRSEnqPort.bits.mainExeBranchInfo.branchType := io.ins.head.bits.decode.info.branchType + val fallThroughPc = io.ins.head.bits.pc + 4.U + val predictPc = io.queryPcPort.pc + mainRSEnqPort.bits.mainExeBranchInfo.fallThroughPc := fallThroughPc + mainRSEnqPort.bits.mainExeBranchInfo.fallThroughPredictCorrect := fallThroughPc === predictPc + mainRSEnqPort.bits.mainExeBranchInfo.immPredictCorrect := io.ins.head.bits.decode.info.jumpBranchAddr === predictPc + mainRSEnqPort.bits.mainExeBranchInfo.predictSubImm := predictPc - io.ins.head.bits.decode.info.jumpBranchAddr + io.queryPcPort.ftqId := io.ins.head.bits.instInfo.ftqInfo.ftqId + 1.U + mainRSEnqPort.bits.mainExeBranchInfo.predictJumpAddr := predictPc + mainRSEnqPort.bits.mainExeBranchInfo.isBranch := io.ins.head.bits.decode.info.isBranch + mainRSEnqPort.bits.mainExeBranchInfo.branchType := io.ins.head.bits.decode.info.branchType rsEnqPorts.lazyZip(io.ins).lazyZip(io.regReadPorts).zipWithIndex.foreach { case ((rs, in, readRes), index) => diff --git a/src/src/pipeline/simple/id/IssueStage.scala b/src/src/pipeline/simple/id/IssueStage.scala deleted file mode 100644 index f4c19692..00000000 --- a/src/src/pipeline/simple/id/IssueStage.scala +++ /dev/null @@ -1,352 +0,0 @@ -package pipeline.simple.id - -import chisel3._ -import chisel3.util._ -import common.DistributedQueue -import common.bundles.BackendRedirectPcNdPort -import control.enums.ExceptionPos -import pipeline.common.bundles.{FetchInstInfoBundle, InstQueueEnqNdPort, PcInstBundle} -import pipeline.simple.decode._ -import pipeline.simple.decode.bundles._ -import spec._ -import frontend.bundles.QueryPcBundle -import pipeline.simple.bundles._ -import common.MultiQueue -import pipeline.simple.id.FetchInstDecodeNdPort -import pipeline.simple.MainExeNdPort -import pipeline.simple.ExeNdPort -import utils.MultiMux1 -import pipeline.simple.pmu.bundles.PmuDispatchInfoBundle - -// assert: enqueuePorts总是最低的几位有效 -class IssueStage( - issueNum: Int = Param.issueInstInfoMaxNum, - pipelineNum: Int = Param.pipelineNum) - extends Module { - val io = IO(new Bundle { - val isFrontendFlush = Input(Bool()) - val isBackendFlush = Input(Bool()) - - val enqueuePorts = Vec( - issueNum, - Flipped(Decoupled(new FetchInstInfoBundle)) - ) - - // `InstQueue` -> `IssueStage` - val dequeuePorts = new Bundle { - val mainExePort = Decoupled(new MainExeNdPort) - val simpleExePorts = Vec(pipelineNum - 1, Decoupled(new ExeNdPort)) - } - - val idleBlocking = Input(Bool()) - val hasInterrupt = Input(Bool()) - - val robIdRequests = Vec(issueNum, Flipped(new RobRequestPort)) - - val queryPcPort = Flipped(new QueryPcBundle) - - val regReadPorts = Vec(Param.issueInstInfoMaxNum, Vec(Param.regFileReadNum, Flipped(new RegReadPort))) - val occupyPorts = Output(Vec(Param.issueInstInfoMaxNum, new RegOccupyNdPort)) - - val wakeUpPorts = Input(Vec(pipelineNum + 1, new RegWakeUpNdPort)) - - val plv = Input(UInt(2.W)) - - val pmu_dispatchInfos = Option.when(Param.usePmu)(Output(Vec(Param.issueInstInfoMaxNum, new PmuDispatchInfoBundle))) - }) - require(issueNum == pipelineNum) - - private val rsLength = 2 - - val isIdle = RegInit(false.B) - when(io.hasInterrupt) { - isIdle := false.B - }.elsewhen(io.idleBlocking) { - isIdle := true.B - } - - // Decode - val decodeInstInfos = VecInit(io.enqueuePorts.map(_.bits)) - - // Select a decoder - - val decoders = Seq.fill(issueNum)(Module(new DecodeUnit)) - - decoders.zip(decodeInstInfos).foreach { - case (decoder, decodeInstInfo) => - decoder.io.in := decodeInstInfo - } - - val selectedDecoders = decoders.map(_.io.out) - - val mainRS = Module( - new MultiQueue(rsLength, 1, 1, new MainRSBundle, 0.U.asTypeOf(new MainRSBundle), writeFirst = false) - ) - val simpleRSs = Seq.fill(pipelineNum - 1)( - Module( - new MultiQueue(rsLength, 1, 1, new RSBundle, 0.U.asTypeOf(new RSBundle), writeFirst = false) - ) - ) - - val mainRSEnqPort = mainRS.io.enqueuePorts.head - val simpleRSEnqPorts = simpleRSs.map(_.io.enqueuePorts.head) - - mainRS.io.setPorts.zip(mainRS.io.elems).foreach { - case (set, elem) => - set.bits := elem - set.valid := true.B - } - mainRS.io.isFlush := io.isBackendFlush - simpleRSs.zip(simpleRSEnqPorts).foreach { - case (rs, enq) => - rs.io.setPorts.zip(rs.io.elems).foreach { - case (set, elem) => - set.bits := elem - set.valid := true.B - } - rs.io.isFlush := io.isBackendFlush - } - - val rsEnqPorts = Seq(mainRSEnqPort) ++ simpleRSEnqPorts - val rss = Seq(mainRS) ++ simpleRSs - - // reg read - io.regReadPorts.zip(rsEnqPorts).foreach { - case (readPorts, rsEnqPort) => - readPorts.zip(rsEnqPort.bits.decodePort.decode.info.gprReadPorts).foreach { - case (dst, src) => - dst.addr := src.addr - } - } - - // occupy - io.occupyPorts.lazyZip(rsEnqPorts).lazyZip(io.robIdRequests).foreach { - case (occupy, enq, robIdReq) => - occupy.en := enq.valid && enq.ready && enq.bits.decodePort.decode.info.gprWritePort.en - occupy.addr := enq.bits.decodePort.decode.info.gprWritePort.addr - occupy.robId := robIdReq.result.bits - } - - val isBlockDequeueReg = RegInit(false.B) - when(io.isBackendFlush) { - isBlockDequeueReg := false.B - }.elsewhen( - io.isFrontendFlush - || ( - rsEnqPorts.map { port => - port.valid && port.ready && ( - port.bits.decodePort.decode.info.needRefetch || - port.bits.decodePort.instInfo.exceptionPos =/= ExceptionPos.none - ) - }.reduce(_ || _) - ) - ) { - isBlockDequeueReg := true.B - } - - // rob id request - - rsEnqPorts.lazyZip(io.enqueuePorts).lazyZip(io.robIdRequests).zipWithIndex.foreach { - case ((dst, src, robIdReq), idx) => - src.ready := dst.ready - dst.valid := src.valid - - robIdReq.request.valid := src.valid && src.ready - robIdReq.request.bits.pcAddr := src.bits.pcAddr - robIdReq.request.bits.inst := src.bits.inst - - // block - when( - isBlockDequeueReg || - isIdle || - // io.isFrontendFlush || - !robIdReq.result.valid - ) { - dst.valid := false.B - src.ready := false.B - } - - rsEnqPorts.take(idx).foreach { prev => - val prevGprWrite = prev.bits.decodePort.decode.info.gprWritePort - - // data dependence - // dst.bits.decodePort.decode.info.gprReadPorts.foreach { r => - // when(r.en && prevGprWrite.en && r.addr === prevGprWrite.addr) { - // dst.valid := false.B - // src.ready := false.B - // } - // } - - // issue in order - when(!(prev.ready && prev.valid)) { - dst.valid := false.B - src.ready := false.B - } - } - - // should issue in main - if (idx != 0) { - when(dst.bits.decodePort.decode.info.isIssueMainPipeline) { - dst.valid := false.B - src.ready := false.B - } - } - } - - // reservation station enqueue bits - mainRSEnqPort.bits.mainExeBranchInfo.pc := decodeInstInfos.head.pcAddr - io.queryPcPort.ftqId := decodeInstInfos.head.ftqInfo.ftqId + 1.U - mainRSEnqPort.bits.mainExeBranchInfo.predictJumpAddr := io.queryPcPort.pc - mainRSEnqPort.bits.mainExeBranchInfo.isBranch := decoders.head.io.out.info.isBranch - mainRSEnqPort.bits.mainExeBranchInfo.branchType := decoders.head.io.out.info.branchType - - rsEnqPorts - .lazyZip(selectedDecoders) - .lazyZip(decodeInstInfos) - .zipWithIndex - .foreach { - case ( - ( - rs, - selectedDecoder, - decodeInstInfo - ), - index - ) => - val robIdReq = io.robIdRequests(index) - - val outReadResults = io.regReadPorts(index) - - rs.bits.regReadResults.lazyZip(outReadResults).lazyZip(selectedDecoder.info.gprReadPorts).foreach { - case (dst, readRes, decodeRead) => - dst.valid := !(decodeRead.en && !readRes.data.valid) - dst.bits := readRes.data.bits - - // data dependence - selectedDecoders.take(index).zipWithIndex.foreach { - case (prev, prevIdx) => - when(decodeRead.en && prev.info.gprWritePort.en && prev.info.gprWritePort.addr === decodeRead.addr) { - dst.valid := false.B - dst.bits := io.robIdRequests(prevIdx).result.bits - } - } - } - - val outInstInfo = rs.bits.decodePort.instInfo - - rs.bits.decodePort.decode := selectedDecoder - - outInstInfo := InstInfoNdPort.default - - val isMatched = selectedDecoder.isMatched - outInstInfo.isValid := true.B - outInstInfo.isCsrWrite := selectedDecoder.info.csrWriteEn - outInstInfo.exeOp := selectedDecoder.info.exeOp - outInstInfo.isTlb := selectedDecoder.info.isTlb - outInstInfo.needRefetch := selectedDecoder.info.needRefetch - outInstInfo.ftqInfo := decodeInstInfo.ftqInfo - - outInstInfo.forbidParallelCommit := selectedDecoder.info.needRefetch - - outInstInfo.exceptionPos := ExceptionPos.none - outInstInfo.exceptionRecord := DontCare - when(io.hasInterrupt) { - outInstInfo.exceptionPos := ExceptionPos.frontend - outInstInfo.exceptionRecord := Csr.ExceptionIndex.int - }.elsewhen(decodeInstInfo.exceptionValid) { - outInstInfo.exceptionPos := ExceptionPos.frontend - outInstInfo.exceptionRecord := decodeInstInfo.exception - }.elsewhen(!isMatched) { - outInstInfo.exceptionPos := ExceptionPos.frontend - outInstInfo.exceptionRecord := Csr.ExceptionIndex.ine - }.elsewhen( - io.plv === 3.U && - selectedDecoder.info.isPrivilege - ) { - outInstInfo.exceptionPos := ExceptionPos.frontend - outInstInfo.exceptionRecord := Csr.ExceptionIndex.ipe - } - - outInstInfo.robId := robIdReq.result.bits - - if (Param.isDiffTest) { - outInstInfo.pc.get := decodeInstInfo.pcAddr - outInstInfo.inst.get := decodeInstInfo.inst - } - - } - - // wake up - rss.foreach { rs => - rs.io.setPorts.zip(rs.io.elems).foreach { - case (set, elem) => - set.bits.regReadResults.lazyZip(elem.regReadResults).lazyZip(elem.decodePort.decode.info.gprReadPorts).foreach { - case (setRegData, elemData, decodeRead) => - setRegData := elemData - - val mux = Module(new MultiMux1(pipelineNum + 1, UInt(Width.Reg.data), zeroWord)) - mux.io.inputs.zip(io.wakeUpPorts).foreach { - case (input, wakeUp) => - input.valid := wakeUp.en && - wakeUp.addr === decodeRead.addr && - wakeUp.robId(Param.Width.Rob._id - 1, 0) === elemData.bits(Param.Width.Rob._id - 1, 0) - input.bits := wakeUp.data - } - when(!elemData.valid && mux.io.output.valid) { - setRegData.valid := true.B - setRegData.bits := mux.io.output.bits - } - } - } - } - - // output - io.dequeuePorts.mainExePort.bits.branchInfo := mainRS.io.dequeuePorts.head.bits.mainExeBranchInfo - (Seq(io.dequeuePorts.mainExePort) ++ io.dequeuePorts.simpleExePorts).zip(rss.map(_.io.dequeuePorts.head)).foreach { - case (out, rs) => - val regReadResults = WireDefault(rs.bits.regReadResults) - - regReadResults.lazyZip(rs.bits.regReadResults).lazyZip(rs.bits.decodePort.decode.info.gprReadPorts).foreach { - case (setRegData, elemData, decodeRead) => - val mux = Module(new MultiMux1(pipelineNum + 1, UInt(Width.Reg.data), zeroWord)) - mux.io.inputs.zip(io.wakeUpPorts).foreach { - case (input, wakeUp) => - input.valid := wakeUp.en && - // wakeUp.addr === decodeRead.addr && - wakeUp.robId(Param.Width.Rob._id - 1, 0) === elemData.bits(Param.Width.Rob._id - 1, 0) - input.bits := wakeUp.data - } - when(!elemData.valid && mux.io.output.valid) { - setRegData.valid := true.B - setRegData.bits := mux.io.output.bits - } - - } - - val deqEn = regReadResults.map(_.valid).reduce(_ && _) - out.valid := rs.valid && deqEn - rs.ready := out.ready && deqEn - out.bits.exeOp := rs.bits.decodePort.decode.info.exeOp - out.bits.exeSel := rs.bits.decodePort.decode.info.exeSel - out.bits.gprWritePort := rs.bits.decodePort.decode.info.gprWritePort - out.bits.instInfo := rs.bits.decodePort.instInfo - out.bits.jumpBranchAddr := rs.bits.decodePort.decode.info.jumpBranchAddr - out.bits.leftOperand := regReadResults(0).bits - out.bits.rightOperand := Mux( - rs.bits.decodePort.decode.info.isHasImm, - rs.bits.decodePort.decode.info.imm, - regReadResults(1).bits - ) - } - - if (Param.usePmu) { - val pmuInfos = io.pmu_dispatchInfos.get - pmuInfos.lazyZip(rss).lazyZip(Seq(io.dequeuePorts.mainExePort) ++ io.dequeuePorts.simpleExePorts).foreach { - case (pmu, rs, out) => - pmu.isIssueInst := out.valid && out.ready && !io.isBackendFlush - pmu.bubbleFromFrontend := !rs.io.dequeuePorts.head.valid && !io.isBackendFlush && !isBlockDequeueReg - pmu.bubbleFromBackend := out.valid && !out.ready && !io.isBackendFlush - pmu.bubbleFromDataDependence := rs.io.dequeuePorts.head.valid && !out.valid && !io.isBackendFlush - } - } -} diff --git a/src/src/spec/Param.scala b/src/src/spec/Param.scala index 52f9f871..21676fb1 100644 --- a/src/src/spec/Param.scala +++ b/src/src/spec/Param.scala @@ -15,18 +15,19 @@ object Param { val usePmu = false || isChiplab // 性能计数器 - val isDiffTest = false || isChiplab - val isOutOfOrderIssue = true - val isFullUncachedPatch = true - val isMmioDelay = false || isChiplab || isFullFpga - val isNoPrivilege = false || isReleasePackage - val isCacheOnPg = false - val isForcedCache = false || isReleasePackage - val isForcedUncached = false - val isBranchPredict = true - val isPredecode = true - val isOverideRas = true - val isFTBupdateRet = true + val isDiffTest = false || isChiplab + val isOutOfOrderIssue = true + val isFullUncachedPatch = true + val isMmioDelay = false || isChiplab || isFullFpga + val isNoPrivilege = false || isReleasePackage + val isCacheOnPg = false + val isForcedCache = false || isReleasePackage + val isForcedUncached = false + val isBranchPredict = true + val isPredecode = true + val isOverideRas = true + val isFTBupdateRet = true + val isSpeculativeGlobalHistory = false val isWritebackPassThroughWakeUp = true val canIssueSameWbRegInsts = true @@ -184,15 +185,16 @@ object Param { object TagePredictor { // ComponentTableDepth // predictor num = tagComponentNum + 1 (BasePredictor) - val ghrLength = 140 val tagComponentTagWidth = 12 val tagComponentNum = 4 - val componentHistoryLength = Seq(0, 11, 23, 53, 131, 230, 479, 1012) // ipc 0.6452 + val componentHistoryLength = Seq(0, 11, 23, 53, 112) val componentTableDepth = Seq(8192, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024, 1024) - val componentCtrWidth = Seq(2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3) - val componentUsefulWidth = Seq(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, - 1) // tage paper suggest 2, but in order to save source, we use 1 bit(won't decrease ipc) + val componentCtrWidth = Seq(2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3) + val componentUsefulWidth = Seq(0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1) + // tage paper suggest 2-bits useful, but in order to save source, we use 1 bit(won't decrease ipc) + val ghrLength = componentHistoryLength(tagComponentNum) + ftqSize + val ghrPtrWidth = log2Ceil(ghrLength) } object FTB { @@ -217,6 +219,24 @@ object Param { def width = log2Ceil(count + 1) } + + object GhrFixType { + var count = 0 + + private def next = { + count += 1 + count.U + } + + val commitRecover = 0.U + val exeFixJumpError = next + val exeUpdateJump = next + val exeRecover = next + val decodeUpdateJump = next + val decodeBrExcp = next + + def width = log2Ceil(count + 1) + } } object SimpleFetchStageState extends ChiselEnum {