diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/BaseCPU.py --- a/src/cpu/BaseCPU.py Sun Apr 18 21:33:59 2010 -0700 +++ b/src/cpu/BaseCPU.py Wed Apr 28 19:24:20 2010 +0800 @@ -37,8 +37,17 @@ from ExeTracer import ExeTracer from MemObject import MemObject +# Dec. 2009, Gou Pengfei +if buildEnv['TARGET_ISA'] == 'trips': + from EdgeInstTracer import EdgeInstTracer + from EdgeExeTracer import EdgeExeTracer + default_tracer = ExeTracer() +# Dec. 2009, Gou Pengfei +if buildEnv['TARGET_ISA'] == 'trips': + default_edge_tracer = EdgeExeTracer() + if buildEnv['TARGET_ISA'] == 'alpha': from AlphaTLB import AlphaDTB, AlphaITB if buildEnv['FULL_SYSTEM']: @@ -63,6 +72,10 @@ from PowerTLB import PowerTLB if buildEnv['FULL_SYSTEM']: from PowerInterrupts import PowerInterrupts + # Dec. 2009, phoenix + # For TRIPS +elif buildEnv['TARGET_ISA'] == 'trips': + from TripsTLB import TripsDTB, TripsITB class BaseCPU(MemObject): type = 'BaseCPU' @@ -127,6 +140,12 @@ if buildEnv['FULL_SYSTEM']: interrupts = Param.PowerInterrupts( PowerInterrupts(), "Interrupt Controller") + # Dec. 2009, phoenix + # For TRIPS + elif buildEnv['TARGET_ISA'] == 'trips': + UnifiedTLB = Param.Bool(True, "Is this a Unified TLB?") + dtb = Param.TripsTLB(TripsDTB(), "Data TLB") + itb = Param.TripsTLB(TripsITB(), "Instruction TLB") else: print "Don't know what TLB to use for ISA %s" % \ buildEnv['TARGET_ISA'] @@ -151,6 +170,10 @@ tracer = Param.InstTracer(default_tracer, "Instruction tracer") + # Dec. 
2009, Gou Pengfei + if buildEnv['TARGET_ISA'] == 'trips': + edge_tracer = Param.EdgeInstTracer(default_edge_tracer, "Edge Instruction tracer") + _mem_ports = [] if buildEnv['TARGET_ISA'] == 'x86' and buildEnv['FULL_SYSTEM']: _mem_ports = ["itb.walker.port", diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/SConscript --- a/src/cpu/SConscript Sun Apr 18 21:33:59 2010 -0700 +++ b/src/cpu/SConscript Wed Apr 28 19:24:20 2010 +0800 @@ -61,6 +61,29 @@ mem_comp_sig_template = ''' virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); return NoFault; M5_DUMMY_RETURN }; ''' +# Dec. 2009, Gou Pengfei +# Template for Edge execute() signature +edge_exec_sig_template = ''' +virtual Fault execute(%(type)s *xc, Trace::EdgeInstRecord *traceData) const = 0; +virtual Fault eaComp(%(type)s *xc, Trace::EdgeInstRecord *traceData) const +{ panic("eaComp not defined!"); M5_DUMMY_RETURN }; +virtual Fault initiateAcc(%(type)s *xc, Trace::EdgeInstRecord *traceData) const +{ panic("initiateAcc not defined!"); M5_DUMMY_RETURN }; +virtual Fault completeAcc(Packet *pkt, %(type)s *xc, + Trace::EdgeInstRecord *traceData) const +{ panic("completeAcc not defined!"); M5_DUMMY_RETURN }; +''' + +edge_mem_ini_sig_template = ''' +virtual Fault eaComp(%(type)s *xc, Trace::InstRecord *traceData) const +{ panic("eaComp not defined!"); M5_DUMMY_RETURN }; +virtual Fault initiateAcc(%s *xc, Trace::InstRecord *traceData) const { panic("Not defined!"); M5_DUMMY_RETURN }; +''' + +edge_mem_comp_sig_template = ''' +virtual Fault completeAcc(uint8_t *data, %s *xc, Trace::EdgeInstRecord *traceData) const { panic("Not defined!"); return NoFault; M5_DUMMY_RETURN }; +''' + # Generate a temporary CPU list, including the CheckerCPU if # it's enabled. 
This isn't used for anything else other than StaticInst @@ -77,10 +100,18 @@ print >> f, ''' #ifndef __CPU_STATIC_INST_EXEC_SIGS_HH__ #define __CPU_STATIC_INST_EXEC_SIGS_HH__ -''' - for cpu in temp_cpu_list: - xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] - print >> f, exec_sig_template % { 'type' : xc_type } +''' + # + # Dec. 2009, Gou Pengfei + # + if 'SimpleEdgeCPU' in temp_cpu_list: + for cpu in temp_cpu_list: + xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] + print >> f, edge_exec_sig_template % { 'type' : xc_type } + else: + for cpu in temp_cpu_list: + xc_type = CpuModel.dict[cpu].strings['CPU_exec_context'] + print >> f, exec_sig_template % { 'type' : xc_type } print >> f, ''' #endif // __CPU_STATIC_INST_EXEC_SIGS_HH__ ''' diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/EdgeExeTracer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/EdgeExeTracer.py Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,36 @@ +# Copyright (c) 2007 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Gou Pengfei +# + +from m5.SimObject import SimObject +from m5.params import * +from EdgeInstTracer import EdgeInstTracer + +class EdgeExeTracer(EdgeInstTracer): + type = 'EdgeExeTracer' + cxx_class = 'Trace::EdgeExeTracer' diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/EdgeInstTracer.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/EdgeInstTracer.py Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,36 @@ +# Copyright (c) 2007 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Gou Pengfei +# + +from m5.SimObject import SimObject +from m5.params import * + +class EdgeInstTracer(SimObject): + type = 'EdgeInstTracer' + cxx_class = 'Trace::EdgeInstTracer' + abstract = True diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/FUPool.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/FUPool.py Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,40 @@ +# Copyright (c) 2006-2007 The Regents of The University of Michigan +# All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Gou Pengfei + +from m5.SimObject import SimObject +from m5.params import * +from FuncUnit import * +from FuncUnitConfig import * + +class FUPool(SimObject): + type = 'FUPool' + FUList = VectorParam.FUDesc("list of FU's for this pool") + +class DefaultFUPool(FUPool): + FUList = [ IntALU(), IntMultDiv(), FP_ALU(), FP_MultDiv(), ReadPort(), + WritePort(), RdWrPort(), IprPort() ] diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/FuncUnitConfig.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/FuncUnitConfig.py Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,70 @@ +# Copyright (c) 2006-2007 The Regents of The University of Michigan +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Gou Pengfei +# + +from m5.SimObject import SimObject +from m5.params import * +from FuncUnit import * + +class IntALU(FUDesc): + opList = [ OpDesc(opClass='IntAlu') ] + count = 16 + +class IntMultDiv(FUDesc): + opList = [ OpDesc(opClass='IntMult', opLat=3), + OpDesc(opClass='IntDiv', opLat=20, issueLat=19) ] + count=16 + +class FP_ALU(FUDesc): + opList = [ OpDesc(opClass='FloatAdd', opLat=2), + OpDesc(opClass='FloatCmp', opLat=2), + OpDesc(opClass='FloatCvt', opLat=2) ] + count = 16 + +class FP_MultDiv(FUDesc): + opList = [ OpDesc(opClass='FloatMult', opLat=4), + OpDesc(opClass='FloatDiv', opLat=12, issueLat=12), + OpDesc(opClass='FloatSqrt', opLat=24, issueLat=24) ] + count = 16 + +class ReadPort(FUDesc): + opList = [ OpDesc(opClass='MemRead') ] + count = 0 + +class WritePort(FUDesc): + opList = [ OpDesc(opClass='MemWrite') ] + count = 0 + +class RdWrPort(FUDesc): + opList = [ OpDesc(opClass='MemRead'), OpDesc(opClass='MemWrite') ] + count = 16 + +class IprPort(FUDesc): + opList = [ OpDesc(opClass='IprAccess', opLat = 3, issueLat = 3) ] + count = 1 + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/SConscript --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/SConscript Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,97 @@ +# -*- mode:python -*- + +# Copyright (c) 2009-2010 HIT Microelectronic Center +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Gou Pengfei +# + +Import('*') + +print env['CPU_MODELS'] + +if 'SimpleEdgeCPU' in env['CPU_MODELS']: + Source('cpu.cc') + Source('static_inst.cc') + Source('fetch.cc') + Source('thread_context.cc') + Source('dyn_inst.cc') + Source('base_dyn_inst.cc') + Source('cpu_builder.cc') + Source('exetrace.cc') + Source('base_block.cc') + Source('block.cc') + Source('map.cc') + Source('execute.cc') + Source('commit.cc') + Source('lsq.cc') + Source('lsq_unit.cc') + Source('inst_queue.cc') + Source('fu_pool.cc') + Source('mem_dep_unit.cc') + Source('store_set.cc') + Source('rob.cc') + Source('bpred_unit.cc') + + SimObject('SimpleEdgeCPU.py') + SimObject('EdgeExeTracer.py') + SimObject('EdgeInstTracer.py') + SimObject('FUPool.py') + SimObject('FuncUnitConfig.py') + + TraceFlag('EdgeFetch') + TraceFlag('EdgeCPU') + TraceFlag('EdgeReg') + TraceFlag('EdgeFetchTest') + TraceFlag('EdgeMap') + TraceFlag('EdgeBlock') + TraceFlag('EdgeExe') + TraceFlag('EdgeLSQ') + TraceFlag('EdgeLSQUnit') + TraceFlag('EdgeLSQWriteBack') + TraceFlag('EdgeIQ') + TraceFlag('EdgeCommit') + TraceFlag('EdgeCommitResult') + TraceFlag('EdgeCommitResultSym') + TraceFlag('EdgeMemDepUnit') + TraceFlag('EdgeROB') + TraceFlag('EdgeRegDep') + TraceFlag('EdgeRAS') + TraceFlag('EdgePredUnit') + TraceFlag('StoreSet') + TraceFlag('EdgeBlockOutput') + TraceFlag('EdgeBlockCount') + + TraceFlag('EdgeIQTest') + + CompoundFlag('EdgePred', ['EdgeRAS', 'EdgePredUnit']) + + CompoundFlag('EdgeCPUAll', ['EdgeFetch', 'EdgeCPU', 'EdgeReg', 'EdgeMap', 'EdgeBlock', + 'EdgeExe', 'EdgeLSQ', 'EdgeLSQUnit', 'EdgeIQ', 'EdgeCommit', 'EdgeMemDepUnit', + 'EdgeROB', 'StoreSet', 'EdgeLSQWriteBack', 'EdgeCommitResult', 'EdgeRegDep', + 'EdgeRAS', 'EdgePredUnit']) + + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/SConsopts --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/SConsopts Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,37 @@ +# -*- mode:python -*- + +# Copyright (c) 2009-2010 HIT Microelectronic Center +# All 
rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +# +# Authors: Gou Pengfei +# + +Import('*') + +CpuModel('SimpleEdgeCPU', 'simple_edge_cpu_exec.cc', + '#include "cpu/edge/isa_specific.hh"', + { 'CPU_exec_context': 'EdgeDynInst' }, + default=True) diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/SimpleEdgeCPU.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/SimpleEdgeCPU.py Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,127 @@ +# Copyright (c) 2009-2010 HIT Microelectronic Center +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+# +# Authors: Gou Pengfei +# + +from m5.defines import buildEnv +from m5.params import * +from m5.proxy import * +from BaseCPU import BaseCPU +from FUPool import * + +class SimpleEdgeCPU(BaseCPU): + type = 'SimpleEdgeCPU' + activity = Param.Unsigned(0, "Initial count") + + fuPool = Param.FUPool(DefaultFUPool(), "Functional Unit pool") + + cachePorts = Param.Unsigned(200, "Cache Ports") + icache_port = Port("Instruction Port") + dcache_port = Port("Data Port") + _mem_ports = BaseCPU._mem_ports + ['icache_port', 'dcache_port'] + + mapToFetchDelay = Param.Unsigned(1 ,"Map to fetch delay") + executeToFetchDelay = Param.Unsigned(1, "Execute to fetch " + "delay") + commitToFetchDelay = Param.Unsigned(1, "Commit to fetch delay") + executeToMapDelay = Param.Unsigned(1, "Execute to map delay") + fetchToMapDelay = Param.Unsigned(1,"Fetch to map delay") + + commitToExecuteDelay = Param.Unsigned(1, "Commit to execute delay") + commitToMapDelay = Param.Unsigned(1, "Commit to map delay") + issueToExecuteDelay = Param.Unsigned(1, "Issu to execute delay") + mapToExecuteDelay = Param.Unsigned(2, "Map to execute delay") + + # This is the size of a inst chunk in TRIPS architecture + fetchWidth = Param.Unsigned(32, "Fetch width") + mapWidth = Param.Unsigned(1,"Map width") # This is in blocks + issueWidth = Param.Unsigned(16, "Issue width") # This is in insts + dispatchWidth = Param.Unsigned(160, "Dispatch width") # This is in insts + wbWidth = Param.Unsigned(16, "Write back width") + wbDepth = Param.Unsigned(16, "Write back depth") + + executeToCommitDelay = Param.Unsigned(1, "Execute to commit " + "delay") + mapToROBDelay = Param.Unsigned(1, "Map to reorder buffer delay") + commitWidth = Param.Unsigned(1, "Commit width") + squashWidth = Param.Unsigned(8, "Squash width") + + trapLatency = Param.Tick(1, "Trap latency") + + fetchTrapLatency = Param.Tick(1, "Fetch trap latency") + + LQEntries = Param.Unsigned(32, "Number of load queue entries") + SQEntries = Param.Unsigned(32, "Number 
of store queue entries") + LFSTSize = Param.Unsigned(1024, "Last fetched store table size") + SSITSize = Param.Unsigned(1024, "Store set ID table size") + + numIQEntries = Param.Unsigned(160, "Number of instruction queue entries") + numROBEntries = Param.Unsigned(192, "Number of reorder buffer entries") + + numPhysIntRegs = Param.Unsigned(128, "Number of physical integer registers") + numPhysFloatRegs = Param.Unsigned(128, "Number of physical floating point " + "registers") + backComSize = Param.Unsigned(5, "Time buffer size for backwards communication") + forwardComSize = Param.Unsigned(5, "Time buffer size for forward communication") + + instShiftAmt = Param.Unsigned(2, "Number of bits to shift instructions by") + + BTBEntries = Param.Unsigned(4096,"Number of BTBEntries") + IBTBEntries = Param.Unsigned(256,"Number of IBTBEntries") + iExitPredictorEntries = Param.Unsigned(512,"Number of IExitPredictorEntries") + iExitLength = Param.Unsigned(3,"Length of iExit") + CTBEntries = Param.Unsigned(128,"Number of CTBEntries") + BTPEntries = Param.Unsigned(4096,"Number of BTPEntries") + blockShiftAmt = Param.Unsigned(7,"Number of bits to shift blocks by") + offsetLength = Param.Unsigned(13,"Length of offset") + predType = Param.String("tournament", "Branch predictor type ('local', 'tournament')") + localPredictorSize = Param.Unsigned(1024, "Size of local predictor") + localHistoryTableSize = Param.Unsigned(512, "Size of local history table") + localHistoryBits = Param.Unsigned(10, "Bits for the local history") + globalPredictorSize = Param.Unsigned(4096, "Size of global predictor") + globalHistoryBits = Param.Unsigned(12, "Bits of global history") + choiceHistoryBits = Param.Unsigned(12, "Bits of choice history") + choicePredictorSize = Param.Unsigned(4096, "Size of choice predictor") + choiceCtrBits = Param.Unsigned(2, "Bits of choice counters") + RASSize = Param.Unsigned(64,"Size of return address stack") + lsEntries = Param.Unsigned(64,"Number of LSEntries") + + + 
smtNumFetchingThreads = Param.Unsigned(1, "SMT Number of Fetching Threads") + smtFetchPolicy = Param.String('SingleThread', "SMT Fetch policy") + smtLSQPolicy = Param.String('Partitioned', "SMT LSQ Sharing Policy") + smtLSQThreshold = Param.Int(100, "SMT LSQ Threshold Sharing Parameter") + smtIQPolicy = Param.String('Partitioned', "SMT IQ Sharing Policy") + smtIQThreshold = Param.Int(100, "SMT IQ Threshold Sharing Parameter") + smtROBPolicy = Param.String('Partitioned', "SMT ROB Sharing Policy") + smtROBThreshold = Param.Int(100, "SMT ROB Threshold Sharing Parameter") + smtCommitPolicy = Param.String('RoundRobin', "SMT Commit Policy") + + def addPrivateSplitL1Caches(self, ic, dc): + BaseCPU.addPrivateSplitL1Caches(self, ic, dc) + self.icache.tgts_per_mshr = 20 + self.dcache.tgts_per_mshr = 20 diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/base_block.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/base_block.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,282 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Jin Yanhan + * Gou Pengfei + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_BASE_EDGE_BLOCK_HH__ +#define __CPU_BASE_EDGE_BLOCK_HH__ + +#include +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "base/fast_alloc.hh" +#include "base/refcnt.hh" +#include "config/the_isa.hh" +#include "arch/faults.hh" +#include "cpu/edge/comm.hh" +//#include "cpu/edge/static_inst.hh" + +//class EdgeCPUImpl; + +template +class BaseEdgeBlock : public FastAlloc, public RefCounted +{ + public: + /** Typedefs from Impl. */ + typedef typename Impl::CPU CPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + + /** Typedefs from ISA. 
*/ + typedef TheISA::MachInst MachInst; + typedef TheISA::ExtMachInst ExtMachInst; + + typedef typename std::list::iterator ListIt; + typedef typename std::list::iterator BlockListIt; + typedef typename std::deque::iterator QueueIt; + + /// Block ID + typedef TheISA::BlockID BlockID; + typedef TheISA::InstID InstID; + typedef TheISA::HeaderInfo HeaderInfo; + + BaseEdgeBlock(BlockID blockID, CPU* cpu); + + ~BaseEdgeBlock(); + + std::string name() const; + + protected: + + /** Init some variables. */ + void initVars(); + + /** cpu model this block point to*/ + CPU* _cpu; + + /** Start addr of this block*/ + Addr _startPC; + + /** Thread ID */ + ThreadID _tid; + + BlockID _blockID; + //bool _speculative; + + uint32_t _numTotalInst; + uint32_t _numNopInst; + uint32_t _numControlInst; + uint32_t _numMemRefInst; + uint32_t _numStoreInst; + uint32_t _numRegWriteInst; + uint32_t _numRegReadInst; + + enum Flags { + InEdgeROB, + IsSpeculative, // Indicate speculative or not + IsCompleted, // Indicate completed or in progress + IsAcknowledged, // Indicate this block has been acknowledged by commit unit + IsCommitted, + IsSquashed, + IsExecuting, + IsFault, + IsNop, + CanCommit, + NeedSyscall, + NumFlags + }; + + std::bitset flags; + + /** Instruction list of this block. */ + std::list instList; + + BlockListIt blockListIt; + + struct lsidCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return (lhs->staticInst->getLSID()) > (rhs->staticInst->getLSID()); + } + }; + + typedef std::priority_queue, lsidCompare> MemRefQueue; + + /** The container hold the ld/st insts in lsid order. */ + MemRefQueue memRefInsts; + + /** Fault class indicate the fault caused by this block. 
*/ + Fault fault; + + #ifndef NDEBUG + + struct destRegCompare { + bool operator() (const QueueIt &lhs, const QueueIt &rhs) const + { + return ((*lhs)->getMappedDestReg(0)) > ((*rhs)->getMappedDestReg(0)); + } + }; + + typedef std::priority_queue, destRegCompare> WriteQueue; + + /** The container hold store insts for dumpping output. */ + WriteQueue writeInsts; + + struct effAddrCompare { + bool operator() (const QueueIt &lhs, const QueueIt &rhs) const + { + return ((*lhs)->effAddr) > ((*rhs)->effAddr); + } + }; + + typedef std::priority_queue, effAddrCompare> StoreQueue; + + /** The container hold write insts for dumpping output. */ + StoreQueue storeInsts; + + #endif + + public: + void setBlockListIt( BlockListIt it ) { blockListIt = it; } + BlockListIt &getBlockListIt() { return blockListIt; } + + ListIt addInst(DynInstPtr inst_ptr); + + DynInstPtr getInst(); + DynInstPtr getMemRefInst(); + + void dumpOutput(); + + void setStartPC(Addr pc); + void setTid ( ThreadID id ) { _tid = id; } + ThreadID getTid() const { return _tid;} + + Addr getStartPC(); + + uint64_t getNumInst() const; + + uint64_t getNumMemRefInst() const; + + void removeAllInsts(); + + BlockID getBlockID() const {return _blockID;} + + uint32_t getNumRegWriteInst() const {return _numRegWriteInst; } + + uint32_t getNumRegReadInst() const {return _numRegReadInst;} + + uint32_t getNumNopInst() const { return _numNopInst;} + + uint32_t getNumUsefulInst() const { return _numTotalInst - _numNopInst; } + + uint32_t getNumAllInst() const { return _numTotalInst; } + + uint32_t getNumInstButMemRef() const { return (_numTotalInst - _numMemRefInst); } + + uint32_t getNumStoreInst() const { return _numStoreInst; } + + uint32_t getNumLoadInst() const { return _numMemRefInst - _numStoreInst; } + + uint32_t getNumControlInst() const { return _numControlInst;} + + void setSquashed() { flags.set(IsSquashed);} + + void setExecuting() { flags.set(IsExecuting); } + + void setFault(Fault block_fault ) { fault = 
block_fault; flags.set(IsFault); } + + void setFault() { flags.set(IsFault); } + + void setCompleted() { + if ( flags[IsExecuting] ) flags.reset(IsExecuting); + flags.set(IsCompleted); + } + + void setInEdgeROB() { flags.set(InEdgeROB); } + + void clearInEdgeROB() { flags.reset(InEdgeROB); } + + void setCommitted() { flags.set(IsCommitted); } + + void setCanCommit() { flags.set(CanCommit); } + + void setNeedSyscall() { flags.set(NeedSyscall); } + + void setNop() { flags.set(IsNop); } + + bool isInEdgeROB() const { return flags[InEdgeROB];} + + bool isSpeculative() const { return flags[IsSpeculative]; } + + bool isNonSpeculative() const { return !flags[IsSpeculative];} + + bool isCompleted() const { return flags[IsCompleted];} + + bool isAcknowledged() const {return flags[IsAcknowledged]; } + + bool isSquashed() const { return flags[IsSquashed]; } + + bool isExecuting() const { return flags[IsExecuting]; } + + bool isCommitted() const { return flags[IsCommitted]; } + + bool isFault() const { return flags[IsFault]; } + + bool isNop() const { return flags[IsNop]; } + + Fault getFault() const { return fault; } + + bool readyToCommit() const { return flags[CanCommit]; } + + bool isNeedSyscall() const { return flags[NeedSyscall]; } + + void setSpeculative() { + flags.set(IsSpeculative); + } + + void setNonSpeculative() { + flags.reset(IsSpeculative); + } + + #ifndef NDEBUG + void setStoreInstIt(QueueIt &inst_it); + void setWriteInstIt(QueueIt &inst_it); + #endif + +}; + +#endif //__CPU_EDGE_BLOCK_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/base_block.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/base_block.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Jin Yanhan + * Gou Pengfei + * + * Date: Dec. 2009 + * + */ + +#include "cpu/edge/base_block_impl.hh" +#include "cpu/edge/impl.hh" + +template class BaseEdgeBlock; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/base_block_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/base_block_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Jin Yanhan + * Gou Pengfei + * + * Date: Dec. 
2009
+ *
+ */
+
+#include
+#include
+
+#include "base/types.hh"
+#include "base/trace.hh"
+#include "config/the_isa.hh"
+#include "cpu/edge/base_block.hh"
+#include "sim/byteswap.hh"
+#include "base/loader/symtab.hh"
+
+
+/**
+ * Stores the CPU pointer and the block ID, then resets the flags and
+ * counters through initVars().
+ */
+template
+BaseEdgeBlock::BaseEdgeBlock(BlockID blockID, CPU * cpu)
+    : _cpu(cpu), _blockID(blockID)
+{
+    initVars();
+}
+
+/**
+ * In debug builds, decrements the CPU's block counter and traces the
+ * new count. Does nothing in NDEBUG builds.
+ */
+template
+BaseEdgeBlock::~BaseEdgeBlock()
+{
+
+#ifndef NDEBUG
+    --_cpu->blockcount;
+
+    DPRINTF(EdgeBlockCount,
+            "EdgeBlock: [Bid:%lli] destroyed. Blockcount for %s = %i\n",
+            _blockID, _cpu->name(), _cpu->blockcount);
+#endif
+
+}
+
+/**
+ * Marks the block speculative and zeroes every per-block instruction
+ * counter. In debug builds it also increments the CPU's block counter
+ * (decremented again by the destructor) and asserts that the counter
+ * never exceeds 150.
+ */
+template
+void
+BaseEdgeBlock::initVars()
+{
+    flags.set(IsSpeculative); // Inst block is initialized as speculative.
+    _numRegWriteInst = 0;
+    _numRegReadInst = 0;
+    _numStoreInst = 0;
+    _numTotalInst = 0;
+    _numNopInst = 0;
+    _numMemRefInst = 0;
+    _numControlInst = 0;
+
+    #ifndef NDEBUG
+    ++_cpu->blockcount;
+
+    // The former "if (count > 150)" wrapper only repeated the assert's own
+    // condition, so the assert is stated directly.
+    assert(_cpu->blockcount <= 150);
+
+    DPRINTF(EdgeBlockCount,
+            "EdgeBlock: [Bid:%lli] created. Blockcount for %s = %i\n",
+            _blockID, _cpu->name(), _cpu->blockcount);
+    #endif
+}
+
+/** Returns the fixed name "EdgeBlock". */
+template
+std::string
+BaseEdgeBlock::name() const {
+    return "EdgeBlock";
+}
+
+/**
+ * Appends an instruction to this block and updates the counters.
+ *
+ * Every instruction goes into instList. Non-NOP memory references are
+ * additionally pushed onto memRefInsts; control and global-register
+ * read/write instructions only bump their counters.
+ *
+ * @param inst_ptr The instruction to add; its block ID must match this
+ *                 block's ID (asserted).
+ * @return Iterator to the element just appended to instList.
+ */
+template
+typename BaseEdgeBlock::ListIt
+BaseEdgeBlock::addInst(DynInstPtr inst_ptr)
+{
+    assert( _blockID == inst_ptr->getBlockID());
+
+    // Every inst should be added into instList including ld/st.
+    instList.push_back(inst_ptr);
+
+    _numTotalInst ++;
+
+    if ( !inst_ptr->isNop() ) {
+
+        if ( inst_ptr->isMemRef() ) {
+
+            // Ld/st insts will be added into this priority queue
+            // in lsid order.
+            memRefInsts.push(inst_ptr);
+
+            DPRINTF(EdgeBlock, "Adding load/store insts[lsid:%i].\n",
+                    inst_ptr->staticInst->getLSID() );
+
+            _numMemRefInst ++;
+
+            if ( inst_ptr->isStore() )
+                _numStoreInst ++;
+
+        } else if ( inst_ptr->isControl() ) {
+            _numControlInst ++;
+        } else if ( inst_ptr->isGRegWR()) {
+
+            // One instruction class covers both register reads and
+            // writes, so both validity bits are checked independently.
+            if ( inst_ptr->isGRegWriteValid() ) _numRegWriteInst ++;
+
+            if ( inst_ptr->isGRegReadValid() ) _numRegReadInst ++;
+
+        }
+
+        DPRINTF(EdgeBlock,"tid[%i] -- PC:%#x\n ",
+                _tid, inst_ptr->readPC());
+        DPRINTF(EdgeBlock, "Blcok ID: %lli -- Chunk ID: %lli -- Inst ID: %lli\n",
+                inst_ptr->getBlockID(), inst_ptr->getChunkID(), inst_ptr->getInstID() );
+        DPRINTF(EdgeBlock, "Instruction is: %s\n\n",
+                inst_ptr->staticInst->disassemble(inst_ptr->readPC()));
+    }else {
+
+        DPRINTF(EdgeBlock, "tid[%i] -- PC:%#x NOP\n",
+                _tid, inst_ptr->readPC() );
+        _numNopInst ++;
+
+    }
+
+    return (--instList.end());
+
+}
+
+/**
+ * Removes and returns the instruction at the front of instList.
+ * The list must not be empty (asserted). The counters are not updated.
+ */
+template
+typename BaseEdgeBlock::DynInstPtr
+BaseEdgeBlock::getInst()
+{
+    assert( !instList.empty() );
+    DynInstPtr inst = instList.front();
+    instList.pop_front();
+    return inst;
+
+}
+
+/**
+ * Removes and returns the top entry of memRefInsts.
+ * The queue must not be empty (asserted). The counters are not updated.
+ */
+template
+typename BaseEdgeBlock::DynInstPtr
+BaseEdgeBlock::getMemRefInst()
+{
+    assert(!memRefInsts.empty() );
+    DynInstPtr inst = memRefInsts.top();
+    memRefInsts.pop();
+    return inst;
+}
+
+/**
+ * Sets the start PC of this block.
+ * @param pc Start address; must have no bits set in TheISA::ChunkOffset
+ *           (asserted).
+ */
+template
+void
+BaseEdgeBlock::setStartPC(Addr pc)
+{
+    assert((pc&TheISA::ChunkOffset) == 0);
+    _startPC = pc;
+}
+
+/** Returns the start PC set by setStartPC(). */
+template
+Addr
+BaseEdgeBlock::getStartPC()
+{
+    return _startPC;
+}
+
+/**
+ * Returns the number of instructions added to this block.
+ * Asserts that instList still holds exactly that many entries, so in
+ * debug builds this fails once getInst() has popped anything.
+ */
+template
+uint64_t
+BaseEdgeBlock::getNumInst() const
+{
+    assert(instList.size() == _numTotalInst );
+    return _numTotalInst;
+}
+
+/**
+ * Returns the number of memory-reference instructions added.
+ * Asserts that memRefInsts still holds exactly that many entries.
+ */
+template
+uint64_t
+BaseEdgeBlock::getNumMemRefInst() const
+{
+    assert(memRefInsts.size() == _numMemRefInst);
+    return _numMemRefInst;
+}
+
+/**
+ * Drops every instruction held by this block and zeroes the per-block
+ * counters. When instList is already empty the call only emits the
+ * first trace message and returns.
+ */
+template
+void
+BaseEdgeBlock::removeAllInsts()
+{
+    DPRINTF(EdgeBlock, "Removing insts in block %i\n", _blockID);
+    if (instList.empty())
+        return;
+
+    #ifndef NDEBUG
+    // storeInsts and writeInsts are only declared in debug builds, so the
+    // message that reads them is guarded the same way.
+    DPRINTF(EdgeBlock, "Block size:%i\nOutput stores:%i\nOutput writes:%i\n",
+            instList.size(),
+            storeInsts.size(),
+            writeInsts.size());
+    #endif
+
+    _numRegWriteInst = 0;
+    _numRegReadInst = 0;
+    _numStoreInst = 0;
+    _numTotalInst = 0;
+    // The NOP count must be cleared together with the total; otherwise
+    // getNumUsefulInst() (_numTotalInst - _numNopInst, unsigned) wraps.
+    _numNopInst = 0;
+    _numMemRefInst = 0;
+    _numControlInst = 0;
+
+    instList.clear();
+
+    // Queues have no clear() method.
+    while(!memRefInsts.empty()){
+        memRefInsts.pop();
+    }
+
+    #ifndef NDEBUG
+
+    while(!storeInsts.empty()){
+        storeInsts.pop();
+    }
+
+    while(!writeInsts.empty()){
+        writeInsts.pop();
+    }
+
+    #endif
+}
+
+#ifndef NDEBUG
+
+/**
+ * Debug builds only: queues an executed store for dumpOutput().
+ * @param inst_it Iterator to a store that has executed (asserted).
+ */
+template
+void
+BaseEdgeBlock::setStoreInstIt(QueueIt &inst_it)
+{
+    assert((*inst_it)->isStore()&&(*inst_it)->isExecuted());
+
+    storeInsts.push(inst_it);
+}
+
+/**
+ * Debug builds only: queues an executed register write for dumpOutput().
+ * @param inst_it Iterator to a global-register instruction whose write
+ *                part is valid and has executed (asserted).
+ */
+template
+void
+BaseEdgeBlock::setWriteInstIt(QueueIt &inst_it)
+{
+    assert((*inst_it)->isGRegWR() && (*inst_it)->isGRegWriteValid() &&
+           (*inst_it)->isWriteRegExecuted() );
+
+    writeInsts.push(inst_it);
+}
+
+#endif
+
+/**
+ * Debug builds: prints the block's start PC and nearest symbol, then one
+ * "M[addr,size] value" line per queued store and one "G[reg] value" line
+ * per queued register write, skipping nullified instructions. Both queues
+ * are drained. NDEBUG builds: panics.
+ */
+template
+void
+BaseEdgeBlock::dumpOutput()
+{
+#ifndef NDEBUG
+    std::string sym_str;
+    Addr sym_addr;
+    // debugSymbolTable is a global pointer; skip the lookup when it is
+    // unset so the block name is printed empty instead of crashing.
+    if (debugSymbolTable) {
+        debugSymbolTable->findNearestSymbol(this->getStartPC(),
+                                            sym_str, sym_addr);
+    }
+
+    DPRINTFR(EdgeBlockOutput, "PC %#x\n.BLOCK %s\n", this->getStartPC(), sym_str.c_str());
+
+    while( !storeInsts.empty() ){
+        DynInstPtr store = *(storeInsts.top());
+
+        if ( store->isNullified() ){
+            storeInsts.pop();
+            continue;
+        }
+
+        assert(store->effAddrValid);
+        DPRINTFR( EdgeBlockOutput, "M[%#x,%i] %#x\n", store->effAddr,
+                  store->memAccSize, store->getIntIQOperand(1) );
+        storeInsts.pop();
+    }
+
+    while( !writeInsts.empty() ){
+        DynInstPtr write = *(writeInsts.top());
+
+        if( write->isNullified() ){
+            writeInsts.pop();
+            continue;
+        }
+
+        DPRINTFR( EdgeBlockOutput, "G[%i] %#x\n", write->getMappedDestReg(0),
+                  write->getIntIQOperand(0));
+        writeInsts.pop();
+    }
+#else
+    panic("DumpOutput is not supported in non-debug mode. Recompile M5 with DEBUG-mode enable.\n");
+#endif
+}
+
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/base_dyn_inst.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/base_dyn_inst.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,1124 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec.
2009
+ *
+ */
+
+// NOTE(review): this file is cpu/edge/base_dyn_inst.hh, but the guard name
+// below does not mention "edge". Confirm it cannot clash with the include
+// guard of another base_dyn_inst.hh in the tree.
+#ifndef __CPU_BASE_DYN_INST_HH__
+#define __CPU_BASE_DYN_INST_HH__
+
+#include
+#include
+#include
+#include
+
+#include "arch/faults.hh"
+#include "base/fast_alloc.hh"
+#include "base/trace.hh"
+#include "config/full_system.hh"
+#include "config/the_isa.hh"
+#include "cpu/edge/comm.hh"
+#include "cpu/edge/exetrace.hh"
+#include "cpu/edge/block.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/op_class.hh"
+#include "cpu/edge/static_inst.hh"
+#include "mem/packet.hh"
+#include "sim/system.hh"
+#include "sim/tlb.hh"
+
+/**
+ * @file
+ * Defines an EDGE dynamic instruction context.
+ */
+
+// Forward declaration.
+class EDGEStaticInstPtr;
+
+template
+class BaseEdgeDynInst : public FastAlloc, public RefCounted
+{
+  public:
+    // Typedef for the CPU.
+    typedef typename Impl::CPUType ImplCPU;
+    typedef typename ImplCPU::ImplState ImplState;
+    // Pointer type of the inst block this instruction belongs to.
+    typedef typename Impl::EdgeBlockPtr BlockPtr;
+
+    // Logical register index type.
+    typedef TheISA::RegIndex RegIndex;
+    // Integer register type.
+    typedef TheISA::IntReg IntReg;
+    // Floating point register type.
+    typedef TheISA::FloatReg FloatReg;
+
+    // The DynInstPtr type.
+    typedef typename Impl::DynInstPtr DynInstPtr;
+
+    // The list of instructions iterator type.
+    typedef typename std::list::iterator ListIt;
+    // The queue (deque) of instructions iterator type.
+    typedef typename std::deque::iterator QueueIt;
+
+    enum {
+        MaxInstSrcRegs = TheISA::MaxInstSrcRegs, /// Max source regs
+        MaxInstDestRegs = TheISA::MaxInstDestRegs, /// Max dest regs
+    };
+
+    /** The EDGEStaticInst used by this BaseEdgeDynInst. */
+    EDGEStaticInstPtr staticInst;
+
+    ////////////////////////////////////////////
+    //
+    // INSTRUCTION EXECUTION
+    //
+    ////////////////////////////////////////////
+    /** InstRecord that tracks this instruction.
*/
+    Trace::EdgeInstRecord *traceData;
+
+    // All three demap variants forward to the CPU's single demapPage().
+    void demapPage(Addr vaddr, uint64_t asn)
+    {
+        cpu->demapPage(vaddr, asn);
+    }
+    void demapInstPage(Addr vaddr, uint64_t asn)
+    {
+        cpu->demapPage(vaddr, asn);
+    }
+    void demapDataPage(Addr vaddr, uint64_t asn)
+    {
+        cpu->demapPage(vaddr, asn);
+    }
+
+    /**
+     * Does a read to a given address.
+     * @param addr The address to read.
+     * @param data The read's data is written into this parameter.
+     * @param flags The request's flags.
+     * @return Returns any fault due to the read.
+     */
+    template
+    Fault read(Addr addr, T &data, unsigned flags);
+
+    /**
+     * Does a write to a given address.
+     * @param data The data to be written.
+     * @param addr The address to write to.
+     * @param flags The request's flags.
+     * @param res The result of the write (for load locked/store conditionals).
+     * @return Returns any fault due to the write.
+     */
+    template
+    Fault write(T data, Addr addr, unsigned flags,
+                uint64_t *res);
+
+    void prefetch(Addr addr, unsigned flags);
+    void writeHint(Addr addr, int size, unsigned flags);
+    Fault copySrcTranslate(Addr src);
+    Fault copy(Addr dest);
+
+    /** @todo: Consider making this private. */
+  public:
+    /** The sequence number of the instruction.
*/
+    InstSeqNum seqNum;
+
+    /** Bit positions of the status bitset below. */
+    enum Status {
+        IqEntry, /// Instruction is in the IQ
+        RobEntry, /// Instruction is in the ROB
+        LsqEntry, /// Instruction is in the LSQ
+        Completed, /// Instruction has completed
+        BlockCompleted, /// Block of this instruction has completed
+        ResultReady, /// Instruction has its result
+        CanIssue, /// Instruction can issue and execute
+        Issued, /// Instruction has issued
+        Executed, /// Instruction has executed
+        CanCommit, /// Instruction can commit
+        AtCommit, /// Instruction has reached commit
+        Committed, /// Instruction has committed
+        Squashed, /// Instruction is squashed
+        SquashedInIQ, /// Instruction is squashed in the IQ
+        SquashedInLSQ, /// Instruction is squashed in the LSQ
+        SquashedInROB, /// Instruction is squashed in the ROB
+        RecoverInst, /// Is a recover instruction
+        BlockingInst, /// Is a blocking instruction
+        ThreadsyncWait, /// Is a thread synchronization instruction
+        SerializeBefore, /// Needs to serialize on
+                         /// instructions ahead of it
+        SerializeAfter, /// Needs to serialize instructions behind it
+        SerializeHandled, /// Serialization has been handled
+        PredMatched, /// Predication matched
+        Nullified, /// This inst has received nullify token
+
+        // Dec. 2009, phoenix
+        // These flags are CRUCIAL for the write_read inst class in TRIPS.
+        // To limit implementation complexity, the global-register write and
+        // read instructions were merged into one class named Write_read.
+        // As a result, the flags must be set explicitly to indicate which
+        // operation is meant. That is to say, if IsGRegR is set, the inst
+        // should read, while if IsGRegW is set, the inst should write. The
+        // conditional execution is implemented in the execute() method of
+        // the Write_read class.
+        // NOTE(review): IsGRegR/IsGRegW are not declared in this enum;
+        // presumably the Reg-read/Reg-write flags below are meant - confirm.
+        //
+        ExecutingReadReg, /// Now executing Reg-read.
+        ExecutingWriteReg, /// Now executing Reg-write.
+        ReadRegExecuted, /// Reg-read has been executed.
+        WriteRegExecuted, /// Reg-write has been executed.
+        ReadRegIssued, /// Reg-read has been issued.
+        WriteRegIssued, /// Reg-write has been issued.
+        ReadRegIssuing, /// Now issuing Reg-read.
+        WriteRegIssuing, /// Now issuing Reg-write.
+
+        NumStatus
+    };
+
+    /** The status of this BaseEdgeDynInst. Several bits can be set. */
+    std::bitset status;
+
+    /** The thread this instruction is from. */
+    ThreadID threadNumber;
+
+    /** Data address space ID, for loads & stores. */
+    short asid;
+
+    /** How many source registers are ready. */
+    unsigned readyRegs;
+
+    /** Pointer to the Impl's CPU object. */
+    ImplCPU *cpu;
+
+    /** Pointer to the thread state. */
+    ImplState *thread;
+
+    /** The kind of fault this instruction has generated. */
+    Fault fault;
+
+    /** Pointer to the data for the memory access. */
+    uint8_t *memData;
+
+    /** The effective virtual address (lds & stores only). */
+    Addr effAddr;
+
+    /** The size of the mem access. Currently only used when dumping block outputs. */
+    int memAccSize;
+
+    /** Is the effective virtual address valid. */
+    bool effAddrValid;
+
+    /** The effective physical address. */
+    Addr physEffAddr;
+
+    /** Effective virtual address for a copy source. */
+    Addr copySrcEffAddr;
+
+    /** Effective physical address for a copy source. */
+    Addr copySrcPhysEffAddr;
+
+    /** The memory request flags (from translation). */
+    unsigned memReqFlags;
+
+    union Result {
+        uint64_t integer;
+//        float fp;
+        double dbl;
+    };
+
+    /** The result of the instruction; assumes for now that there's only one
+     *  destination register.
+     */
+    Result instResult;
+
+    /** Records changes to result? */
+    bool recordResult;
+
+    /** PC of this instruction. */
+    Addr PC;
+
+    /** Micro PC of this instruction. */
+    Addr microPC;
+
+  protected:
+
+    /** Edge inst block class pointer this inst belongs to. */
+    BlockPtr instBlock;
+
+    /** Next non-speculative PC.  It is not filled in at fetch, but rather
+     *  once the target of the branch is truly known (either decode or
+     *  execute).
+     */
+    Addr nextPC;
+
+    // Disabled member: block PC of this inst.
+//    Addr blockPC;
+
+    // Disabled member: next block PC for EDGE architecture.
+//    Addr nextBlockPC;
+
+    /** Next non-speculative NPC. Target PC for Mips or Sparc. */
+    Addr nextNPC;
+
+    /** Next non-speculative micro PC. */
+    Addr nextMicroPC;
+
+    /** Predicted next PC. */
+    Addr predPC;
+
+    /** Predicted next block PC. */
+    Addr predBlockPC;
+
+    /** Predicted next NPC. */
+    Addr predNPC;
+
+    /** Predicted next microPC */
+    Addr predMicroPC;
+
+    /** If this is a branch that was predicted taken */
+    bool predTaken;
+
+    /** Block status of this dyn inst. */
+    TheISA::BlockStatus blockStatus;
+
+  public:
+
+#ifdef DEBUG
+    void dumpSNList();
+#endif
+
+    /** Whether or not the source register is ready.
+     *  @todo: Not sure this should be here vs the derived class.
+     */
+    bool _readySrcRegIdx[MaxInstSrcRegs];
+
+  protected:
+    /** Flattened register index of the destination registers of this
+     *  instruction.
+     */
+    TheISA::RegIndex _flatDestRegIdx[TheISA::MaxInstDestRegs];
+
+    /** Flattened register index of the source registers of this
+     *  instruction.
+     */
+    TheISA::RegIndex _flatSrcRegIdx[TheISA::MaxInstSrcRegs];
+
+    /** Physical register index of the destination registers of this
+     *  instruction.
+     */
+    PhysRegIndex _destRegIdx[TheISA::MaxInstDestRegs];
+
+    /** Physical register index of the source registers of this
+     *  instruction.
+     */
+    PhysRegIndex _srcRegIdx[TheISA::MaxInstSrcRegs];
+
+    /** Physical register index of the previous producers of the
+     *  architected destinations.
+     */
+    PhysRegIndex _prevDestRegIdx[TheISA::MaxInstDestRegs];
+
+  public:
+
+    /** Returns the physical register index of the i'th destination
+     *  register.
+     */
+    PhysRegIndex renamedDestRegIdx(int idx) const
+    {
+        return _destRegIdx[idx];
+    }
+
+    /** Returns the physical register index of the i'th source register.
*/
+    PhysRegIndex renamedSrcRegIdx(int idx) const
+    {
+        return _srcRegIdx[idx];
+    }
+
+    /** Returns the flattened register index of the i'th destination
+     *  register.
+     */
+    TheISA::RegIndex flattenedDestRegIdx(int idx) const
+    {
+        return _flatDestRegIdx[idx];
+    }
+
+    /** Returns the flattened register index of the i'th source register */
+    TheISA::RegIndex flattenedSrcRegIdx(int idx) const
+    {
+        return _flatSrcRegIdx[idx];
+    }
+
+    /** Returns the physical register index of the previous physical register
+     *  that remapped to the same logical register index.
+     */
+    PhysRegIndex prevDestRegIdx(int idx) const
+    {
+        return _prevDestRegIdx[idx];
+    }
+
+    /** Sets the mapped index of the idx'th destination register
+     *  (this method was renameDestReg before being renamed mapDestReg).
+     *  This method will be used to set a proper dest reg idx
+     *  when mapping inst blocks.
+     */
+    void mapDestReg(int idx,
+                    PhysRegIndex mapped_dest )
+    {
+        _destRegIdx[idx] = mapped_dest;
+    }
+
+    /** Get the mapped dest reg idx (same storage as renamedDestRegIdx()). */
+    PhysRegIndex getMappedDestReg( int idx ) const
+    {
+        return _destRegIdx[idx];
+    }
+
+    /** Sets the mapped index of the idx'th source register
+     *  (this method was renameSrcReg before being renamed mapSrcReg).
+     *  This method will be used to set a proper src reg idx
+     *  when mapping inst blocks.
+     */
+    void mapSrcReg(int idx, PhysRegIndex mapped_src)
+    {
+        _srcRegIdx[idx] = mapped_src;
+    }
+
+    /** Get mapped src reg idx (same storage as renamedSrcRegIdx()). */
+    PhysRegIndex getMappedSrcReg ( int idx ) const
+    {
+        return _srcRegIdx[idx];
+    }
+
+    /** Flattens a source architectural register index into a logical index.
+     */
+    void flattenSrcReg(int idx, TheISA::RegIndex flattened_src)
+    {
+        _flatSrcRegIdx[idx] = flattened_src;
+    }
+
+    /** Flattens a destination architectural register index into a logical
+     *  index.
+     */
+    void flattenDestReg(int idx, TheISA::RegIndex flattened_dest)
+    {
+        _flatDestRegIdx[idx] = flattened_dest;
+    }
+    /** BaseEdgeDynInst constructor given a static instruction.
+     *  @param staticInst A EDGEStaticInstPtr to the underlying instruction.
+     *  @param PC The PC of the instruction.
+     *  @param NPC Presumably the next PC of the instruction - TODO confirm.
+     *  @param microPC Presumably the micro PC of the instruction - TODO confirm.
+     *  @param pred_PC The predicted next PC.
+     *  @param pred_NPC The predicted next NPC.
+     *  @param pred_MicroPC Presumably the predicted next micro PC - TODO confirm.
+     *  @param seq_num The sequence number of the instruction.
+     *  @param cpu Pointer to the instruction's CPU.
+     *  @param blockstatus Presumably the initial blockStatus - TODO confirm.
+     */
+    BaseEdgeDynInst(EDGEStaticInstPtr staticInst, Addr PC, Addr NPC, Addr microPC,
+                    Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
+                    InstSeqNum seq_num, ImplCPU *cpu, TheISA::BlockStatus blockstatus);
+
+    /** BaseEdgeDynInst constructor given a binary instruction.
+     *  @param inst The binary instruction.
+     *  @param PC The PC of the instruction.
+     *  @param NPC Presumably the next PC of the instruction - TODO confirm.
+     *  @param microPC Presumably the micro PC of the instruction - TODO confirm.
+     *  @param pred_PC The predicted next PC.
+     *  @param pred_NPC The predicted next NPC.
+     *  @param pred_MicroPC Presumably the predicted next micro PC - TODO confirm.
+     *  @param seq_num The sequence number of the instruction.
+     *  @param cpu Pointer to the instruction's CPU.
+     *  @param blockstatus Presumably the initial blockStatus - TODO confirm.
+     */
+    BaseEdgeDynInst(TheISA::ExtMachInst inst, Addr PC, Addr NPC, Addr microPC,
+                    Addr pred_PC, Addr pred_NPC, Addr pred_MicroPC,
+                    InstSeqNum seq_num, ImplCPU *cpu, TheISA::BlockStatus blockstatus);
+
+    /* Disabled: BaseEdgeDynInst constructor given a EDGEStaticInst pointer.
+     *  @param _staticInst The EDGEStaticInst for this BaseEdgeDynInst.
+     */
+    //BaseEdgeDynInst(EDGEStaticInstPtr &_staticInst);
+
+    /** BaseEdgeDynInst destructor. */
+    ~BaseEdgeDynInst();
+
+  private:
+    /** Function to initialize variables in the constructors. */
+    void initVars();
+
+  public:
+    /** Dumps out contents of this BaseEdgeDynInst. */
+    void dump();
+
+    /** Dumps out contents of this BaseEdgeDynInst into given string. */
+    void dump(std::string &outstring);
+
+    /** Read this CPU's ID. */
+    int cpuId() { return cpu->cpuId(); }
+
+    /** Read this context's system-wide ID **/
+    int contextId() { return thread->contextId(); }
+
+    /** Returns the fault type. */
+    Fault getFault() { return fault; }
+
+    /** Sets the fault from outside the instruction. */
+    void setFault(Fault fault_to_set ) { fault = fault_to_set; }
+
+    /** Checks whether or not this instruction has had its branch target
+     *  calculated yet.  For now it is not utilized and is hacked to be
+     *  always false.
+     *  @todo: Actually use this instruction.
+     */
+    bool doneTargCalc() { return false; }
+
+    /** Returns the next PC.  This could be the speculative next PC if it is
+     *  called prior to the actual branch target being calculated.
+     */
+    Addr readNextPC() { return nextPC; }
+
+    /** Return the next block PC, i.e. the branch target of this inst's block. */
+    Addr getNextBlockPC() const { return instBlock->getBranchTarget(); }
+
+    /** Return the block PC of this inst, i.e. the start PC of its block. */
+    Addr getBlockPC() { return instBlock->getStartPC(); }
+
+    /** Return the block pointer of this inst. */
+    BlockPtr getBlockPtr() const { return instBlock;}
+
+
+    /** Returns the next NPC.  This could be the speculative next NPC if it is
+     *  called prior to the actual branch target being calculated.
+     *  Without ISA delay slots it is derived as nextPC plus one MachInst.
+     */
+    Addr readNextNPC()
+    {
+#if ISA_HAS_DELAY_SLOT
+        return nextNPC;
+#else
+        return nextPC + sizeof(TheISA::MachInst);
+#endif
+    }
+
+    /** Returns the next micro PC. */
+    Addr readNextMicroPC()
+    {
+        return nextMicroPC;
+    }
+
+    /** Set the predicted target of this current instruction. */
+    void setPredTarg(Addr predicted_PC, Addr predicted_NPC,
+                     Addr predicted_MicroPC)
+    {
+        predPC = predicted_PC;
+        predNPC = predicted_NPC;
+        predMicroPC = predicted_MicroPC;
+    }
+
+    /** Set the predicted target of this inst block. */
+    void setPredBlockTarg( Addr predicted_block_pc )
+    {
+        predBlockPC = predicted_block_pc;
+    }
+
+    /** Returns the predicted PC immediately after the branch. */
+    Addr readPredPC() { return predPC; }
+
+    /** Returns the predicted block PC. */
+    Addr readPredBlockPC() const { return predBlockPC;}
+
+    /** Returns the predicted PC two instructions after the branch */
+    Addr readPredNPC() { return predNPC; }
+
+    /** Returns the predicted micro PC after the branch */
+    Addr readPredMicroPC() { return predMicroPC; }
+
+    /** Returns whether the instruction was predicted taken or not. */
+    bool readPredTaken()
+    {
+        return predTaken;
+    }
+
+    /** Records whether the instruction was predicted taken. */
+    void setPredTaken(bool predicted_taken)
+    {
+        predTaken = predicted_taken;
+    }
+
+    /** Returns whether the instruction mispredicted.
+     *  Compares the predicted PC, NPC and micro PC with the actual next
+     *  values; the predicted block PC is not part of this check.
+     */
+    bool mispredicted()
+    {
+        return readPredPC() != readNextPC() ||
+            readPredNPC() != readNextNPC() ||
+            readPredMicroPC() != readNextMicroPC();
+    }
+
+    //
+    //  Instruction types.  Forward checks to EDGEStaticInst object.
+    //
+    bool isGRegWR() const { return staticInst->isGRegWR(); }
+    bool isGRegWriteValid() const { return staticInst->isGRegWriteValid();}
+    bool isGRegReadValid() const { return staticInst->isGRegReadValid(); }
+    bool isMove() const { return staticInst->isMove(); }
+    bool isNop() const { return staticInst->isNop(); }
+    bool isMemRef() const { return staticInst->isMemRef(); }
+    bool isLoad() const { return staticInst->isLoad(); }
+    bool isStore() const { return staticInst->isStore(); }
+    bool isStoreConditional() const
+    { return staticInst->isStoreConditional(); }
+    bool isInstPrefetch() const { return staticInst->isInstPrefetch(); }
+    bool isDataPrefetch() const { return staticInst->isDataPrefetch(); }
+    bool isCopy() const { return staticInst->isCopy(); }
+    bool isInteger() const { return staticInst->isInteger(); }
+    bool isFloating() const { return staticInst->isFloating(); }
+    bool isControl() const { return staticInst->isControl(); }
+    bool isCall() const { return staticInst->isCall(); }
+    bool isReturn() const { return staticInst->isReturn(); }
+    bool isDirectCtrl() const { return staticInst->isDirectCtrl(); }
+    bool isIndirectCtrl() const { return staticInst->isIndirectCtrl(); }
+    bool isCondCtrl() const { return staticInst->isCondCtrl(); }
+    bool isUncondCtrl() const { return staticInst->isUncondCtrl(); }
+    bool isCondDelaySlot() const { return staticInst->isCondDelaySlot(); }
+    bool isThreadSync() const { return staticInst->isThreadSync(); }
+    bool isSerializing() const { return staticInst->isSerializing(); }
+    // The two serialize checks also honor the temporary status bits set by
+    // setSerializeBefore() / setSerializeAfter().
+    bool isSerializeBefore() const
+    { return staticInst->isSerializeBefore() || status[SerializeBefore]; }
+    bool isSerializeAfter() const
+    { return staticInst->isSerializeAfter() || status[SerializeAfter]; }
+    bool isMemBarrier() const { return staticInst->isMemBarrier(); }
+    bool isWriteBarrier() const { return staticInst->isWriteBarrier(); }
+    bool isNonSpeculative() const { return staticInst->isNonSpeculative(); }
+    bool isQuiesce() const { return staticInst->isQuiesce(); }
+    bool isIprAccess() const { return staticInst->isIprAccess(); }
+    bool isUnverifiable() const { return staticInst->isUnverifiable(); }
+    bool isSyscall() const { return staticInst->isSyscall(); }
+    bool isMacroop() const { return staticInst->isMacroop(); }
+    bool isMicroop() const { return staticInst->isMicroop(); }
+    bool isDelayedCommit() const { return staticInst->isDelayedCommit(); }
+    bool isLastMicroop() const { return staticInst->isLastMicroop(); }
+    bool isFirstMicroop() const { return staticInst->isFirstMicroop(); }
+    bool isMicroBranch() const { return staticInst->isMicroBranch(); }
+
+    /** Temporarily sets this instruction as a serialize before instruction. */
+    void setSerializeBefore() { status.set(SerializeBefore); }
+
+    /** Clears the serializeBefore part of this instruction. */
+    void clearSerializeBefore() { status.reset(SerializeBefore); }
+
+    /** Checks if this serializeBefore is only temporarily set. */
+    bool isTempSerializeBefore() { return status[SerializeBefore]; }
+
+    /** Temporarily sets this instruction as a serialize after instruction. */
+    void setSerializeAfter() { status.set(SerializeAfter); }
+
+    /** Clears the serializeAfter part of this instruction.*/
+    void clearSerializeAfter() { status.reset(SerializeAfter); }
+
+    /** Checks if this serializeAfter is only temporarily set. */
+    bool isTempSerializeAfter() { return status[SerializeAfter]; }
+
+    /** Sets the serialization part of this instruction as handled. */
+    void setSerializeHandled() { status.set(SerializeHandled); }
+
+    /** Checks if the serialization part of this instruction has been
+     *  handled.
This does not apply to the temporary serializing + * state; it only applies to this instruction's own permanent + * serializing state. + */ + bool isSerializeHandled() { return status[SerializeHandled]; } + + /** + * Edge reg operation related flags. + * Each is*() query below returns the status flag of the same name; + * isBlockSpeculative() instead forwards to instBlock->isSpeculative(). + */ + bool isExecutingReadReg() const { return status[ExecutingReadReg]; } + bool isExecutingWriteReg() const { return status[ExecutingWriteReg]; } + bool isReadRegExecuted() const { return status[ReadRegExecuted]; } + bool isWriteRegExecuted() const { return status[WriteRegExecuted]; } + bool isReadRegIssued() const { return status[ReadRegIssued]; } + bool isWriteRegIssued() const { return status[WriteRegIssued];} + bool isReadRegIssuing() const { return status[ReadRegIssuing]; } + bool isWriteRegIssuing() const { return status[WriteRegIssuing]; } + bool isPredMatched() const { return status[PredMatched]; } + void setPredMatched() { status.set(PredMatched); } + bool isNullified() const { return status[Nullified]; } + bool isBlockSpeculative() { return instBlock->isSpeculative();} + /** Sets the Nullified status flag. */ + void setNullified() { status.set(Nullified); } + /** + * Mark a register read / register write as executing. + * The opposite flag is reset first, so these two setters never leave + * both ExecutingReadReg and ExecutingWriteReg set. + */ + void setExecutingReadReg() { + status.reset(ExecutingWriteReg); + status.set(ExecutingReadReg); + } + void setExecutingWriteReg() { + status.reset(ExecutingReadReg); + status.set(ExecutingWriteReg); + } + /** Set the ReadRegExecuted / WriteRegExecuted status flags. */ + void setReadRegExecuted() { status.set(ReadRegExecuted); } + void setWriteRegExecuted() { status.set(WriteRegExecuted); } + /** + * Record that the register read / write has issued: the matching + * *Issuing flag is reset and the *Issued flag is set. + */ + void setReadRegIssued() { + status.reset(ReadRegIssuing); + status.set(ReadRegIssued); + } + void setWriteRegIssued() { + status.reset(WriteRegIssuing); + status.set(WriteRegIssued); + } + /** Set the ReadRegIssuing / WriteRegIssuing status flags. */ + void setReadRegIssuing(){ + status.set(ReadRegIssuing); + } + void setWriteRegIssuing() { + status.set(WriteRegIssuing); + } + + /** Returns the opclass of this instruction. */ + OpClass opClass() const { return staticInst->opClass(); } + + /** Returns the branch target address. 
*/ + Addr branchTarget() const { return staticInst->branchTarget(PC); } + + /** Returns the instruction block branch target address. */ + Addr blockBranchTarget() const { return instBlock->getBranchTarget(); } + + /** Returns the number of source registers. */ + int8_t numSrcRegs() const { return staticInst->numSrcRegs(); } + + /** Returns the number of destination registers. */ + int8_t numDestRegs() const { return staticInst->numDestRegs(); } + + // the following are used to track physical register usage + // for machines with separate int & FP reg files + int8_t numFPDestRegs() const { return staticInst->numFPDestRegs(); } + int8_t numIntDestRegs() const { return staticInst->numIntDestRegs(); } + + /** Returns the logical register index of the i'th destination register. */ + RegIndex destRegIdx(int i) const { return staticInst->destRegIdx(i); } + + /** Returns the logical register index of the i'th source register. */ + RegIndex srcRegIdx(int i) const { return staticInst->srcRegIdx(i); } + + /** Returns the result of an integer instruction. */ + uint64_t readIntResult() { return instResult.integer; } + + /** Returns the result of a floating point instruction. */ + float readFloatResult() { return (float)instResult.dbl; } + + /** Returns the result of a floating point (double) instruction. */ + double readDoubleResult() { return instResult.dbl; } + + /** Records an integer register being set to a value. */ + void setIntRegOperand(const EDGEStaticInst *si, int idx, uint64_t val) + { + if (recordResult) + instResult.integer = val; + } + + /** Records an fp register being set to a value. */ + void setFloatRegOperand(const EDGEStaticInst *si, int idx, FloatReg val, + int width) + { + if (recordResult) { + if (width == 32) + instResult.dbl = (double)val; + else if (width == 64) + instResult.dbl = val; + else + panic("Unsupported width!"); + } + } + + /** Records an fp register being set to a value. 
*/ + void setFloatRegOperand(const EDGEStaticInst *si, int idx, FloatReg val) + { + if (recordResult) + instResult.dbl = (double)val; + } + + /** Records an fp register being set to an integer value. */ + void setFloatRegOperandBits(const EDGEStaticInst *si, int idx, uint64_t val, + int width) + { + if (recordResult) + instResult.integer = val; + } + + /** Records an fp register being set to an integer value. */ + void setFloatRegOperandBits(const EDGEStaticInst *si, int idx, uint64_t val) + { + if (recordResult) + instResult.integer = val; + } + + /** Records that one of the source registers is ready. */ + void markSrcRegReady(); + + /** Marks a specific register as ready. */ + void markSrcRegReady(RegIndex src_idx); + + /** Returns if a source register is ready. */ + bool isReadySrcRegIdx(int idx) const + { + return this->_readySrcRegIdx[idx]; + } + + /** Sets the block of this instruction has completed. */ + void setBlockCompleted() { status.set(BlockCompleted);} + + /** Returns whether or not the block of this instruction is completed. */ + bool isBlockCompleted() { return status[BlockCompleted]; } + + /** Sets this instruction as completed. */ + void setCompleted() { status.set(Completed); } + + /** Returns whether or not this instruction is completed. */ + bool isCompleted() const { return status[Completed]; } + + /** Marks the result as ready. */ + void setResultReady() { status.set(ResultReady); } + + /** Returns whether or not the result is ready. */ + bool isResultReady() const { return status[ResultReady]; } + + /** Sets this instruction as ready to issue. */ + void setCanIssue() { status.set(CanIssue); } + + /** Returns whether or not this instruction is ready to issue. */ + bool readyToIssue() const { return status[CanIssue]; } + + /** Clears this instruction being able to issue. */ + void clearCanIssue() { status.reset(CanIssue); } + + /** Sets this instruction as issued from the IQ. 
*/ + void setIssued() { status.set(Issued); } + + /** Returns whether or not this instruction has issued. */ + bool isIssued() const { return status[Issued]; } + + /** Clears this instruction as being issued. */ + void clearIssued() { status.reset(Issued); } + + /** Sets this instruction as executed. */ + void setExecuted() { status.set(Executed); } + + /** Returns whether or not this instruction has executed. */ + bool isExecuted() const { return status[Executed]; } + + /** Sets this instruction as ready to commit. */ + void setCanCommit() { status.set(CanCommit); } + + /** Clears this instruction as being ready to commit. */ + void clearCanCommit() { status.reset(CanCommit); } + + /** Returns whether or not this instruction is ready to commit. */ + bool readyToCommit() const { return status[CanCommit]; } + /** Sets the AtCommit status flag. */ + void setAtCommit() { status.set(AtCommit); } + /** Returns whether the AtCommit status flag is set. */ + bool isAtCommit() { return status[AtCommit]; } + + /** Sets this instruction as committed. */ + void setCommitted() { status.set(Committed); } + + /** Returns whether or not this instruction is committed. */ + bool isCommitted() const { return status[Committed]; } + + /** Sets this instruction as squashed. */ + void setSquashed() { status.set(Squashed); } + + /** Returns whether or not this instruction is squashed. */ + bool isSquashed() const { return status[Squashed]; } + + //Instruction Queue Entry + //----------------------- + /** Sets this instruction as an entry in the IQ. */ + void setInIQ() { status.set(IqEntry); } + + /** Clears this instruction as an entry in the IQ. */ + void clearInIQ() { status.reset(IqEntry); } + + /** Returns whether or not this instruction is an entry in the IQ. */ + bool isInIQ() const { return status[IqEntry]; } + + /** Sets this instruction as squashed in the IQ; also sets the general Squashed flag. */ + void setSquashedInIQ() { status.set(SquashedInIQ); status.set(Squashed);} + + /** Returns whether or not this instruction is squashed in the IQ. 
*/ + bool isSquashedInIQ() const { return status[SquashedInIQ]; } + + + //Load / Store Queue Functions + //----------------------- + /** Sets this instruction as an entry in the LSQ. */ + void setInLSQ() { status.set(LsqEntry); } + + /** Clears this instruction as an entry in the LSQ. */ + void removeInLSQ() { status.reset(LsqEntry); } + + /** Returns whether or not this instruction is in the LSQ. */ + bool isInLSQ() const { return status[LsqEntry]; } + + /** Sets this instruction as squashed in the LSQ. */ + void setSquashedInLSQ() { status.set(SquashedInLSQ);} + + /** Returns whether or not this instruction is squashed in the LSQ. */ + bool isSquashedInLSQ() const { return status[SquashedInLSQ]; } + + + //Reorder Buffer Functions + //----------------------- + /** Sets this instruction as an entry in the ROB. */ + void setInROB() { status.set(RobEntry); } + + /** Clears this instruction as an entry in the ROB. */ + void clearInROB() { status.reset(RobEntry); } + + /** Returns whether or not this instruction is in the ROB. */ + bool isInROB() const { return status[RobEntry]; } + + /** Sets this instruction as squashed in the ROB. */ + void setSquashedInROB() { status.set(SquashedInROB); } + + /** Returns whether or not this instruction is squashed in the ROB. */ + bool isSquashedInROB() const { return status[SquashedInROB]; } + + /** Mem ref inst could want to get the last store in LSID sequence. + * Asserts isMemRef(), then forwards this instruction's LSID + * (staticInst->getLSID()) to instBlock->getLastStore(). + * NOTE(review): which store counts as "last" is decided by the block + * implementation, not visible here; confirm there. + */ + TheISA::LsID getLastStore() { + assert(isMemRef()); + return instBlock->getLastStore(staticInst->getLSID()); + } + + /** Read the PC of this instruction. */ + const Addr readPC() const { return PC; } + + /** Read the micro PC of this instruction. */ + const Addr readMicroPC() const { return microPC; } + + /** Set the next PC of this instruction (its actual target). */ + void setNextPC(Addr val) + { + nextPC = val; + } + + /** Set the start PC of this inst's block (forwards to instBlock->setStartPC()). */ + void setBlockPC(Addr val) + { + instBlock->setStartPC(val); + } + + /** Set the PC of the next inst block. 
*/ + void setNextBlockPC( Addr val ) + { + instBlock->setBranchTarget(val); + } + + /** Set the next NPC of this instruction (the target in Mips or Sparc).*/ + void setNextNPC(Addr val) + { +#if ISA_HAS_DELAY_SLOT + nextNPC = val; +#endif + } + + void setNextMicroPC(Addr val) + { + nextMicroPC = val; + } + + /** Sets the block pointer of this inst. */ + void setBlockPtr(BlockPtr block_ptr) { instBlock = block_ptr; } + + /** Sets the ASID. */ + void setASID(short addr_space_id) { asid = addr_space_id; } + + /** Sets the thread id. */ + void setTid(ThreadID tid) { threadNumber = tid; } + + /** Sets the pointer to the thread state. */ + void setThreadState(ImplState *state) { thread = state; } + + /** Returns the thread context. */ + ThreadContext *tcBase() { return thread->getTC(); } + + private: + /** Instruction effective address. + * @todo: Consider if this is necessary or not. + */ + Addr instEffAddr; + + /** Whether or not the effective address calculation is completed. + * @todo: Consider if this is necessary or not. + */ + bool eaCalcDone; + + /** Is this instruction's memory access uncacheable. */ + bool isUncacheable; + + /** Has this instruction generated a memory request. */ + bool reqMade; + + public: + /** Sets the effective address. */ + void setEA(Addr &ea) { instEffAddr = ea; eaCalcDone = true; } + + /** Returns the effective address. */ + const Addr &getEA() const { return instEffAddr; } + + /** Returns whether or not the eff. addr. calculation has been completed. */ + bool doneEACalc() { return eaCalcDone; } + + /** Returns whether or not the eff. addr. source registers are ready. */ + bool eaSrcsReady(); + + /** Whether or not the memory operation is done. */ + bool memOpDone; + + /** Is this instruction's memory access uncacheable. */ + bool uncacheable() { return isUncacheable; } + + /** Has this instruction generated a memory request. */ + bool hasRequest() { return reqMade; } + + public: + /** Load queue index. 
*/ + int16_t lqIdx; + + /** Store queue index. */ + int16_t sqIdx; + + /** Iterator pointing to this BaseEdgeDynInst in the list of all insts. */ + ListIt instListIt; + + /** Iterator pointing to this BaseEdgeDynInst in the queue of IQ. */ + QueueIt instQueueIt; + + /** Returns iterator to this instruction in the list of all insts. */ + ListIt &getInstListIt() { return instListIt; } + + /** Sets iterator for this instruction in the list of all insts. */ + void setInstListIt(ListIt _instListIt) { instListIt = _instListIt; } + + /** Returns iterator to this instruction in IQ. */ + QueueIt &getInstQueueIt() { return instQueueIt; } + + /** Sets iterator for this instruction in the IQ. */ + void setInstQueueIt(QueueIt queueit) { instQueueIt = queueit; } + + public: + /** Returns the number of consecutive store conditional failures. */ + unsigned readStCondFailures() + { return thread->storeCondFailures; } + + /** Sets the number of consecutive store conditional failures. */ + void setStCondFailures(unsigned sc_failures) + { thread->storeCondFailures = sc_failures; } +}; +/** + * Performs a load of sizeof(T) bytes at addr on behalf of this instruction. + * Builds a Request, translates it atomically through cpu->dtb as a + * BaseTLB::Read and, when translation returns NoFault, records the + * virtual/physical address, flags and size before calling + * cpu->read(req, data, lqIdx). If translation faults, data is set to + * (T)-1, the instruction is marked executed and the request is deleted. + * addr and data are recorded in traceData when it is non-NULL. + * @return the resulting fault (also stored in the fault member). + */ +template +template +inline Fault +BaseEdgeDynInst::read(Addr addr, T &data, unsigned flags) +{ + reqMade = true; + Request *req = new Request(asid, addr, sizeof(T), flags, this->PC, + thread->contextId(), threadNumber); + + fault = cpu->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Read); + + if (req->isUncacheable()) + isUncacheable = true; + + if (fault == NoFault) { + effAddr = req->getVaddr(); + effAddrValid = true; + physEffAddr = req->getPaddr(); + memReqFlags = req->getFlags(); + memAccSize = req->getSize(); + +#if 0 + if (cpu->system->memctrl->badaddr(physEffAddr)) { + fault = TheISA::genMachineCheckFault(); + data = (T)-1; + this->setExecuted(); + } else { + fault = cpu->read(req, data, lqIdx); + } +#else + fault = cpu->read(req, data, lqIdx); +#endif + } else { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. 
+ data = (T)-1; + + // Commit will have to clean up whatever happened. Set this + // instruction as executed. + this->setExecuted(); + delete req; + } + + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + return fault; +} +/** + * Performs a store of sizeof(T) bytes of data to addr. + * Records addr/data in traceData (when non-NULL), builds a Request and + * translates it atomically through cpu->dtb as a BaseTLB::Write. On + * NoFault the address, flags and size are recorded, *res is attached as + * extra data for conditional-swap requests (res must then be non-NULL), + * and cpu->write(req, data, sqIdx) is called; on a translation fault + * the request is deleted. + * @return the resulting fault (also stored in the fault member). + */ +template +template +inline Fault +BaseEdgeDynInst::write(T data, Addr addr, unsigned flags, uint64_t *res) +{ + if (traceData) { + traceData->setAddr(addr); + traceData->setData(data); + } + + reqMade = true; + Request *req = new Request(asid, addr, sizeof(T), flags, this->PC, + thread->contextId(), threadNumber); + DPRINTF(EdgeExe,"Store Addr = %#x, Store Size=%d\n",addr,req->getSize()); + fault = cpu->dtb->translateAtomic(req, thread->getTC(), BaseTLB::Write); + + if (req->isUncacheable()) + isUncacheable = true; + + if (fault == NoFault) { + effAddr = req->getVaddr(); + effAddrValid = true; + physEffAddr = req->getPaddr(); + memReqFlags = req->getFlags(); + memAccSize = req->getSize(); + + if (req->isCondSwap()) { + assert(res); + req->setExtraData(*res); + } +#if 0 + if (cpu->system->memctrl->badaddr(physEffAddr)) { + fault = TheISA::genMachineCheckFault(); + } else { + fault = cpu->write(req, data, sqIdx); + } +#else + fault = cpu->write(req, data, sqIdx); +#endif + } else { + delete req; + } + + return fault; +} + +#endif // __CPU_BASE_DYN_INST_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/base_dyn_inst.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/base_dyn_inst.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include "cpu/edge/base_dyn_inst_impl.hh" +#include "cpu/edge/cpu.hh" +#include "cpu/edge/isa_specific.hh" +//#include "cpu/edge/impl.hh" + +// Explicit instantiation +template class BaseEdgeDynInst; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/base_dyn_inst_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/base_dyn_inst_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,362 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include +#include +#include +#include + +#include "base/cprintf.hh" +#include "base/trace.hh" +#include "config/the_isa.hh" +#include "cpu/edge/base_dyn_inst.hh" +#include "cpu/exetrace.hh" +#include "mem/request.hh" +#include "sim/faults.hh" + +#define NOHASH +#ifndef NOHASH + +#include "base/hashmap.hh" + +unsigned int MyHashFunc(const BaseEdgeDynInst *addr) +{ + unsigned a = (unsigned)addr; + unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF; + + return hash; +} + +typedef m5::hash_map +my_hash_t; + +my_hash_t thishash; +#endif + +template +BaseEdgeDynInst::BaseEdgeDynInst(EDGEStaticInstPtr _staticInst, + Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, + Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu, TheISA::BlockStatus blockstatus) + : staticInst(_staticInst), traceData(NULL), cpu(cpu) +{ + seqNum = seq_num; + + bool nextIsMicro = + staticInst->isMicroop() && !staticInst->isLastMicroop(); + + PC = inst_PC; + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } + predPC = pred_PC; + predNPC = pred_NPC; + predMicroPC = pred_MicroPC; + predTaken = false; + + blockStatus = blockstatus; + + initVars(); +} + +template 
+BaseEdgeDynInst::BaseEdgeDynInst(TheISA::ExtMachInst inst, + Addr inst_PC, Addr inst_NPC, + Addr inst_MicroPC, + Addr pred_PC, Addr pred_NPC, + Addr pred_MicroPC, + InstSeqNum seq_num, ImplCPU *cpu, TheISA::BlockStatus blockstatus) + : staticInst(inst, inst_PC, blockstatus), traceData(NULL), cpu(cpu) +{ + seqNum = seq_num; + + bool nextIsMicro = + staticInst->isMicroop() && !staticInst->isLastMicroop(); + + PC = inst_PC; + microPC = inst_MicroPC; + if (nextIsMicro) { + nextPC = inst_PC; + nextNPC = inst_NPC; + nextMicroPC = microPC + 1; + } else { + nextPC = inst_NPC; + nextNPC = nextPC + sizeof(TheISA::MachInst); + nextMicroPC = 0; + } + predPC = pred_PC; + predNPC = pred_NPC; + predMicroPC = pred_MicroPC; + predTaken = false; + + blockStatus = blockstatus; + + initVars(); +} + +//template +//BaseEdgeDynInst::BaseEdgeDynInst(EDGEStaticInstPtr &_staticInst) +// : staticInst(_staticInst), traceData(NULL) +//{ +// seqNum = 0; +// initVars(); +//} + +template +void +BaseEdgeDynInst::initVars() +{ + memData = NULL; + effAddr = 0; + effAddrValid = false; + physEffAddr = 0; + + isUncacheable = false; + reqMade = false; + readyRegs = 0; + + instResult.integer = 0; + recordResult = true; + + status.reset(); + + eaCalcDone = false; + memOpDone = false; + + lqIdx = -1; + sqIdx = -1; + + // Eventually make this a parameter. + threadNumber = 0; + + // Also make this a parameter, or perhaps get it from xc or cpu. + asid = 0; + + // Initialize the fault to be NoFault. + fault = NoFault; + instBlock = NULL; + +#ifndef NDEBUG + ++cpu->instcount; + + if (cpu->instcount > 1500) { +#ifdef DEBUG + //cpu->dumpInsts(); + dumpSNList(); +#endif + assert(cpu->instcount <= 1500); + } + + DPRINTF(DynInst, + "DynInst: [sn:%lli] Instruction created. 
Instcount for %s = %i\n", + seqNum, cpu->name(), cpu->instcount); +#endif + +#ifdef DEBUG + cpu->snList.insert(seqNum); +#endif +} + +template +BaseEdgeDynInst::~BaseEdgeDynInst() +{ + if (memData) { + delete [] memData; + } + + if (traceData) { + delete traceData; + } + + fault = NoFault; + instBlock = NULL; + +#ifndef NDEBUG + --cpu->instcount; + + DPRINTF(DynInst, + "DynInst: [sn:%lli] Instruction destroyed. Instcount for %s = %i\n", + seqNum, cpu->name(), cpu->instcount); +#endif +#ifdef DEBUG + cpu->snList.erase(seqNum); +#endif +} + +#ifdef DEBUG +template +void +BaseEdgeDynInst::dumpSNList() +{ + std::set::iterator sn_it = cpu->snList.begin(); + + int count = 0; + while (sn_it != cpu->snList.end()) { + cprintf("%i: [sn:%lli] not destroyed\n", count, (*sn_it)); + count++; + sn_it++; + } +} +#endif + +template +void +BaseEdgeDynInst::prefetch(Addr addr, unsigned flags) +{ + // This is the "functional" implementation of prefetch. Not much + // happens here since prefetches don't affect the architectural + // state. +/* + // Generate a MemReq so we can translate the effective address. + MemReqPtr req = new MemReq(addr, thread->getXCProxy(), 1, flags); + req->asid = asid; + + // Prefetches never cause faults. + fault = NoFault; + + // note this is a local, not BaseEdgeDynInst::fault + Fault trans_fault = cpu->translateDataReadReq(req); + + if (trans_fault == NoFault && !(req->isUncacheable())) { + // It's a valid address to cacheable space. Record key MemReq + // parameters so we can generate another one just like it for + // the timing access without calling translate() again (which + // might mess up the TLB). + effAddr = req->vaddr; + physEffAddr = req->paddr; + memReqFlags = req->flags; + } else { + // Bogus address (invalid or uncacheable space). Mark it by + // setting the eff_addr to InvalidAddr. 
+ effAddr = physEffAddr = MemReq::inval_addr; + } + + if (traceData) { + traceData->setAddr(addr); + } +*/ +} +/** Write hint for addr; currently a no-op. */ +template +void +BaseEdgeDynInst::writeHint(Addr addr, int size, unsigned flags) +{ + // Not currently supported. +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template +Fault +BaseEdgeDynInst::copySrcTranslate(Addr src) +{ + // Not currently supported. + return NoFault; +} + +/** + * @todo Need to find a way to get the cache block size here. + */ +template +Fault +BaseEdgeDynInst::copy(Addr dest) +{ + // Not currently supported. + return NoFault; +} +/** Prints the thread number, PC and disassembly of this instruction via cprintf and std::cout. */ +template +void +BaseEdgeDynInst::dump() +{ + cprintf("T%d : %#08d `", threadNumber, PC); + std::cout << staticInst->disassemble(PC); + cprintf("'\n"); +} +/** + * Writes the thread number, the PC and the disassembly of this + * instruction into outstring. + * The PC is streamed with std::hex so that the digits agree with the + * literal "0x" prefix; previously the prefix was followed by a decimal value. + */ +template +void +BaseEdgeDynInst::dump(std::string &outstring) +{ + std::ostringstream s; + s << "T" << threadNumber << " : 0x" << std::hex << PC << std::dec << " " + << staticInst->disassemble(PC); + + outstring = s.str(); +} +/** Counts one more ready source register; once the count equals numSrcRegs() the instruction is marked as able to issue. */ +template +void +BaseEdgeDynInst::markSrcRegReady() +{ + if (++readyRegs == numSrcRegs()) { + setCanIssue(); + } +} +/** Flags source register src_idx as ready, then updates the ready count. */ +template +void +BaseEdgeDynInst::markSrcRegReady(RegIndex src_idx) +{ + _readySrcRegIdx[src_idx] = true; + + markSrcRegReady(); +} +/** Returns true when every source register other than register 0 is flagged ready. */ +template +bool +BaseEdgeDynInst::eaSrcsReady() +{ + // For now I am assuming that src registers 1..n-1 are the ones that the + // EA calc depends on. (i.e. src reg 0 is the source of the data to be + // stored) + + for (int i = 1; i < numSrcRegs(); ++i) { + if (!_readySrcRegIdx[i]) + return false; + } + + return true; +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/block.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/block.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Jin Yanhan + * Gou Pengfei + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_BLOCK_HH__ +#define __CPU_EDGE_BLOCK_HH__ + +#include + +#include "base/statistics.hh" +#include "cpu/edge/base_block.hh" +//#include "cpu/edge/isa_specific.hh" +/** + * Instruction block of the EDGE CPU model, derived from BaseEdgeBlock. + * Holds the block's actual and predicted branch targets, its exit + * information, its frame id and counters of received stores, writes + * and exits. + */ +template +class SimpleEdgeBlock : public BaseEdgeBlock +{ + public: + /** Typedefs from Impl. 
*/ + typedef typename Impl::CPU CPU; + typedef typename Impl::DynInstPtr DynInstPtr; + + /// Block ID + typedef TheISA::BlockID BlockID; + typedef TheISA::HeaderInfo HeaderInfo; + typedef TheISA::InstID InstID; + typedef TheISA::ExitID ExitID; + typedef TheISA::ConsumerType ConsumerType; + typedef TheISA::ExitType ExitType; + typedef TheISA::ConsumerBitfield ConsumerBitfield; + + typedef typename std::list::iterator ListIt; + + SimpleEdgeBlock(BlockID blockID, CPU* cpu); + + ~SimpleEdgeBlock(); + + Addr getBranchTarget() const { return branchTarget;} + Addr getPredBranchTarget() const { return predBranchTarget; } + Addr getNextBlockPC() const { return nextBlockPC;} + + void setBranchTarget( Addr branch_pc ) ; + void setPredBranchTarget ( Addr pred_branch_pc ); + + /** The next block pc is set when the header is unpacked + * (see depackHeader()); this method allows it to be modified + * manually. + */ + void setNextBlockPC(Addr next_block_pc); + + /** Returns whether this block is mispredicted. */ + bool misPredicted(); + + protected: + uint32_t _mark; + uint32_t _type; + uint32_t _sMask; + uint32_t _xFlags; + uint32_t _chunkNum; + + /** Frame id of this block during execution. */ + int _frameID; + + std::bitset _sMaskFlag; + + /** Start address of next block in program order*/ + Addr nextBlockPC; + + /** Branch target of this block. */ + Addr branchTarget; + /** Predicted branch target of this block. */ + Addr predBranchTarget; + + ExitID exitID; + ExitID pred_exitID; + ExitType exitType; + + uint32_t _numReceivedStore; + uint32_t _numReceivedWrite; + uint32_t _numReceivedExit; + + public: + + uint32_t getChunkNum() const { return _chunkNum;} + uint32_t getStoreMask() const { return _sMask;} + + /** Assemble header info from fetched header insts. + * Return true if everything is OK. + * Return false if the header is not compatible with the TRIPS ISA, + * which means an address with unaligned inst block boundary + * has been used. 
+ */ + bool depackHeader( HeaderInfo& info); + + /** This will explain what a consumer id means + * and where each inst is located. + */ + void map(); + /** Sets the frame id; asserts 0 <= frame_id < Impl::MaxFrameNum. */ + void setFrameID( int frame_id ) { + assert(frame_id < Impl::MaxFrameNum && frame_id >= 0); + _frameID = frame_id; + } + /** Returns the frame id; asserts it lies in [0, Impl::MaxFrameNum). */ + int getFrameID() const { + assert( _frameID >= 0 && _frameID < Impl::MaxFrameNum ); + return _frameID; + } + + bool isCompletionCondSatisfied(); + /** Increments the count of received writes. */ + void incReceivedWrite(){ _numReceivedWrite++; } + /** Increments the count of received stores. */ + void incReceivedStore(){ _numReceivedStore++; } + /** Sets bit lsid of _sMaskFlag. */ + void setStoreMask( TheISA::LsID lsid ) { _sMaskFlag.set(lsid); } + + TheISA::LsID getLastStore( TheISA::LsID lsid ); + + TheISA::ExitID getExitID(){return exitID;} + + TheISA::ExitType getExitType(){return exitType;} + /** Counts the received exit; asserts that none was counted before. */ + void incReceivedExit(){ assert(_numReceivedExit == 0); _numReceivedExit++; } + /** Stores the exit id; asserts that exactly one exit has been counted. */ + void recordExitID(ExitID _exitID){ assert(_numReceivedExit == 1); exitID = _exitID;} + + void recordExitType(bool isCall, bool isReturn, bool isIndirect); + + uint32_t getNumReceivedWrite() const { return _numReceivedWrite; } + + uint32_t getNumReceivedStore() const { return _numReceivedStore; } + + uint32_t getNumReceivedExit() const { return _numReceivedExit; } + /** Records the predicted exit id and the predicted branch target. */ + void setPredInfo(ExitID _exitID, Addr _PC) + { + pred_exitID = _exitID; + predBranchTarget = _PC; + } + +}; + +#endif //__CPU_EDGE_BLOCK_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/block.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/block.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Jin Yanhan + * Gou Pengfei + * + * Date: Dec. 2009 + * + */ +#include "cpu/edge/block_impl.hh" +#include "cpu/edge/impl.hh" + +template class SimpleEdgeBlock; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/block_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/block_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,316 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Jin Yanhan + * Gou Pengfei + * + * Date: Dec. 2009 + * + */ +#include +#include + +#include "base/types.hh" +#include "config/the_isa.hh" +#include "arch/isa_traits.hh" +#include "cpu/edge/block.hh" +#include "base/bitfield.hh" +#include "base/trace.hh" + + +template +SimpleEdgeBlock::SimpleEdgeBlock(BlockID blockID, CPU* cpu) + : BaseEdgeBlock(blockID, cpu) +{ + _mark = 0; + _type = 0; + _xFlags = 0; + _sMask = 0; + _chunkNum = 0; + + // Init frame id to be invalid. 
+ _frameID = -1; + + _sMaskFlag.reset(); + + _numReceivedStore = 0; + _numReceivedWrite = 0; + _numReceivedExit = 0; +} + +template +SimpleEdgeBlock::~SimpleEdgeBlock(){} + +template +bool +SimpleEdgeBlock::depackHeader(HeaderInfo& info) +{ + assert(info.size() >= TheISA::HeaderInfoSize); + _mark = bits(info[3], 31, 24 ); + _type = bits(info[3], 23, 16); + _xFlags = bits( info[3], 15, 8 ); + _sMask = info[1]; + _chunkNum = ( _type == 0 ) ? 4 : _type; + if(_chunkNum > 4) + _chunkNum = 1; + // As we know how many chunks this block has we can set the next + // block PC in program order + nextBlockPC = this->_startPC + ( _chunkNum + 1) * TheISA::ChunkSize; + + // Currently do nothing but dprint it + DPRINTF(EdgeBlock, "\nBlockID %d -- Block StartAddress 0x%#x -- " + "NextBlockAddress 0x%#x\n", + this->_blockID, this->_startPC, nextBlockPC ); + DPRINTF (EdgeBlock, "Header MARK : 0x%x\nHeader TYPE : 0x%x\nHeader XFLAGS : 0x%x\n" + "Header SMASK : 0x%x\n", _mark, _type, _xFlags, _sMask); + + if ( _mark != 0xff ) { + + //warn ("Wrong header at blockID %d -- Block StartAddress 0x%#x!\n", + // this->_blockID, this->_startPC); + + return false; + } + + return true; + +} + +template +void +SimpleEdgeBlock::map() +{ + ListIt it = this->instList.begin(); + ListIt end = this->instList.end(); + + while(it != end){ + DynInstPtr inst = *it; + + // Mapping reg idx of reg-write and reg-read insts + // Mapping strategy comes from trips doc. + // InstID indicates the position of reg queue. 
+ if ( inst->isGRegReadValid() ) { + for ( int i = 0; i < inst->numSrcRegs(); i ++ ) { + + PhysRegIndex mapped_idx = + inst->getInstID() % 4 + inst->srcRegIdx(i) * 4; + + inst->mapSrcReg(i, mapped_idx ); + + DPRINTF(EdgeBlock, "Mapping src reg from %i to %i\n", + inst->srcRegIdx(i), mapped_idx ); + } + } + + if ( inst->isGRegWriteValid() ) { + for ( int i = 0; i < inst->numDestRegs(); i ++ ) { + + PhysRegIndex mapped_idx = + inst->getInstID() % 4 + inst->destRegIdx(i) * 4; + + inst->mapDestReg(i, mapped_idx ); + + DPRINTF(EdgeBlock, "Mapping dest reg from %i to %i\n", + inst->destRegIdx(i), mapped_idx ); + } + + } + + DPRINTF(EdgeBlock, "Mapping consumers for inst[Iid:%lli] in inst block[Bid:%lli].\n", + inst->getInstID(), inst->getBlockID()); + + for ( int idx = 0; idx < inst->getNumConsumers(); idx ++) { + + ConsumerBitfield consumer_raw_bit = inst->getRawConsumerID(idx); + + inst->setConsumerType ( idx, consumer_raw_bit.type ); + + if ( consumer_raw_bit.type == TheISA::WriteSlotOrNoTarget ) { + + if ( consumer_raw_bit.subtype == TheISA::WriteSlot ) { + + inst->setConsumerSubType( idx, TheISA::WriteSlot); + inst->setConsumerID ( idx, consumer_raw_bit.write_id ); + + } else if ( consumer_raw_bit.subtype == TheISA::NoTarget ) { + + inst->setConsumerSubType( idx, TheISA::NoTarget ); + inst->setConsumerID ( idx, 0 ); + + } else + panic("Unrecogonized consumer subtype\n"); + + } else { + + inst->setConsumerID ( idx, consumer_raw_bit.id ); + + } + + DPRINTF(EdgeBlock, "Consumer[idx:%i][Type:%i][ID:%i]\n", idx, + consumer_raw_bit.type, + consumer_raw_bit.id + ); + } + + it++; + } +} + +template +bool +SimpleEdgeBlock::isCompletionCondSatisfied() +{ + DPRINTF(EdgeBlock, "Block[id:%lli] has received %i writes, %i stores and %i exits.\n", + this->getBlockID(), + _numReceivedWrite, + _numReceivedStore, + _numReceivedExit ); + + return (this->getNumRegWriteInst() == _numReceivedWrite ) && + (_sMask == _sMaskFlag.to_ulong() ) && + (_numReceivedExit == 1); +} + +template +void 
+SimpleEdgeBlock::setBranchTarget( Addr branch_pc ) +{ + // Make sure this is an chunk-aligned address + // Sometimes a mispredicted series of blocks will + // set the branch target to silly value. This + // will not affect the output due to + // misprediction handling methods. + //assert( (branch_pc & TheISA::ChunkOffset) == 0); + + branchTarget = branch_pc; +} + +template +void +SimpleEdgeBlock::setPredBranchTarget( Addr pred_branch_pc ) +{ + assert( ( pred_branch_pc & TheISA::ChunkOffset ) == 0 ); + + predBranchTarget = pred_branch_pc; +} + +template +void +SimpleEdgeBlock::setNextBlockPC( Addr next_block_pc ) +{ + // Make sure this is an chunk-aligned address + assert( (next_block_pc & TheISA::ChunkOffset) == 0); + nextBlockPC = next_block_pc; +} + +template +bool +SimpleEdgeBlock::misPredicted() +{ + if(exitType == TheISA::seq) + DPRINTF(EdgePredUnit,"Exit type is seq.\n"); + else if(exitType == TheISA::branch) + DPRINTF(EdgePredUnit,"Exit type is branch.\n"); + else if(exitType == TheISA::call) + DPRINTF(EdgePredUnit,"Exit type is call.\n"); + else if(exitType == TheISA::ret) + DPRINTF(EdgePredUnit,"Exit type is return.\n"); + else if(exitType == TheISA::ibranch) + DPRINTF(EdgePredUnit,"Exit type is ibranch.\n"); + else + panic("Unknown branch type!"); + assert(exitType == TheISA::branch || + exitType == TheISA::call || + exitType == TheISA::ret || + exitType == TheISA::ibranch || + exitType == TheISA::seq); + + if(exitID != pred_exitID) { + DPRINTF(EdgePredUnit,"\nBlock [bid:%i][PC:%#x]: eixt is mispredicted!" + "Actual exit is %i while predicted exit is %i. Target is %#x.\n", + this->_blockID, + this->getStartPC(), + this->exitID, + this->pred_exitID, + this->branchTarget); + + }else if(branchTarget != predBranchTarget) { + DPRINTF(EdgePredUnit,"\nBlock [bid:%i][PC:%#x]: target is mispredicted!" 
+ "Actual target is %#x while predicted target is %#x.\n", + this->_blockID, + this->getStartPC(), + this->branchTarget, + this->predBranchTarget); + + } + if(branchTarget != predBranchTarget) { + return true; + } + DPRINTF(EdgePredUnit,"\nBlock [bid:%i][PC:%#x]:predict correctly!\n",this->_blockID, + this->getStartPC()); + return false; +} + +template +TheISA::LsID +SimpleEdgeBlock::getLastStore( TheISA::LsID lsid ) +{ + assert( lsid < TheISA::StoreMaskNum ); + + DPRINTF(EdgeBlock, "Get last store for LSID %i\n", lsid); + + if ( lsid == 0 ) return -1; + + while ( !bits(_sMask, --lsid) ) { + if (lsid == 0 ) return -1; + } + + DPRINTF(EdgeBlock, "Last store is LSID %i\n", lsid); + + return lsid; +} + +template +void +SimpleEdgeBlock::recordExitType(bool isCall, bool isReturn, bool isIndirect) +{ + assert(!(isCall & isReturn) ); + + if(isCall){ + exitType = TheISA::call; + } else if(isReturn){ + exitType = TheISA::ret; + } else if(isIndirect){ + exitType = TheISA::ibranch; + } else if(this->branchTarget == this->nextBlockPC){ + exitType = TheISA::seq; + } else { + exitType = TheISA::branch; + } +} + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/bpred_unit.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/bpred_unit.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + */ + +#ifndef __CPU_EDGE_BPRED_UNIT_HH__ +#define __CPU_EDGE_BPRED_UNIT_HH__ + +#include + +#include "base/statistics.hh" +#include "base/types.hh" +#include "cpu/inst_seq.hh" +#include "cpu/edge/pred/2bit_local.hh" +#include "cpu/edge/pred/btb.hh" +#include "cpu/edge/pred/ibtb.hh" +#include "cpu/edge/pred/ctb.hh" +#include "cpu/edge/pred/ras.hh" +#include "cpu/edge/pred/btp.hh" +#include "cpu/edge/pred/tournament.hh" + +class SimpleEdgeCPUParams; + +/** + * Basically a wrapper class to hold both the branch predictor + * and the BTB. + */ +template +class BPredUnit +{ + public: + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + typedef TheISA::ExitID ExitID; + typedef TheISA::BlockID BlockID; + typedef TheISA::ExitType ExitType; + enum PredType { + Local, + Tournament + }; + + private: + PredType predictor; + + const std::string _name; + + public: + + /** + * @param params The params object, that has the size of the BP and BTB. + */ + BPredUnit(SimpleEdgeCPUParams *params); + + ~BPredUnit(){} + + const std::string &name() const { return _name; } + + /** + * Registers statistics. + */ + void regStats(); + + void switchOut(); + + void takeOverFrom(); + + /** + * Predicts whether or not the instruction is a taken branch, and the + * target of the branch if it is taken. + * @param inst The branch instruction. + * @param PC The predicted PC is passed back through this parameter. + * @param tid The thread id. + * @return Returns if the branch is taken or not. + */ + TheISA::ExitID predict(BlockID oldest_blockID,BlockPtr &block, Addr &PC, + int addr_space_ID, ThreadID tid); + + /** + * Tells the branch predictor to commit any updates until the given + * sequence number. + * @param done_sn The sequence number to commit any older updates up until. + * @param tid The thread id. 
+ */ + void update(const BlockID &done_sn, ThreadID tid, + Addr &PC, + int addr_space_id, + const Addr &corr_target, + ExitType actually_branch_type, + ExitID actually_exit_ID); + + /** + * Squashes all outstanding updates until a given sequence number. + * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param tid The thread id. + */ + void squash(const BlockID &squashed_sn, ThreadID tid); + + /** + * Squashes all outstanding updates until a given sequence number, and + * corrects that sn's update with the proper address and taken/not taken. + * @param squashed_sn The sequence number to squash any younger updates up + * until. + * @param corr_target The correct branch target. + * @param actually_taken The correct branch direction. + * @param tid The thread id. + */ + void squash(const BlockID &squashed_sn, + int addr_space_id, + Addr &PC, + const Addr &corr_target, + ExitType actually_branch_type, + ExitID actually_exit_ID, ThreadID tid); + + /** + * @param bp_history Pointer to the history object. The predictor + * will need to update any state and delete the object. + */ + void BPSquash(BlockID blockID,ExitID exitID,ThreadID tid); + + void BPSquash(BlockID blockID,ThreadID tid); + /** + * Looks up a given PC in the BP to see if it is taken or not taken. + * @param inst_PC The PC to look up. + * @param bp_history Pointer that will be set to an object that + * has the branch predictor state associated with the lookup. + * @return Whether the branch is taken or not taken. + */ + TheISA::ExitID BPLookup(BlockID blockID, BlockID oldest_blockID,Addr &block_PC, + int addr_space_ID,ThreadID tid); + + + /** + * Updates the BP with taken/not taken information. + * @param inst_PC The branch's PC that will be updated. + * @param taken Whether the branch was taken or not taken. + * @param bp_history Pointer to the branch predictor state that is + * associated with the branch lookup that is being updated. 
+ * @todo Make this update flexible enough to handle a global predictor. + */ + void BPUpdate(BlockID blockID,Addr &block_PC, int addr_space_ID, + ExitID exitID,ThreadID tid); + + /** + *Get index for IBTB at lookup time so that IBTB need not to store history. + */ + unsigned BPLookupGetIndex(Addr &block_PC, ThreadID tid); + + unsigned BPUpdateGetIndex(BlockID blockID, Addr &block_PC, ThreadID tid); + + void dump(); + + private: + struct PredictorHistory { + PredictorHistory(BlockID _blockID,Addr _addr,int _addrSpaceID, + ThreadID _tid) + :blockID(_blockID), + PC(_addr), + addr_space_ID(_addrSpaceID), + tid(_tid) + {} + + BlockID blockID; + Addr PC; + int addr_space_ID; + ThreadID tid; + }; + + + /** The per thread predicted but not commited or squashed block. */ + + /** The local branch predictor. */ + LocalBP *localBP; + + /** The tournament branch predictor. */ + TournamentBP *tournamentBP; + + /** The branch type predictor. */ + BranchTypePredictor branchTypePredictor; + + /** The BTB. */ + DefaultBTB BTB; + + /** The per-thread return address stack. */ + ReturnAddrStack RAS; + + /** The CTB. */ + DefaultCTB CTB; + + /** The IBTB. */ + DefaultIBTB IBTB; + + /** The number of bits to align*/ + unsigned blockShiftAmt; + + /** Stat for number of BP lookups. */ + Stats::Scalar lookups; + /** Stat for number of conditional branches predicted. */ + Stats::Scalar condPredicted; + /** Stat for number of conditional branches predicted incorrectly. */ + Stats::Scalar condIncorrect; + /** Stat for number of BTB lookups. */ + Stats::Scalar BTBLookups; + /** Stat for number of BTB hits. */ + Stats::Scalar BTBHits; + /** Stat for number of times the BTB is correct. */ + Stats::Scalar BTBCorrect; + /** Stat for number of times the RAS is used to get a target. */ + Stats::Scalar usedRAS; + /** Stat for number of times the RAS is incorrect. 
*/ + Stats::Scalar RASIncorrect; +}; + +#endif // __CPU_EDGE_BPRED_UNIT_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/bpred_unit.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/bpred_unit.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,35 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/bpred_unit_impl.hh" + +template class BPredUnit; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/bpred_unit_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/bpred_unit_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,443 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * Authors: Kevin Lim + */ + +#include + +#include "arch/types.hh" +#include "base/trace.hh" +#include "base/traceflags.hh" +#include "cpu/edge/bpred_unit.hh" +#include "params/SimpleEdgeCPU.hh" + +template +BPredUnit::BPredUnit(SimpleEdgeCPUParams *params) + : _name(params->name + ".BPredUnit"), + BTB(params->BTBEntries, + params->offsetLength, + params->blockShiftAmt), + IBTB(params->iExitPredictorEntries, + params->IBTBEntries, + params->iExitLength, + params->blockShiftAmt), + blockShiftAmt(params->blockShiftAmt) +{ + // Setup the selected predictor. + if (params->predType == "local") { +#if 0 + localBP = new LocalBP(params->localPredictorSize, + params->localCtrBits, + params->instShiftAmt); + predictor = Local; +#endif + } else if (params->predType == "tournament") { + tournamentBP = new TournamentBP(params->localPredictorSize, + params->localHistoryTableSize, + params->localHistoryBits, + params->globalPredictorSize, + params->globalHistoryBits, + params->choiceHistoryBits, + params->choicePredictorSize, + params->choiceCtrBits, + params->blockShiftAmt); + predictor = Tournament; + } else { + fatal("Invalid BP selected!"); + } + + CTB.init(params->CTBEntries,params->blockShiftAmt,params->offsetLength); + + branchTypePredictor.init(params->BTPEntries,params->blockShiftAmt); + + RAS.init(params->RASSize,params->lsEntries,params->blockShiftAmt); +} + +template +void +BPredUnit::regStats() +{ + lookups + .name(name() + ".lookups") + .desc("Number of BP lookups") + ; + + condPredicted + .name(name() + ".condPredicted") + .desc("Number of conditional branches predicted") + ; + + condIncorrect + .name(name() + ".condIncorrect") + .desc("Number of conditional branches incorrect") + ; + + BTBLookups + .name(name() + ".BTBLookups") + .desc("Number of BTB lookups") + ; + + BTBHits + .name(name() + ".BTBHits") + .desc("Number of BTB hits") + ; + + BTBCorrect + .name(name() + ".BTBCorrect") + .desc("Number of correct BTB predictions (this stat may not " + "work 
properly.") + ; + + usedRAS + .name(name() + ".usedRAS") + .desc("Number of times the RAS was used to get a target.") + ; + + RASIncorrect + .name(name() + ".RASInCorrect") + .desc("Number of incorrect RAS predictions.") + ; +} + +template +void +BPredUnit::switchOut() +{ + // Clear any state upon switch out. + for (int i = 0; i < Impl::MaxThreads; ++i) { + squash(0, i); + } +} + +template +void +BPredUnit::takeOverFrom() +{ + // Can reset all predictor state, but it's not necessarily better + // than leaving it be. + + tournamentBP->reset(); + branchTypePredictor.reset(); + BTB.reset(); + IBTB.reset(); + RAS.reset(); + CTB.reset(); + +} + +template +TheISA::ExitID +BPredUnit::predict(BlockID oldest_blockID, BlockPtr &block, Addr &PC, + int addr_space_ID, ThreadID tid) +{ + // See if branch predictor predicts taken. + // If so, get its target addr either from the BTB or the RAS. + // Save off record of branch stuff so the RAS can be fixed + // up once it's done. + + using TheISA::MachInst; + + ExitID pred_exitID; + ExitType pred_branchType; + Addr target = PC; + Addr seq_target; + Addr btb_target; + Addr ibtb_target; + Addr ctb_target; + Addr ras_target; + Addr ras_entry; + uint32_t ret_addr; + unsigned ibtb_index; + ++lookups; + + +#if 0 + if (inst->isUncondCtrl()) { + DPRINTF(Fetch, "BranchPred: [tid:%i]: Unconditional control.\n", tid); + pred_taken = true; + // Tell the BP there was an unconditional branch. 
+ BPUncond(bp_history); + } else { + ++condPredicted; + + pred_taken = BPLookup(PC, bp_history); + + DPRINTF(Fetch, "BranchPred: [tid:%i]: Branch predictor predicted %i " + "for PC %#x\n", + tid, pred_taken, inst->readPC()); + } +#endif + ibtb_index = BPLookupGetIndex(PC,tid); + pred_exitID = BPLookup(block->getBlockID(),oldest_blockID,PC, addr_space_ID,tid); + DPRINTF(EdgePredUnit,"BranchPred: [tid:%i]: Branch predictor predicted %i " + "for PC %#x\n", + tid, pred_exitID,block->getStartPC()); + DPRINTF(EdgePredUnit, "BranchPred: [tid:%i]: [sn:%i] Creating prediction history " + "for PC %#x\n", + tid, block->getBlockID(), block->getStartPC()); + + // Now lookup in the BTB, RAS or CTB. + seq_target = block->getNextBlockPC(); + btb_target = BTB.lookup(PC,addr_space_ID,pred_exitID); + ibtb_target = IBTB.lookup(PC,ibtb_index,pred_exitID); + ctb_target = CTB.lookupTarget(PC,addr_space_ID,pred_exitID); + ret_addr = CTB.lookupRetAddr(PC,addr_space_ID,pred_exitID); + + ras_entry = RAS.lookup(block->getBlockID(),tid); + ras_target = RAS.targetGen(ras_entry); + + pred_branchType = branchTypePredictor.lookup(PC,addr_space_ID,pred_exitID); + if(pred_branchType == TheISA::seq) { + DPRINTF(EdgePredUnit,"BranchPred: [tid:%i][bid:%i]: ExitType is Seq.\n",tid, + block->getBlockID()); + target = seq_target; + } else if(pred_branchType == TheISA::branch) { + DPRINTF(EdgePredUnit,"BranchPred: [tid:%i][bid:%i]: ExitType is Branch.\n",tid, + block->getBlockID()); + target = btb_target; + } else if(pred_branchType == TheISA::call) { + DPRINTF(EdgePredUnit,"BranchPred: [tid:%i][bid:%i]: ExitType is Call." 
+ "Return address is %#x.\n",tid,block->getBlockID(),ret_addr<<7); + target = ctb_target; + RAS.push(ret_addr,tid); + } else if(pred_branchType == TheISA::ret) { + DPRINTF(EdgePredUnit,"BranchPred: [tid:%i][bid:%i]: ExitType is Return.\n",tid, + block->getBlockID()); + target = ras_target; + RAS.pop(tid); + } else if(pred_branchType == TheISA::ibranch){ + DPRINTF(EdgePredUnit,"BranchPred: [tid:%i][bid:%i]: ExitType is Ibranch.\n",tid, + block->getBlockID()); + target = ibtb_target; + } else{ + panic("Unknown branch type!"); + } + PC = target; + DPRINTF(EdgePredUnit,"Block [PC:%#x]:Predicted target is %#x. \n", + block->getStartPC(),target); + return pred_exitID; +} + +template +void +BPredUnit::update(const BlockID &done_sn, ThreadID tid, Addr &PC, + int addr_space_id, + const Addr &corr_target, + ExitType actually_branch_type, + ExitID actually_exit_ID) +{ + DPRINTF(EdgePredUnit, "BranchPred: [tid:%i]: Committing branches until " + "[sn:%lli].\n PC = %#x, ExitID = %i and target = %#x.\n", + tid, done_sn, PC, + actually_exit_ID, + corr_target); + // Update the branch predictor with the correct results. 
+ unsigned index = BPUpdateGetIndex(done_sn,PC,tid); + BPUpdate(done_sn, + PC, + addr_space_id, + actually_exit_ID, + tid); + branchTypePredictor.update(PC, + addr_space_id, + actually_exit_ID, + actually_branch_type + ); + if( actually_branch_type == TheISA::seq) { + } else if(actually_branch_type == TheISA::branch) { + BTB.update(PC, + addr_space_id, + actually_exit_ID, + corr_target); + } else if(actually_branch_type == TheISA::call) { + unsigned index = CTB.CTBIdxGen(PC, + addr_space_id, + actually_exit_ID); + CTB.update(index,corr_target); + RAS.lspush(PC,index,tid); + } else if(actually_branch_type == TheISA::ret) { + unsigned ctb_index = RAS.lsPopIdx(tid); + Addr ctb_addr = RAS.lsPopAddr(tid); + CTB.update(ctb_index,ctb_addr,corr_target); + } else if(actually_branch_type == TheISA::ibranch) { + IBTB.update(PC,index,actually_exit_ID,corr_target); + } else { + panic("Unknown predicted branch type!"); + } + +} + +template +void +BPredUnit::squash(const BlockID &squashed_sn, ThreadID tid) +{ + // If there's a squash due to a syscall, there may not be an entry + // corresponding to the squash. In that case, don't bother trying to + // fix up the entry. + BPSquash(squashed_sn, tid); + RAS.squash(squashed_sn, tid); +} + +template +void +BPredUnit::squash(const BlockID &squashed_sn, + int addr_space_id, + Addr &PC, + const Addr &corr_target, + ExitType actually_branch_type, + ExitID actually_exit_ID, + ThreadID tid) +{ + // Now that we know that a branch was mispredicted, we need to undo + // all the branches that have been seen up until this branch and + // fix up everything. + // NOTE: This should be call conceivably in 2 scenarios: + // (1) After an branch is executed, it updates its status in the ROB + // The commit stage then checks the ROB update and sends a signal to + // the fetch stage to squash history after the mispredict + // (2) In the decode stage, you can find out early if a unconditional + // PC-relative, branch was predicted incorrectly. 
If so, a signal + // to the fetch stage is sent to squash history after the mispredict + Addr retAddrPush = 0; + + DPRINTF(EdgePredUnit, "BranchPred: [tid:%i]: Squashing from sequence number %i, " + "setting target to %#x.\n", + tid, squashed_sn, corr_target); + + // If there's a squash due to a syscall, there may not be an entry + // corresponding to the squash. In that case, don't bother trying to + // fix up the entry. + unsigned index = BPUpdateGetIndex(squashed_sn,PC,tid); + BPSquash(squashed_sn,actually_exit_ID,tid); + BPUpdate(squashed_sn, + PC, + addr_space_id, + actually_exit_ID, + tid); + RAS.squash(squashed_sn,tid); + branchTypePredictor.update(PC, + addr_space_id, + actually_exit_ID, + actually_branch_type + ); + if(actually_branch_type == TheISA::call) { + retAddrPush = CTB.lookupRetAddr(PC, addr_space_id, actually_exit_ID); + } + if( actually_branch_type == TheISA::seq) { + } else if(actually_branch_type == TheISA::branch) { + BTB.update(PC, + addr_space_id, + actually_exit_ID, + corr_target); + } else if(actually_branch_type == TheISA::call) { + unsigned index = CTB.CTBIdxGen(PC, + addr_space_id, + actually_exit_ID); + CTB.update(index,corr_target); + RAS.lspush(PC,index,tid); + RAS.push(retAddrPush,tid); + DPRINTF(EdgePredUnit,"Squashing!Exit Type is call. Return address is %#x.\n", + retAddrPush<<7); + } else if(actually_branch_type== TheISA::ret) { + unsigned ctb_index = RAS.lsPopIdx(tid); + Addr ctb_addr = RAS.lsPopAddr(tid); + CTB.update(ctb_index,ctb_addr,corr_target); + RAS.pop(tid); + } else if(actually_branch_type == TheISA::ibranch) { + IBTB.update(PC,index,actually_exit_ID,corr_target); + }else { + panic("Unknown predicted branch type!"); + } +} + +#if 0 +template +void +BPredUnit::BPUncond(void * &bp_history) +{ + // Only the tournament predictor cares about unconditional branches. 
+ if (predictor == Tournament) { + tournamentBP->uncondBr(bp_history); + } +} +#endif + +template +void +BPredUnit::BPSquash(BlockID blockID,ExitID exitID,ThreadID tid) +{ + tournamentBP->squash(blockID,exitID,tid); +} + +template +void +BPredUnit::BPSquash(BlockID blockID, ThreadID tid) +{ + tournamentBP->squash(blockID,tid); +} + +template +TheISA::ExitID +BPredUnit::BPLookup(BlockID blockID, BlockID oldest_blockID, + Addr &inst_PC, int addr_space_ID,ThreadID tid) +{ + return tournamentBP->lookup(blockID,oldest_blockID,inst_PC, addr_space_ID,tid); +} + +template +void +BPredUnit::BPUpdate(BlockID blockID,Addr &block_PC, + int addr_space_ID,ExitID exitID,ThreadID tid) +{ + tournamentBP->update(blockID,block_PC,addr_space_ID,exitID,tid); +} + +template +unsigned +BPredUnit::BPLookupGetIndex(Addr &block_PC, ThreadID tid) +{ + unsigned index = tournamentBP->lookupGetIndex(block_PC,tid); + return index; +} + +template +unsigned +BPredUnit::BPUpdateGetIndex(BlockID blockID, Addr &block_PC, ThreadID tid) +{ + unsigned index = tournamentBP->updateGetIndex(blockID, block_PC,tid); + return index; +} + +template +void +BPredUnit::dump() +{ + + for (int i = 0; i < Impl::MaxThreads; ++i) { + } +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/comm.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/comm.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_COMM_HH__ +#define __CPU_EDGE_COMM_HH__ + +#include + +#include "base/types.hh" +#include "cpu/inst_seq.hh" +#include "sim/faults.hh" + +// Typedef for physical register index type. Although the Impl would be the +// most likely location for this, there are a few classes that need this +// typedef yet are not templated on the Impl. For now it will be defined here. 
+typedef short int PhysRegIndex; + +/** Struct that defines the information passed from fetch to map. */ +template +struct CommFetch2Map { + //typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + + int size; + + //DynInstPtr insts[Impl::MaxWidth]; + + BlockPtr instBlocks[Impl::MaxFetchWidth]; + + bool blockFetched; + + Fault fetchFault; + InstSeqNum fetchFaultSN; + bool clearFetchFault; +}; + +/** Struct that defines the information passed from map to execute. */ +template +struct CommMap2Execute { + //typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + + int size; + + BlockPtr instBlocks[Impl::MaxFetchWidth]; + //DynInstPtr insts[Impl::MaxWidth]; +}; + +/** Struct that defines the information passed from execute to commit. */ +template +struct CommExecute2Commit { + //typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + + int size; + + //DynInstPtr insts[Impl::MaxWidth]; + BlockPtr instBlocks[Impl::MaxCompleteWidth]; + + bool squash[Impl::MaxThreads]; + bool branchMispredict[Impl::MaxThreads]; + bool branchTaken[Impl::MaxThreads]; + Addr mispredPC[Impl::MaxThreads]; + Addr nextPC[Impl::MaxThreads]; + + TheISA::ExitID exitID; + TheISA::ExitType exitType; + + InstSeqNum squashedSeqNum[Impl::MaxThreads]; + + bool includeSquashInstBlock[Impl::MaxThreads]; + +}; + +template +struct CommIssue2Execute { + typedef typename Impl::DynInstPtr DynInstPtr; + + int size; + + DynInstPtr insts[Impl::MaxIssueWidth]; +}; + +/** Struct that defines all backwards communication. */ +template +struct TimeBufStruct { + struct mapComm { + bool squash; + bool predIncorrect; + uint64_t branchAddr; + + InstSeqNum doneSeqNum; + + // @todo: Might want to package this kind of branch stuff into a single + // struct as it is used pretty frequently.
+ bool branchMispredict; + bool branchTaken; + Addr mispredPC; + Addr nextPC; + + unsigned branchCount; + + // Represents the instruction that has either been retired or + // squashed. Similar to having a single bus that broadcasts the + // retired or squashed sequence number. + //InstSeqNum doneSeqNum; + TheISA::BlockID doneBlockID; + }; + + mapComm mapInfo[Impl::MaxThreads]; + + struct executeComm { + // Also eventually include skid buffer space. + bool usedIQ; + unsigned freeIQEntries; + bool usedLSQ; + unsigned freeLSQEntries; + + unsigned iqCount; + unsigned ldstqCount; + + unsigned dispatched; + unsigned dispatchedToLSQ; + }; + + executeComm executeInfo[Impl::MaxThreads]; + + struct commitComm { + bool usedROB; + unsigned freeROBEntries; + bool emptyROB; + + bool squash; + bool robSquashing; + + bool branchMispredict; + bool branchTaken; + Addr mispredPC; + Addr blockPC; + Addr nextPC; + Addr nextNPC; + Addr nextMicroPC; + + // Represents the instruction block that has either been retired or + // squashed. Similar to having a single bus that broadcasts the + // retired or squashed block ID. + //InstSeqNum doneSeqNum; + TheISA::BlockID doneBlockID; + TheISA::ExitID exitID; + TheISA::ExitType exitType; + //Just in case we want to do a commit/squash on a cycle + //(necessary for multiple ROBs?) + bool commitInsts; + InstSeqNum squashSeqNum; + + // Communication specifically to the IQ to tell the IQ that it can + // schedule a non-speculative instruction. + InstSeqNum nonSpecSeqNum; + + // Hack for now to send back an uncached access to the IEW stage.
+ typedef typename Impl::DynInstPtr DynInstPtr; + bool uncached; + DynInstPtr uncachedLoad; + + bool interruptPending; + bool clearInterrupt; + + // This is for syscall handling in the execute stage, because we + // can only handle a syscall in TRIPS when the block is committed + bool needSyscall; + }; + + commitComm commitInfo[Impl::MaxThreads]; + + bool mapBlock[Impl::MaxThreads]; + bool mapUnblock[Impl::MaxThreads]; + bool executeBlock[Impl::MaxThreads]; + bool executeUnblock[Impl::MaxThreads]; + bool commitBlock[Impl::MaxThreads]; + bool commitUnblock[Impl::MaxThreads]; +}; + +#endif //__CPU_EDGE_COMM_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/commit.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/commit.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,488 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_COMMIT_HH__ +#define __CPU_EDGE_COMMIT_HH__ + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "cpu/exetrace.hh" +#include "cpu/inst_seq.hh" + +class SimpleEdgeCPUParams; + +template +class EdgeThreadState; + +/** + * SimpleEdgeCommit handles single threaded and SMT commit. Its width is + * specified by the parameters; each cycle it tries to commit that + * many instructions. The SMT policy decides which thread it tries to + * commit instructions from. Non-speculative instructions must reach + * the head of the ROB before they are ready to execute; once they + * reach the head, commit will broadcast the instruction's sequence + * number to the previous stages so that they can issue/execute the + * instruction. Only one non-speculative instruction is handled per + * cycle. Commit is responsible for handling all back-end initiated + * redirects. It receives the redirect, and then broadcasts it to all + * stages, indicating the sequence number they should squash until, + * and any necessary branch misprediction information as well. It + * prioritizes redirects by instruction's age, only broadcasting a + * redirect if it corresponds to an instruction that should currently + * be in the ROB.
This is done by tracking the sequence number of the + * youngest instruction in the ROB, which gets updated to any + * squashing instruction's sequence number, and only broadcasting a + * redirect if it corresponds to an older instruction. Commit also + * supports multiple cycle squashing, to model a ROB that can only + * remove a certain number of instructions per cycle. + */ +template +class SimpleEdgeCommit +{ + public: + // Typedefs from the Impl. + typedef typename Impl::CPU CPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + typedef typename Impl::CPUPol CPUPol; + + //typedef typename CPUPol::RenameMap RenameMap; + typedef typename CPUPol::EdgeROB ROB; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::Fetch2Map Fetch2Map; + typedef typename CPUPol::Execute2Commit Execute2Commit; + typedef typename CPUPol::Map2Execute Map2Execute; + + typedef typename CPUPol::Fetch Fetch; + typedef typename CPUPol::Execute Execute; + + typedef EdgeThreadState Thread; + + /** Event class used to schedule a squash due to a trap (fault or + * interrupt) to happen on a specific cycle. + */ + class TrapEvent : public Event { + private: + SimpleEdgeCommit *commit; + ThreadID tid; + + public: + TrapEvent(SimpleEdgeCommit *_commit, ThreadID _tid); + + void process(); + const char *description() const; + }; + + /** Overall commit status. Used to determine if the CPU can deschedule + * itself due to a lack of activity. + */ + enum CommitStatus{ + Active, + Inactive + }; + + /** Individual thread status. */ + enum ThreadStatus { + Running, + Idle, + ROBSquashing, + TrapPending, + FetchTrapPending + }; + + /** Commit policy for SMT mode. */ + enum CommitPolicy { + Aggressive, + RoundRobin, + OldestReady + }; + + private: + /** Overall commit status. */ + CommitStatus _status; + /** Next commit status, to be set at the end of the cycle. */ + CommitStatus _nextStatus; + /** Per-thread status.
*/ + ThreadStatus commitStatus[Impl::MaxThreads]; + /** Commit policy used in SMT mode. */ + CommitPolicy commitPolicy; + + public: + /** Construct a SimpleEdgeCommit with the given parameters. */ + SimpleEdgeCommit(CPU *_cpu, SimpleEdgeCPUParams *params); + + /** Returns the name of the SimpleEdgeCommit. */ + std::string name() const; + + /** Registers statistics. */ + void regStats(); + + /** Sets the list of threads. */ + void setThreads(std::vector &threads); + + /** Sets the main time buffer pointer, used for backwards communication. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + + void setFetchQueue(TimeBuffer *fq_ptr); + + /** Sets the pointer to the queue coming from map. */ + void setMapQueue(TimeBuffer *rq_ptr); + + /** Sets the pointer to the queue coming from execute. */ + void setExecuteQueue(TimeBuffer *iq_ptr); + + /** Sets the pointer to the Execute stage. */ + void setExecuteStage(Execute *execute_stage); + + /** Skid buffer between map and commit. */ + //std::queue skidBuffer; + std::queue skidBuffer; + + /** The pointer to the Execute stage. Used solely to ensure that + * various events (traps, interrupts, syscalls) do not occur until + * all stores have written back. + */ + Execute *executeStage; + + /** Sets pointer to list of active threads. */ + void setActiveThreads(std::list *at_ptr); + + /** Sets pointer to the committed state rename map. */ + //void setRenameMap(RenameMap rm_ptr[Impl::MaxThreads]); + + /** Sets pointer to the ROB. */ + void setROB(ROB *rob_ptr); + + /** Initializes stage by sending back the number of free entries. */ + void initStage(); + + /** Initializes the draining of commit. */ + bool drain(); + + /** Resumes execution after draining. */ + void resume(); + + /** Completes the switch out of commit. */ + void switchOut(); + + /** Takes over from another CPU's thread. */ + void takeOverFrom(); + + /** Ticks the commit stage, which tries to commit instructions.
*/ + void tick(); + + /** Handles any squashes that are sent from execute, and adds instructions + * to the ROB and tries to commit instructions. + */ + void commit(); + + /** Returns the number of free ROB entries for a specific thread. */ + size_t numROBFreeEntries(ThreadID tid); + + /** Generates an event to schedule a squash due to a trap. */ + void generateTrapEvent(ThreadID tid); + + /** Records that commit needs to initiate a squash due to an + * external state update through the TC. + */ + void generateTCEvent(ThreadID tid); + + private: + /** Updates the overall status of commit with the nextStatus, and + * tells the CPU if commit is active/inactive. + */ + void updateStatus(); + + /** Sets the next status based on threads' statuses, which becomes the + * current status at the end of the cycle. + */ + void setNextStatus(); + + /** Checks if the ROB is completed with squashing. This is for the case + * where the ROB can take multiple cycles to complete squashing. + */ + bool robDoneSquashing(); + + /** Returns if any of the threads have the number of ROB entries changed + * on this cycle. Used to determine if the number of free ROB entries needs + * to be sent back to previous stages. + */ + bool changedROBEntries(); + + /** Squashes all in flight instructions. */ + void squashAll(ThreadID tid); + + /** Handles squashing due to a trap. */ + void squashFromTrap(ThreadID tid); + + /** Handles squashing due to a TC write. */ + void squashFromTC(ThreadID tid); + +#if FULL_SYSTEM + /** Handles processing an interrupt. */ + void handleInterrupt(); +#endif // FULL_SYSTEM + + /** Commits as many instructions as possible. */ + void commitInstBlocks(); + + /** Tries to commit the head ROB instruction block passed in. + * @param head_inst_block The instruction block to be committed. + */ + bool commitHead(BlockPtr &head_inst_block, unsigned inst_block_num); + + /** Gets instructions from rename and inserts them into the ROB.
*/ + void getInstBlocks(); + + /** Insert all instructions from rename into skidBuffer */ + void skidInsert(); + + /** Marks completed instructions using information sent from execute. */ + void markCompletedInstBlocks(); + + /** Gets the thread to commit, based on the SMT policy. */ + ThreadID getCommittingThread(); + + /** Returns the thread ID to use based on a round robin policy. */ + ThreadID roundRobin(); + + /** Returns the thread ID to use based on an oldest instruction policy. */ + ThreadID oldestReady(); + + public: + /** Returns the commit PC of thread 0. + * @todo: Probably remove this function as it returns only thread 0. + */ + Addr readPC() { return PC[0]; } + + /** Returns the PC of a specific thread. */ + Addr readPC(ThreadID tid) { return PC[tid]; } + + /** Sets the PC of a specific thread. */ + void setPC(Addr val, ThreadID tid) { PC[tid] = val; } + + /** Reads the next PC of a specific thread. */ + Addr readNextPC(ThreadID tid) { return nextPC[tid]; } + + /** Sets the next PC of a specific thread. */ + void setNextPC(Addr val, ThreadID tid) { nextPC[tid] = val; } + + /** Reads the next NPC of a specific thread. */ + Addr readNextNPC(ThreadID tid) { return nextNPC[tid]; } + + /** Sets the next NPC of a specific thread. */ + void setNextNPC(Addr val, ThreadID tid) { nextNPC[tid] = val; } + + private: + /** Time buffer interface. */ + TimeBuffer *timeBuffer; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toExecute; + + /** Wire to read information from Execute (for ROB). */ + typename TimeBuffer::wire robInfoFromExecute; + + TimeBuffer *fetch2commitQueue; + + typename TimeBuffer::wire fromFetch; + + /** Execute instruction block queue interface. */ + TimeBuffer *execute2commitQueue; + + /** Wire to read information from the execute queue. */ + typename TimeBuffer::wire fromExecute; + + /** map instruction queue interface, for ROB.
*/ + TimeBuffer *map2commitQueue; + + /** Wire to read information from map queue. */ + typename TimeBuffer::wire fromMap; + + public: + /** ROB interface. */ + ROB *rob; + + private: + /** Pointer to CPU. */ + CPU *cpu; + + /** Vector of all of the threads. */ + std::vector thread; + + /** Records that commit has written to the time buffer this cycle. Used for + * the CPU to determine if it can deschedule itself if there is no activity. + */ + bool wroteToTimeBuffer; + + /** Records if the number of ROB entries has changed this cycle. If it has, + * then the number of free entries must be re-broadcast. + */ + bool changedROBNumEntries[Impl::MaxThreads]; + + /** A counter of how many threads are currently squashing. */ + ThreadID squashCounter; + + /** Records if a thread has to squash this cycle due to a trap. */ + bool trapSquash[Impl::MaxThreads]; + + /** Records if a thread has to squash this cycle due to a TC write. */ + bool tcSquash[Impl::MaxThreads]; + + /** Priority List used for Commit Policy */ + std::list priority_list; + + /** Execute to Commit delay, in ticks. */ + unsigned executeToCommitDelay; + + /** Commit to Execute delay, in ticks. */ + unsigned commitToExecuteDelay; + + /** Map to ROB delay, in ticks. */ + unsigned mapToROBDelay; + + unsigned fetchToCommitDelay; + + /** Rename width, in instructions. Used so ROB knows how many + * instructions to get from the rename instruction queue. + */ + unsigned mapWidth; + + /** Commit width, in instructions. */ + unsigned commitWidth; + + /** Number of Reorder Buffers */ + unsigned numRobs; + + /** Number of Active Threads */ + ThreadID numThreads; + + /** Is a drain pending. */ + bool drainPending; + + /** Is commit switched out. */ + bool switchedOut; + + /** The latency to handle a trap. Used when scheduling trap + * squash event. + */ + Tick trapLatency; + + /** The interrupt fault. */ + Fault interrupt; + + /** The commit PC of each thread.
Refers to the instruction that + * is currently being processed/committed. + */ + Addr PC[Impl::MaxThreads]; + + /** The commit micro PC of each thread. Refers to the instruction that + * is currently being processed/committed. + */ + Addr microPC[Impl::MaxThreads]; + + /** The next PC of each thread. */ + Addr nextPC[Impl::MaxThreads]; + + /** The next NPC of each thread. */ + Addr nextNPC[Impl::MaxThreads]; + + /** The next micro PC of each thread. */ + Addr nextMicroPC[Impl::MaxThreads]; + + /** The sequence number of the youngest valid instruction in the ROB. */ + InstSeqNum youngestSeqNum[Impl::MaxThreads]; + + /** Records if there is a trap currently in flight. */ + bool trapInFlight[Impl::MaxThreads]; + + /** Records if there were any stores committed this cycle. */ + bool committedStores[Impl::MaxThreads]; + + /** Records if commit should check if the ROB is truly empty (see + commit_impl.hh). */ + bool checkEmptyROB[Impl::MaxThreads]; + + /** Pointer to the list of active threads. */ + std::list *activeThreads; + + /** Rename map interface. */ + //RenameMap *renameMap[Impl::MaxThreads]; + + /** Updates commit stats based on this instruction block. */ + void updateComInstBlockStats(BlockPtr &inst_block); + + /** Stat for the number of committed useful (non-NOP) instructions. */ + Stats::Formula commitCommittedUsefulInsts; + /** Stat for the percent of NOP instructions. */ + Stats::Formula commitPercentNop; + /** Stat for the number of committed NOP instructions. */ + Stats::Scalar commitCommittedNopInsts; + /** Stat for the total number of committed instructions. */ + Stats::Scalar commitCommittedInsts; + /** Stat for the total number of committed instruction blocks. */ + Stats::Scalar commitCommittedInstBlocks; + + /** Stat for the total number of squashed instructions discarded by commit. + */ + Stats::Scalar commitSquashedInsts; + /** Stat for the total number of times commit is told to squash. + * @todo: Actually increment this stat.
+ */ + Stats::Scalar commitSquashEvents; + /** Stat for the total number of times commit has had to stall due to a non- + * speculative instruction reaching the head of the ROB. + */ + Stats::Scalar commitNonSpecStalls; + /** Stat for the total number of branch mispredicts that caused a squash. */ + Stats::Scalar branchMispredicts; + /** Number of correct predictions. */ + Stats::Scalar correctPredictions; + /** Ratio of correct predictions to all committed inst blocks. */ + Stats::Formula branchPredRate; + /** Distribution of the number of committed useful insts each cycle. */ + Stats::Distribution numCommittedDist; + + /** Number of inst blocks committed, per thread. */ + Stats::Vector statComInstBlock; + + /** Number of cycles where the commit bandwidth limit is reached. */ + Stats::Scalar commitEligibleSamples; + /** Number of instructions not committed due to bandwidth limits. */ + Stats::Vector commitEligible; +}; + +#endif // __CPU_EDGE_COMMIT_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/commit.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/commit.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,39 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/commit_impl.hh" + +template class SimpleEdgeCommit; diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/commit_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/commit_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,1384 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include +#include + +#include "arch/utility.hh" +#include "base/cp_annotate.hh" +#include "base/loader/symtab.hh" +#include "base/timebuf.hh" +#include "config/full_system.hh" +#include "config/the_isa.hh" +#include "config/use_checker.hh" +#include "cpu/exetrace.hh" +#include "cpu/edge/commit.hh" +#include "cpu/edge/thread_state.hh" +#include "params/SimpleEdgeCPU.hh" + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + +using namespace std; + +template +SimpleEdgeCommit::TrapEvent::TrapEvent(SimpleEdgeCommit *_commit, + ThreadID _tid) + : Event(CPU_Tick_Pri), commit(_commit), tid(_tid) +{ + this->setFlags(AutoDelete); +} + +template +void +SimpleEdgeCommit::TrapEvent::process() +{ + // This will get reset by commit if it was switched out at the + // time of this event processing. 
+ commit->trapSquash[tid] = true; +} + +template +const char * +SimpleEdgeCommit::TrapEvent::description() const +{ + return "Trap"; +} + +template +SimpleEdgeCommit::SimpleEdgeCommit(CPU *_cpu, SimpleEdgeCPUParams *params) + : cpu(_cpu), + squashCounter(0), + executeToCommitDelay(params->executeToCommitDelay), + commitToExecuteDelay(params->commitToExecuteDelay), + mapToROBDelay(params->mapToROBDelay), + fetchToCommitDelay(params->commitToFetchDelay), + mapWidth(params->mapWidth), + commitWidth(params->commitWidth), + numThreads(params->numThreads), + drainPending(false), + switchedOut(false), + trapLatency(params->trapLatency) +{ + _status = Active; + _nextStatus = Inactive; + std::string policy = params->smtCommitPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Assign commit policy + if (policy == "aggressive"){ + commitPolicy = Aggressive; + + DPRINTF(EdgeCommit,"Commit Policy set to Aggressive."); + } else if (policy == "roundrobin"){ + commitPolicy = RoundRobin; + + //Set-Up Priority List + for (ThreadID tid = 0; tid < numThreads; tid++) { + priority_list.push_back(tid); + } + + DPRINTF(EdgeCommit,"Commit Policy set to Round Robin."); + } else if (policy == "oldestready"){ + commitPolicy = OldestReady; + + DPRINTF(EdgeCommit,"Commit Policy set to Oldest Ready."); + } else { + assert(0 && "Invalid SMT Commit Policy. 
Options Are: {Aggressive," + "RoundRobin,OldestReady}"); + } + + for (ThreadID tid = 0; tid < numThreads; tid++) { + commitStatus[tid] = Idle; + changedROBNumEntries[tid] = false; + checkEmptyROB[tid] = false; + trapInFlight[tid] = false; + committedStores[tid] = false; + trapSquash[tid] = false; + tcSquash[tid] = false; + microPC[tid] = 0; + nextMicroPC[tid] = 0; + PC[tid] = 0; + nextPC[tid] = 0; + nextNPC[tid] = 0; + } +#if FULL_SYSTEM + interrupt = NoFault; +#endif +} + +template +std::string +SimpleEdgeCommit::name() const +{ + return cpu->name() + ".commit"; +} + +template +void +SimpleEdgeCommit::regStats() +{ + using namespace Stats; + + commitCommittedNopInsts + .name(name() + ".commitCommittedNopInsts") + .desc("The number of committed NOP instructions") + .prereq(commitCommittedNopInsts); + + commitCommittedInsts + .name(name() + ".commitCommittedInsts") + .desc("The number of committed instructions") + .prereq(commitCommittedInsts); + + commitCommittedUsefulInsts + .name(name() + ".commitCommittedUsefulInsts") + .desc("The number of committed instructions without NOPs") + .prereq(commitCommittedUsefulInsts); + + commitPercentNop + .name(name() + ".commitPercentNop") + .desc("The percent of NOP instructions") + .precision(6); + + commitCommittedUsefulInsts = + commitCommittedInsts -commitCommittedNopInsts; + commitPercentNop = + commitCommittedNopInsts / commitCommittedInsts; + + commitCommittedInstBlocks + .name(name() + ".commitCommittedInstBlocks") + .desc("The number of committed inst blocks") + .prereq(commitCommittedInstBlocks); + + commitSquashedInsts + .name(name() + ".commitSquashedInsts") + .desc("The number of squashed insts skipped by commit") + .prereq(commitSquashedInsts); + + commitSquashEvents + .name(name() + ".commitSquashEvents") + .desc("The number of times commit is told to squash") + .prereq(commitSquashEvents); + + commitNonSpecStalls + .name(name() + ".commitNonSpecStalls") + .desc("The number of times commit has been forced to 
stall to " + "communicate backwards") + .prereq(commitNonSpecStalls); + + branchMispredicts + .name(name() + ".branchMispredicts") + .desc("The number of times a branch was mispredicted") + .prereq(branchMispredicts); + + correctPredictions + .name(name() + ".correctPredictions") + .desc("The number of correct prediction") + .prereq(correctPredictions); + + branchPredRate + .name(name() + ".branchPredRate") + .desc("Correct prediction rate of all committed inst blocks. ") + .precision(6); + branchPredRate = correctPredictions / commitCommittedInstBlocks; + + numCommittedDist + .init(0,commitWidth,1) + .name(name() + ".COM:committed_per_cycle") + .desc("Number of useful insts commited each cycle") + .flags(Stats::pdf) + ; + + statComInstBlock + .init(cpu->numThreads) + .name(name() + ".COM:count") + .desc("Number of inst blocks committed") + .flags(total) + ; + + commitEligible + .init(cpu->numThreads) + .name(name() + ".COM:bw_limited") + .desc("number of insts not committed due to BW limits") + .flags(total) + ; + + commitEligibleSamples + .name(name() + ".COM:bw_lim_events") + .desc("number cycles where commit BW limit reached") + ; +} + +template +void +SimpleEdgeCommit::setThreads(std::vector &threads) +{ + thread = threads; +} + +template +void +SimpleEdgeCommit::setTimeBuffer(TimeBuffer *tb_ptr) +{ + timeBuffer = tb_ptr; + + // Setup wire to send information back to Execute. + toExecute = timeBuffer->getWire(0); + + // Setup wire to read data from IEW (for the ROB). + robInfoFromExecute = timeBuffer->getWire(-executeToCommitDelay); +} + +template +void +SimpleEdgeCommit::setFetchQueue(TimeBuffer *fq_ptr) +{ + fetch2commitQueue = fq_ptr; + + // Setup wire to get instructions from rename (for the ROB). + fromFetch = fetch2commitQueue->getWire(-fetchToCommitDelay); +} + +template +void +SimpleEdgeCommit::setMapQueue(TimeBuffer *rq_ptr) +{ + map2commitQueue = rq_ptr; + + // Setup wire to get instructions from rename (for the ROB). 
+ fromMap = map2commitQueue->getWire(-mapToROBDelay); +} + +template +void +SimpleEdgeCommit::setExecuteQueue(TimeBuffer *iq_ptr) +{ + execute2commitQueue = iq_ptr; + + // Setup wire to get instructions from IEW. + fromExecute = execute2commitQueue->getWire(-executeToCommitDelay); +} + +template +void +SimpleEdgeCommit::setExecuteStage(Execute *execute_stage) +{ + executeStage = execute_stage; +} + +template +void +SimpleEdgeCommit::setActiveThreads(list *at_ptr) +{ + activeThreads = at_ptr; +} + +#if 0 +template +void +SimpleEdgeCommit::setRenameMap(RenameMap rm_ptr[]) +{ + for (ThreadID tid = 0; tid < numThreads; tid++) + renameMap[tid] = &rm_ptr[tid]; +} +#endif + +template +void +SimpleEdgeCommit::setROB(ROB *rob_ptr) +{ + rob = rob_ptr; +} + +template +void +SimpleEdgeCommit::initStage() +{ + rob->setActiveThreads(activeThreads); + rob->resetEntries(); + + // Broadcast the number of free entries. + for (ThreadID tid = 0; tid < numThreads; tid++) { + toExecute->commitInfo[tid].usedROB = true; + toExecute->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); + toExecute->commitInfo[tid].emptyROB = true; + } + + // Commit must broadcast the number of free entries it has at the + // start of the simulation, so it starts as active. 
+ cpu->activateStage(CPU::CommitIdx); + + cpu->activityThisCycle(); + trapLatency = cpu->ticks(trapLatency); +} + +template +bool +SimpleEdgeCommit::drain() +{ + drainPending = true; + + return false; +} + +template +void +SimpleEdgeCommit::switchOut() +{ + switchedOut = true; + drainPending = false; + rob->switchOut(); +} + +template +void +SimpleEdgeCommit::resume() +{ + drainPending = false; +} + +template +void +SimpleEdgeCommit::takeOverFrom() +{ + switchedOut = false; + _status = Active; + _nextStatus = Inactive; + for (ThreadID tid = 0; tid < numThreads; tid++) { + commitStatus[tid] = Idle; + changedROBNumEntries[tid] = false; + trapSquash[tid] = false; + tcSquash[tid] = false; + } + squashCounter = 0; + rob->takeOverFrom(); +} + +template +void +SimpleEdgeCommit::updateStatus() +{ + // reset ROB changed variable + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + changedROBNumEntries[tid] = false; + + // Also check if any of the threads has a trap pending + if (commitStatus[tid] == TrapPending || + commitStatus[tid] == FetchTrapPending) { + _nextStatus = Active; + } + } + + if (_nextStatus == Inactive && _status == Active) { + DPRINTF(Activity, "Deactivating stage.\n"); + cpu->deactivateStage(CPU::CommitIdx); + } else if (_nextStatus == Active && _status == Inactive) { + DPRINTF(Activity, "Activating stage.\n"); + cpu->activateStage(CPU::CommitIdx); + } + + _status = _nextStatus; +} + +template +void +SimpleEdgeCommit::setNextStatus() +{ + int squashes = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (commitStatus[tid] == ROBSquashing) { + squashes++; + } + } + + squashCounter = squashes; + + // If commit is currently squashing, then it will have activity for the + // next cycle. Set its next status as active. 
+ if (squashCounter) { + _nextStatus = Active; + } +} + +template +bool +SimpleEdgeCommit::changedROBEntries() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (changedROBNumEntries[tid]) { + return true; + } + } + + return false; +} + +template +size_t +SimpleEdgeCommit::numROBFreeEntries(ThreadID tid) +{ + return rob->numFreeEntries(tid); +} + +template +void +SimpleEdgeCommit::generateTrapEvent(ThreadID tid) +{ + DPRINTF(EdgeCommit, "Generating trap event for [tid:%i]\n", tid); + + TrapEvent *trap = new TrapEvent(this, tid); + + cpu->schedule(trap, curTick + trapLatency); + trapInFlight[tid] = true; +} + +template +void +SimpleEdgeCommit::generateTCEvent(ThreadID tid) +{ + assert(!trapInFlight[tid]); + DPRINTF(EdgeCommit, "Generating TC squash event for [tid:%i]\n", tid); + + tcSquash[tid] = true; +} + +template +void +SimpleEdgeCommit::squashAll(ThreadID tid) +{ + // If we want to include the squashing instruction in the squash, + // then use one older sequence number. + // Hopefully this doesn't mess things up. Basically I want to squash + // all instructions of this thread. + TheISA::BlockID squashed_inst_block = rob->isEmpty() ? + 0 : rob->readHeadInstBlock(tid)->getBlockID() - 1; + + // All younger instructions will be squashed. Set the sequence + // number as the youngest instruction in the ROB (0 in this case. + // Hopefully nothing breaks.) + youngestSeqNum[tid] = 0; + + rob->squash(squashed_inst_block, tid); + changedROBNumEntries[tid] = true; + + // Send back the sequence number of the squashed instruction. + toExecute->commitInfo[tid].doneBlockID = squashed_inst_block; + + // Send back the squash signal to tell stages that they should + // squash. + toExecute->commitInfo[tid].squash = true; + + // Send back the rob squashing signal so other stages know that + // the ROB is in the process of squashing. 
+ toExecute->commitInfo[tid].robSquashing = true; + + toExecute->commitInfo[tid].branchMispredict = false; + + toExecute->commitInfo[tid].nextPC = PC[tid]; + toExecute->commitInfo[tid].nextNPC = PC[tid] + sizeof(TheISA::MachInst); + //toExecute->commitInfo[tid].nextMicroPC = nextMicroPC[tid]; +} + +template +void +SimpleEdgeCommit::squashFromTrap(ThreadID tid) +{ + squashAll(tid); + + DPRINTF(EdgeCommit, "Squashing from trap, restarting at PC %#x\n", PC[tid]); + + thread[tid]->trapPending = false; + thread[tid]->inSyscall = false; + trapInFlight[tid] = false; + + trapSquash[tid] = false; + + commitStatus[tid] = ROBSquashing; + cpu->activityThisCycle(); +} + +template +void +SimpleEdgeCommit::squashFromTC(ThreadID tid) +{ + squashAll(tid); + + DPRINTF(EdgeCommit, "Squashing from TC, restarting at PC %#x\n", PC[tid]); + + thread[tid]->inSyscall = false; + assert(!thread[tid]->trapPending); + + commitStatus[tid] = ROBSquashing; + cpu->activityThisCycle(); + + tcSquash[tid] = false; +} + +template +void +SimpleEdgeCommit::tick() +{ + wroteToTimeBuffer = false; + _nextStatus = Inactive; + + if (drainPending && rob->isEmpty() && !executeStage->hasStoresToWB()) { + cpu->signalDrained(); + drainPending = false; + return; + } + + if (activeThreads->empty()) + return; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + // Check if any of the threads are done squashing. Change the + // status if they are done. + while (threads != end) { + ThreadID tid = *threads++; + + // Clear the bit saying if the thread has committed stores + // this cycle. 
+ committedStores[tid] = false; + + if (commitStatus[tid] == ROBSquashing) { + + if (rob->isDoneSquashing(tid)) { + commitStatus[tid] = Running; + } else { + DPRINTF(EdgeCommit,"[tid:%u]: Still Squashing, cannot commit any" + " inst blocks this cycle.\n", tid); + rob->doSquash(tid); + toExecute->commitInfo[tid].robSquashing = true; + wroteToTimeBuffer = true; + } + } + } + + commit(); + + markCompletedInstBlocks(); + + threads = activeThreads->begin(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!rob->isEmpty(tid) && (rob->readHeadInstBlock(tid))->readyToCommit()) { + // The ROB has more instruction blocks it can commit. Its next status + // will be active. + _nextStatus = Active; + + BlockPtr inst_block = rob->readHeadInstBlock(tid); + + DPRINTF(EdgeCommit,"[tid:%i]: Inst block [id:%lli] PC %#x is head of" + " ROB and ready to commit\n", + tid, inst_block->getBlockID(), inst_block->getStartPC()); + + } else if (!rob->isEmpty(tid)) { + BlockPtr inst_block = rob->readHeadInstBlock(tid); + + DPRINTF(EdgeCommit,"[tid:%i]: Can't commit, Inst block [id:%lli] PC " + "%#x is head of ROB and not ready\n", + tid, inst_block->getBlockID(), inst_block->getStartPC()); + } + + DPRINTF(EdgeCommit, "[tid:%i]: ROB has %d inst blocks & %d free entries.\n", + tid, rob->countInstBlocks(tid), rob->numFreeEntries(tid)); + } + + + if (wroteToTimeBuffer) { + DPRINTF(Activity, "Activity This Cycle.\n"); + cpu->activityThisCycle(); + } + + updateStatus(); +} + +#if FULL_SYSTEM +template +void +SimpleEdgeCommit::handleInterrupt() +{ + if (interrupt != NoFault) { + // Wait until the ROB is empty and all stores have drained in + // order to enter the interrupt. + if (rob->isEmpty() && !executeStage->hasStoresToWB()) { + // Squash or record that I need to squash this cycle if + // an interrupt needed to be handled. 
+ DPRINTF(EdgeCommit, "Interrupt detected.\n"); + + // Clear the interrupt now that it's going to be handled + toExecute->commitInfo[0].clearInterrupt = true; + + assert(!thread[0]->inSyscall); + thread[0]->inSyscall = true; + + // CPU will handle interrupt. + cpu->processInterrupts(interrupt); + + thread[0]->inSyscall = false; + + commitStatus[0] = TrapPending; + + // Generate trap squash event. + generateTrapEvent(0); + + interrupt = NoFault; + } else { + DPRINTF(EdgeCommit, "Interrupt pending, waiting for ROB to empty.\n"); + } + } else if (commitStatus[0] != TrapPending && + cpu->checkInterrupts(cpu->tcBase(0)) && + !trapSquash[0] && + !tcSquash[0]) { + // Process interrupts if interrupts are enabled, not in PAL + // mode, and no other traps or external squashes are currently + // pending. + // @todo: Allow other threads to handle interrupts. + + // Get any interrupt that happened + interrupt = cpu->getInterrupts(); + + if (interrupt != NoFault) { + // Tell fetch that there is an interrupt pending. This + // will make fetch wait until it sees a non PAL-mode PC, + // at which point it stops fetching instructions. + toExecute->commitInfo[0].interruptPending = true; + } + } +} +#endif // FULL_SYSTEM + +template +void +SimpleEdgeCommit::commit() +{ + +#if FULL_SYSTEM + // Check for any interrupt, and start processing it. Or if we + // have an outstanding interrupt and are at a point when it is + // valid to take an interrupt, process it. + if (cpu->checkInterrupts(cpu->tcBase(0))) { + handleInterrupt(); + } +#endif // FULL_SYSTEM + + //////////////////////////////////// + // Check for any possible squashes, handle them first + //////////////////////////////////// + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + // Not sure which one takes priority. I think if we have + // both, that's a bad sign. 
+ if (trapSquash[tid] == true) { + assert(!tcSquash[tid]); + squashFromTrap(tid); + } else if (tcSquash[tid] == true) { + assert(commitStatus[tid] != TrapPending); + squashFromTC(tid); + } + + // Squashed sequence number must be older than youngest valid + // instruction in the ROB. This prevents squashes from younger + // instructions overriding squashes from older instructions. + if (fromExecute->squash[tid] && + commitStatus[tid] != TrapPending && + fromExecute->squashedSeqNum[tid] <= youngestSeqNum[tid]) { + + DPRINTF(EdgeCommit, "[tid:%i]: Squashing due to PC %#x [sn:%i]\n", + tid, + fromExecute->mispredPC[tid], + fromExecute->squashedSeqNum[tid]); + + DPRINTF(EdgeCommit, "[tid:%i]: Redirecting to PC %#x\n", + tid, + fromExecute->nextPC[tid]); + + commitStatus[tid] = ROBSquashing; + + // If we want to include the squashing instruction in the squash, + // then use one older sequence number. + InstSeqNum squashed_inst_block = fromExecute->squashedSeqNum[tid]; + + if (fromExecute->includeSquashInstBlock[tid] == true) { + squashed_inst_block--; + } + + // All younger instructions will be squashed. Set the sequence + // number as the youngest instruction in the ROB. + youngestSeqNum[tid] = squashed_inst_block; + + rob->squash(squashed_inst_block, tid); + changedROBNumEntries[tid] = true; + + toExecute->commitInfo[tid].doneBlockID = squashed_inst_block; + + toExecute->commitInfo[tid].squash = true; + + // Send back the rob squashing signal so other stages know that + // the ROB is in the process of squashing. 
+ toExecute->commitInfo[tid].robSquashing = true; + + toExecute->commitInfo[tid].branchMispredict = + fromExecute->branchMispredict[tid]; + + if (fromExecute->branchMispredict[tid] ) { + toExecute->commitInfo[tid].exitID = fromExecute->exitID; + toExecute->commitInfo[tid].exitType = fromExecute->exitType; + } + + toExecute->commitInfo[tid].branchTaken = + fromExecute->branchTaken[tid]; + + toExecute->commitInfo[tid].exitID = + fromExecute->exitID; + toExecute->commitInfo[tid].exitType = + fromExecute->exitType; + + toExecute->commitInfo[tid].nextPC = fromExecute->nextPC[tid]; + toExecute->commitInfo[tid].nextNPC = fromExecute->nextPC[tid] + + sizeof(TheISA::MachInst); + + toExecute->commitInfo[tid].mispredPC = fromExecute->mispredPC[tid]; + + //if (toExecute->commitInfo[tid].branchMispredict) { + //++branchMispredicts; + //} + } + + } + + setNextStatus(); + + if (squashCounter != numThreads) { + // If we're not currently squashing, then get instructions. + getInstBlocks(); + + // Try to commit any instructions. + commitInstBlocks(); + } + + //Check for any activity + threads = activeThreads->begin(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (changedROBNumEntries[tid]) { + toExecute->commitInfo[tid].usedROB = true; + toExecute->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); + + wroteToTimeBuffer = true; + changedROBNumEntries[tid] = false; + if (rob->isEmpty(tid)) + checkEmptyROB[tid] = true; + } + + // ROB is only considered "empty" for previous stages if: a) + // ROB is empty, b) there are no outstanding stores, c) IEW + // stage has received any information regarding stores that + // committed. + // c) is checked by making sure to not consider the ROB empty + // on the same cycle as when stores have been committed. + // @todo: Make this handle multi-cycle communication between + // commit and IEW. 
+ if (checkEmptyROB[tid] && rob->isEmpty(tid) && + !executeStage->hasStoresToWB(tid) && !committedStores[tid]) { + checkEmptyROB[tid] = false; + toExecute->commitInfo[tid].usedROB = true; + toExecute->commitInfo[tid].emptyROB = true; + toExecute->commitInfo[tid].freeROBEntries = rob->numFreeEntries(tid); + wroteToTimeBuffer = true; + } + + } +} + +template +void +SimpleEdgeCommit::commitInstBlocks() +{ + //////////////////////////////////// + // Handle commit + // Note that commit will be handled prior to putting new + // instructions in the ROB so that the ROB only tries to commit + // instructions it has in this current cycle, and not instructions + // it is writing in during this cycle. Can't commit and squash + // things at the same time... + //////////////////////////////////// + + DPRINTF(EdgeCommit, "Trying to commit inst blocks in the ROB.\n"); + + unsigned num_committed = 0; + + BlockPtr head_inst_block; + + // Commit as many inst blocks as possible until the commit bandwidth + // limit is reached, or it becomes impossible to commit any more. + while (num_committed < commitWidth) { + int commit_thread = getCommittingThread(); + + if (commit_thread == -1 || !rob->isHeadReady(commit_thread)) + break; + + head_inst_block = rob->readHeadInstBlock(commit_thread); + + ThreadID tid = head_inst_block->getTid(); + + assert(tid == commit_thread); + + DPRINTF(EdgeCommit, "Trying to commit head inst block, [id:%i] [tid:%i]\n", + head_inst_block->getBlockID(), tid); + + // If the head instruction is squashed, it is ready to retire + // (be removed from the ROB) at any time. + if (head_inst_block->isSquashed()) { + + DPRINTF(EdgeCommit, "Retiring squashed inst block from " + "ROB.\n"); + + rob->retireHead(commit_thread); + + ++commitSquashedInsts; + + // Record that the number of ROB entries has changed. 
+ changedROBNumEntries[tid] = true; + } else { + PC[tid] = head_inst_block->getStartPC(); + nextPC[tid] = head_inst_block->getBranchTarget(); + //nextNPC[tid] = head_inst_block->readNextNPC(); + + // Increment the total number of non-speculative instructions + // executed. + // Hack for now: it really shouldn't happen until after the + // commit is deemed to be successful, but this count is needed + // for syscalls. + thread[tid]->funcExeInst++; + + // Try to commit the head inst block. + bool commit_success = commitHead(head_inst_block, num_committed); + + if (commit_success) { + ++num_committed; + + changedROBNumEntries[tid] = true; +#if 0 + // For debug + #if 0 + + std::string sym_str; + Addr sym_addr; + if ( curTick > 900004978000ULL) { + debugSymbolTable->findNearestSymbol(head_inst_block->getStartPC(), sym_str, sym_addr); + std::cout<< "Simulator executing @" <getBlockID() << " tick: " << curTick << std::endl; + } + #endif +#endif + #if TRACING_ON + + std::string sym_str; + Addr sym_addr; + debugSymbolTable->findNearestSymbol(head_inst_block->getStartPC(), sym_str, sym_addr); + DPRINTFR(EdgeCommitResultSym, ".BLOCK %s\n", + sym_str.c_str()/*, head_inst_block->getStartPC(), head_inst_block->getBlockID()*/); + #endif + + #ifndef NDEBUG + head_inst_block->dumpOutput(); + #endif + + // Set the doneSeqNum to the youngest committed instruction. 
+ DPRINTF(EdgeCommitResult, "Successfully commited inst block[Bid%lli], inform fetch" + "to redirect to %#x\n", head_inst_block->getBlockID(), + head_inst_block->getBranchTarget() ); + + toExecute->commitInfo[tid].doneBlockID = head_inst_block->getBlockID(); + toExecute->commitInfo[tid].blockPC = head_inst_block->getStartPC(); + toExecute->commitInfo[tid].exitID = head_inst_block->getExitID(); + toExecute->commitInfo[tid].exitType = head_inst_block->getExitType(); + toExecute->commitInfo[tid].nextPC = head_inst_block->getBranchTarget(); + toExecute->commitInfo[tid].nextNPC = + head_inst_block->getBranchTarget() + sizeof(TheISA::MachInst); + + if(head_inst_block->misPredicted()) + { + ++branchMispredicts; + toExecute->commitInfo[tid].branchMispredict = true; + }else{ + ++correctPredictions; + } + + if ( head_inst_block->isNeedSyscall() ) { + toExecute->commitInfo[tid].needSyscall = true; + } + + ++commitCommittedInstBlocks; + + commitCommittedInsts += head_inst_block->getNumAllInst(); + + commitCommittedNopInsts += head_inst_block->getNumNopInst(); + + cpu->instBlockDone(tid); + + PC[tid] = nextPC[tid]; + + int count = 0; + Addr oldpc; + // Debug statement. Checks to make sure we're not + // currently updating state while handling PC events. 
+ assert(!thread[tid]->inSyscall && !thread[tid]->trapPending); + do { + oldpc = PC[tid]; + cpu->system->pcEventQueue.service(thread[tid]->getTC()); + count++; + } while (oldpc != PC[tid]); + if (count > 1) { + DPRINTF(EdgeCommit, + "PC skip function event, stopping commit\n"); + break; + } + } else { + DPRINTF(EdgeCommit, "Unable to commit head instruction PC:%#x " + "[tid:%i] [sn:%i].\n", + head_inst_block->getStartPC(), tid ,head_inst_block->getBlockID()); + break; + } + } + } + + DPRINTF(EdgeCommit, "%i\n", num_committed); + numCommittedDist.sample(num_committed); + + if (num_committed == commitWidth) { + commitEligibleSamples++; + } +} + +template +bool +SimpleEdgeCommit::commitHead(BlockPtr &head_inst_block, unsigned inst_block_num) +{ + assert(head_inst_block); + + ThreadID tid = head_inst_block->getTid(); + + // Check if the inst block caused a fault. If so, trap. + Fault inst_block_fault = head_inst_block->getFault(); + +#if USE_CHECKER + // Use checker prior to updating anything due to traps or PC + // based events. + if (cpu->checker) { + cpu->checker->verify(head_inst_block); + } +#endif + + // DTB will sometimes need the machine instruction for when + // faults happen. So we will set it here, prior to the DTB + // possibly needing it for its fault. 
+ //thread[tid]->setInst( + //static_cast(head_inst_block->staticInst->machInst)); + + if (inst_block_fault != NoFault) { + DPRINTF(EdgeCommit, "Inst block[sn:%lli] PC %#x has a fault %s.\n", + head_inst_block->getBlockID(), head_inst_block->getStartPC(), + inst_block_fault->name()); + + if (executeStage->hasStoresToWB(tid) || inst_block_num > 0) { + DPRINTF(EdgeCommit, "Stores outstanding, fault must wait.\n"); + return false; + } + + head_inst_block->setCompleted(); + +#if USE_CHECKER + if (cpu->checker && head_inst_block->isStore()) { + cpu->checker->verify(head_inst_block); + } +#endif + + assert(!thread[tid]->inSyscall); + + // Mark that we're in state update mode so that the trap's + // execution doesn't generate extra squashes. + thread[tid]->inSyscall = true; + + // Execute the trap. Although it's slightly unrealistic in + // terms of timing (as it doesn't wait for the full timing of + // the trap event to complete before updating state), it's + // needed to update the state as soon as possible. This + // prevents external agents from changing any specific state + // that the trap need. + cpu->trap(inst_block_fault, tid); + + // Exit state update mode to avoid accidental updating. + thread[tid]->inSyscall = false; + + commitStatus[tid] = TrapPending; + + #if 0 + if (head_inst_block->traceData) { + if (DTRACE(ExecFaulting)) { + head_inst_block->traceData->setFetchSeq(head_inst_block->getBlockID()); + head_inst_block->traceData->setCPSeq(thread[tid]->numInst); + head_inst_block->traceData->dump(); + } + delete head_inst_block->traceData; + head_inst_block->traceData = NULL; + } + #endif + + // Generate trap squash event. + generateTrapEvent(tid); +// warn("%lli fault (%d) handled @ PC %08p", curTick, inst_fault->name(), head_inst->readPC()); + return false; + } + + updateComInstBlockStats(head_inst_block); + +#if FULL_SYSTEM + if (thread[tid]->profile) { +// bool usermode = TheISA::inUserMode(thread[tid]->getTC()); +// thread[tid]->profilePC = usermode ? 
1 : head_inst->readPC(); + thread[tid]->profilePC = head_inst_block->getStartPC(); + ProfileNode *node = thread[tid]->profile->consume(thread[tid]->getTC(), + head_inst_block->staticInst); + + if (node) + thread[tid]->profileNode = node; + } + if (CPA::available()) { + if (head_inst_block->isControl()) { + ThreadContext *tc = thread[tid]->getTC(); + CPA::cpa()->swAutoBegin(tc, head_inst_block->readNextPC()); + } + } +#endif +#if 0 + if (head_inst_block->traceData) { + head_inst_block->traceData->setFetchSeq(head_inst_block->getBlockID()); + head_inst_block->traceData->setCPSeq(thread[tid]->numInst); + head_inst_block->traceData->dump(); + delete head_inst_block->traceData; + head_inst_block->traceData = NULL; + } +#endif +#if 0 + // Update the commit rename map + for (int i = 0; i < head_inst_block->numDestRegs(); i++) { + renameMap[tid]->setEntry(head_inst->flattenedDestRegIdx(i), + head_inst->renamedDestRegIdx(i)); + } +#endif + + // Finally clear the head ROB entry. + rob->retireHead(tid); + + // If this was a store, record it for this cycle. + if (head_inst_block->getNumStoreInst() != 0) + committedStores[tid] = true; + + // Return true to indicate that we have committed an instruction. + return true; +} + +template +void +SimpleEdgeCommit::getInstBlocks() +{ + DPRINTF(EdgeCommit, "Getting inst blocks from Map stage.\n"); + + // Read any maped inst blocks and place them into the ROB. 
+ int inst_blocks_to_process = std::min((int)mapWidth, fromMap->size); + + for (int inst_block_num = 0; inst_block_num < inst_blocks_to_process; ++inst_block_num) { + BlockPtr inst_block; + + inst_block = fromMap->instBlocks[inst_block_num]; + ThreadID tid = inst_block->getTid(); + + if (!inst_block->isSquashed() && + commitStatus[tid] != ROBSquashing && + commitStatus[tid] != TrapPending) { + changedROBNumEntries[tid] = true; + + DPRINTF(EdgeCommit, "Inserting PC %#x [id:%i] [tid:%i] into ROB.\n", + inst_block->getStartPC(), inst_block->getBlockID(), tid); + + rob->insertInstBlock(inst_block); + + DPRINTF(EdgeCommit, "%i entries have been used in ROB. Max is %i\n", + rob->getThreadEntries(tid), rob->getMaxEntries(tid)); + + assert(rob->getThreadEntries(tid) <= rob->getMaxEntries(tid)); + + youngestSeqNum[tid] = inst_block->getBlockID(); + } else { + DPRINTF(EdgeCommit, "Inst block PC %#x [sn:%i] [tid:%i] was " + "squashed, skipping.\n", + inst_block->getStartPC(), inst_block->getBlockID(), tid); + } + } +} + +template +void +SimpleEdgeCommit::skidInsert() +{ + DPRINTF(EdgeCommit, "Attempting to any inst blocks from rename into " + "skidBuffer.\n"); + + for (int inst_block_num = 0; inst_block_num < fromMap->size; ++inst_block_num) { + BlockPtr inst_block = fromMap->instBlocks[inst_block_num]; + + if (!inst_block->isSquashed()) { + DPRINTF(EdgeCommit, "Inserting PC %#x [sn:%i] [tid:%i] into ", + "skidBuffer.\n", inst_block->getStartPC(), inst_block->getBlockID(), + inst_block->getTid()); + skidBuffer.push(inst_block); + } else { + DPRINTF(EdgeCommit, "Inst block PC %#x [sn:%i] [tid:%i] was " + "squashed, skipping.\n", + inst_block->getStartPC(), inst_block->getBlockID(), inst_block->getTid()); + } + } +} + +template +void +SimpleEdgeCommit::markCompletedInstBlocks() +{ + // Grab completed insts out of the Execute inst block queue, and mark + // inst blocks completed within the ROB. 
+ for (int inst_block_num = 0; + inst_block_num < fromExecute->size && fromExecute->instBlocks[inst_block_num]; + ++inst_block_num) + { + if (!(fromExecute->instBlocks[inst_block_num])->isSquashed()) { + DPRINTF(EdgeCommit, "[tid:%i]: Marking PC %#x, [sn:%lli] ready " + "within ROB.\n", + fromExecute->instBlocks[inst_block_num]->getTid(), + fromExecute->instBlocks[inst_block_num]->getStartPC(), + fromExecute->instBlocks[inst_block_num]->getBlockID()); + + // Mark the inst block as ready to commit. + fromExecute->instBlocks[inst_block_num]->setCanCommit(); + } + } +} + +template +bool +SimpleEdgeCommit::robDoneSquashing() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!rob->isDoneSquashing(tid)) + return false; + } + + return true; +} + +template +void +SimpleEdgeCommit::updateComInstBlockStats(BlockPtr &inst_block) +{ + ThreadID tid = inst_block->getTid(); + statComInstBlock[tid] ++; + +#if 0 + // + // Control Instructions + // + if (inst_block->isControl()) + statComBranches[tid]++; + + // + // Memory references + // + if (inst_block->isMemRef()) { + statComRefs[tid]++; + + if (inst_block->isLoad()) { + statComLoads[tid]++; + } + } + + if (inst_block->isMemBarrier()) { + statComMembars[tid]++; + } +#endif +} + +//////////////////////////////////////// +// // +// SMT COMMIT POLICY MAINTAINED HERE // +// // +//////////////////////////////////////// +template +ThreadID +SimpleEdgeCommit::getCommittingThread() +{ + if (numThreads > 1) { + switch (commitPolicy) { + + case Aggressive: + //If Policy is Aggressive, commit will call + //this function multiple times per + //cycle + return oldestReady(); + + case RoundRobin: + return roundRobin(); + + case OldestReady: + return oldestReady(); + + default: + return InvalidThreadID; + } + } else { + assert(!activeThreads->empty()); + ThreadID tid = activeThreads->front(); + + if (commitStatus[tid] == Running || 
+ commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending) { + return tid; + } else { + return InvalidThreadID; + } + } +} + +template +ThreadID +SimpleEdgeCommit::roundRobin() +{ + list::iterator pri_iter = priority_list.begin(); + list::iterator end = priority_list.end(); + + while (pri_iter != end) { + ThreadID tid = *pri_iter; + + if (commitStatus[tid] == Running || + commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending) { + + if (rob->isHeadReady(tid)) { + priority_list.erase(pri_iter); + priority_list.push_back(tid); + + return tid; + } + } + + pri_iter++; + } + + return InvalidThreadID; +} + +template +ThreadID +SimpleEdgeCommit::oldestReady() +{ + unsigned oldest = 0; + bool first = true; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!rob->isEmpty(tid) && + (commitStatus[tid] == Running || + commitStatus[tid] == Idle || + commitStatus[tid] == FetchTrapPending)) { + + if (rob->isHeadReady(tid)) { + + BlockPtr head_inst_block = rob->readHeadInstBlock(tid); + + if (first) { + oldest = tid; + first = false; + } else if (head_inst_block->getBlockID() < oldest) { + oldest = tid; + } + } + } + } + + if (!first) { + return oldest; + } else { + return InvalidThreadID; + } +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/cpu.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/cpu.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,741 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#ifndef __CPU_EDGE_CPU_HH__ +#define __CPU_EDGE_CPU_HH__ + +#include +#include +#include +#include +#include + +#include "arch/types.hh" +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "config/full_system.hh" +#include "config/the_isa.hh" +#include "config/use_checker.hh" +#include "cpu/activity.hh" +#include "cpu/base.hh" +#include "cpu/simple_thread.hh" +#include "cpu/edge/comm.hh" +#include "cpu/edge/cpu_policy.hh" +#include "cpu/edge/thread_state.hh" +#include "cpu/edge/insttracer.hh" +//#include "cpu/edge/thread_context.hh" +#include "sim/process.hh" +#include "cpu/edge/inst_queue.hh" +#include "params/SimpleEdgeCPU.hh" + +template +class Checker; +class ThreadContext; +template +class EdgeThreadContext; + +class Checkpoint; +class MemObject; +class Process; + +class BaseCPUParams; + +class BaseEdgeCPU : public BaseCPU +{ + //Stuff that's pretty ISA independent will go here. + public: + BaseEdgeCPU(BaseCPUParams *params); + + void regStats(); + + protected: + Trace::EdgeInstTracer * edge_tracer; + + public: + /// Provide access to the tracer pointer + Trace::EdgeInstTracer * getEdgeTracer() { return edge_tracer; } +}; + + +typedef uint64_t BlockSeqNum; + +template +class EdgeCPU : public BaseEdgeCPU +{ + public: + // Typedefs from the Impl here. + typedef typename Impl::CPUPol CPUPolicy; + typedef typename Impl::EdgeBlockPtr BlockPtr; + typedef EdgeThreadState ImplState; + typedef EdgeThreadState Thread; + typedef typename std::list::iterator ListIt; + + friend class EdgeThreadContext; + + public: + /** Five status of CPU */ + enum Status { + Running, + Idle, + Halted, + Blocked, + SwitchedOut + }; + + TheISA::TLB * itb; + TheISA::TLB * dtb; + + /** Overall CPU status. */ + Status _status; + + /** Per-thread status in CPU, used for SMT. */ + Status _threadStatus[Impl::MaxThreads]; + + private: + /** Tick-event of CPU, pushing CPU forward. */ + class TickEvent : public Event + { + private: + /** Pointer to the CPU. 
*/ + EdgeCPU *cpu; + + public: + /** Constructs a tick event. */ + TickEvent(EdgeCPU *c); + + /** Processes a tick event, calling tick() on the CPU. */ + void process(); + /** Returns the description of the tick event. */ + const char *description() const; + }; + + /** The tick event used for scheduling CPU ticks. */ + TickEvent tickEvent; + + /** Schedule tick event, regardless of its current state. */ + void scheduleTickEvent(int delay) + { + if (tickEvent.squashed()) + reschedule(tickEvent, nextCycle(curTick + ticks(delay))); + else if (!tickEvent.scheduled()) + schedule(tickEvent, nextCycle(curTick + ticks(delay))); + } + + /** Unschedule tick event, regardless of its current state. */ + void unscheduleTickEvent() + { + if (tickEvent.scheduled()) + tickEvent.squash(); + } + + class ActivateThreadEvent : public Event + { + private: + /** Number of Thread to Activate */ + ThreadID tid; + + /** Pointer to the CPU. */ + EdgeCPU *cpu; + + public: + /** Constructs the event. */ + ActivateThreadEvent(); + + /** Initialize Event */ + void init(int thread_num, EdgeCPU *thread_cpu); + + /** Processes the event, calling activateThread() on the CPU. */ + void process(); + + /** Returns the description of the event. */ + const char *description() const; + }; + + /** Schedule thread to activate , regardless of its current state. */ + void + scheduleActivateThreadEvent(ThreadID tid, int delay) + { + // Schedule thread to activate, regardless of its current state. + if (activateThreadEvent[tid].squashed()) + reschedule(activateThreadEvent[tid], + nextCycle(curTick + ticks(delay))); + else if (!activateThreadEvent[tid].scheduled()) + schedule(activateThreadEvent[tid], + nextCycle(curTick + ticks(delay))); + } + + /** Unschedule actiavte thread event, regardless of its current state. */ + void + unscheduleActivateThreadEvent(ThreadID tid) + { + if (activateThreadEvent[tid].scheduled()) + activateThreadEvent[tid].squash(); + } + + /** The tick event used for scheduling CPU ticks. 
*/ + ActivateThreadEvent activateThreadEvent[Impl::MaxThreads]; + + class DeallocateContextEvent : public Event + { + private: + /** ID of the thread to deactivate. */ + ThreadID tid; + + /** Should the thread be removed from the CPU? */ + bool remove; + + /** Pointer to the CPU. */ + EdgeCPU *cpu; + + public: + /** Constructs the event. */ + DeallocateContextEvent(); + + /** Records the thread ID and the owning CPU, and clears the + * remove flag. + */ + void init(int thread_num, EdgeCPU *thread_cpu); + + /** Processes the event: calls deactivateThread() on the CPU and, + * if the remove flag is set, removeThread() as well. + */ + void process(); + + /** Sets whether the thread should also be removed from the CPU. */ + void setRemove(bool _remove) { remove = _remove; } + + /** Returns the description of the event. */ + const char *description() const; + }; + + /** Schedule deallocation of thread tid's context at + * nextCycle(curTick + ticks(delay)); an event that is already + * scheduled (and not squashed) is left as it is. + * NOTE(review): the remove argument is not used in this body + * (setRemove() is never called here) -- confirm whether that is + * intended. + */ + void + scheduleDeallocateContextEvent(ThreadID tid, bool remove, int delay) + { + // Schedule the deallocation, unless the event is already pending. + if (deallocateContextEvent[tid].squashed()) + reschedule(deallocateContextEvent[tid], + nextCycle(curTick + ticks(delay))); + else if (!deallocateContextEvent[tid].scheduled()) + schedule(deallocateContextEvent[tid], + nextCycle(curTick + ticks(delay))); + } + + /** Squash the deallocate-context event of thread tid if it is scheduled. */ + void + unscheduleDeallocateContextEvent(ThreadID tid) + { + if (deallocateContextEvent[tid].scheduled()) + deallocateContextEvent[tid].squash(); + } + + /** Per-thread events used to schedule context deallocation. */ + DeallocateContextEvent deallocateContextEvent[Impl::MaxThreads]; + + public: + /** Constructs a CPU with the given parameters. */ + EdgeCPU(SimpleEdgeCPUParams *params); + /** Destructor. */ + ~EdgeCPU(); + + /** Registers statistics.
*/ + void regStats(); + + void demapPage(Addr vaddr, uint64_t asn) + { + this->itb->demapPage(vaddr, asn); + this->dtb->demapPage(vaddr, asn); + } + + void demapInstPage(Addr vaddr, uint64_t asn) + { + this->itb->demapPage(vaddr, asn); + } + + void demapDataPage(Addr vaddr, uint64_t asn) + { + this->dtb->demapPage(vaddr, asn); + } + + /** Returns a specific port. */ + Port *getPort(const std::string &if_name, int idx); + + /** Ticks CPU, calling tick() on each stage, and checking the overall + * activity to see if the CPU should deschedule itself. + */ + void tick(); + + /** Initialize the CPU */ + void init(); + + /** Returns the Number of Active Threads in the CPU */ + int numActiveThreads() + { return activeThreads.size(); } + + /** Add Thread to Active Threads List */ + void activateThread(ThreadID tid); + + /** Remove Thread from Active Threads List */ + void deactivateThread(ThreadID tid); + + /** Setup CPU to insert a thread's context */ + void insertThread(ThreadID tid); + + /** Remove all of a thread's context from CPU */ + void removeThread(ThreadID tid); + + /** Count the Total Instructions Committed in the CPU. */ + virtual Counter totalInstructions() const; + virtual Counter totalInstructionBlocks() const; + + /** Add Thread to Active Threads List. */ + void activateContext(ThreadID tid, int delay); + + /** Remove Thread from Active Threads List */ + void suspendContext(ThreadID tid); + + /** Remove Thread from Active Threads List && + * Possibly Remove Thread Context from CPU. + */ + bool deallocateContext(ThreadID tid, bool remove, int delay = 1); + + /** Remove Thread from Active Threads List && + * Remove Thread Context from CPU. + */ + void haltContext(ThreadID tid); + + /** Activate a Thread When CPU Resources are Available. */ + void activateWhenReady(ThreadID tid); + + /** Add or Remove a Thread Context in the CPU. */ + void doContextSwitch(); + + /** Update The Order In Which We Process Threads. 
*/ + void updateThreadPriority(); + + /** Serialize state. */ + virtual void serialize(std::ostream &os); + + /** Unserialize from a checkpoint. */ + virtual void unserialize(Checkpoint *cp, const std::string §ion); + + public: +#if !FULL_SYSTEM + /** Executes a syscall. + * @todo: Determine if this needs to be virtual. + */ + void syscall(int64_t callnum, ThreadID tid); +#endif + + /** Starts draining the CPU's pipeline of all instructions in + * order to stop all memory accesses. */ + virtual unsigned int drain(Event *drain_event); + + /** Resumes execution after a drain. */ + virtual void resume(); + + /** Signals to this CPU that a stage has completed switching out. */ + void signalDrained(); + + /** Switches out this CPU. */ + virtual void switchOut(); + + /** Takes over from another CPU. */ + virtual void takeOverFrom(BaseCPU *oldCPU); + + /** Get the current instruction sequence number, and increment it. */ + InstSeqNum getAndIncrementInstSeq() + { return globalSeqNum++; } + + /** Traps to handle given fault. */ + void trap(Fault fault, ThreadID tid); + +#if FULL_SYSTEM + /** HW return from error interrupt. */ + Fault hwrei(ThreadID tid); + + bool simPalCheck(int palFunc, ThreadID tid); + + /** Returns the Fault for any valid interrupt. */ + Fault getInterrupts(); + + /** Processes any an interrupt fault. */ + void processInterrupts(Fault interrupt); + + /** Halts the CPU. */ + void halt() { panic("Halt not implemented!\n"); } + + /** Update the Virt and Phys ports of all ThreadContexts to + * reflect change in memory connections. */ + void updateMemPorts(); + + /** Check if this address is a valid instruction address. */ + bool validInstAddr(Addr addr) { return true; } + + /** Check if this address is a valid data address. */ + bool validDataAddr(Addr addr) { return true; } +#endif + + /** Register accessors. Index refers to the physical register index. */ + /** In Edge, reserve the register methods to allow register operations + * between inst blocks. 
+ */ + + /** Reads a miscellaneous register. */ + TheISA::MiscReg readMiscRegNoEffect(int misc_reg, ThreadID tid); + + /** Reads a misc. register, including any side effects the read + * might have as defined by the architecture. + */ + TheISA::MiscReg readMiscReg(int misc_reg, ThreadID tid); + + /** Sets a miscellaneous register. */ + void setMiscRegNoEffect(int misc_reg, const TheISA::MiscReg &val, + ThreadID tid); + + /** Sets a misc. register, including any side effects the write + * might have as defined by the architecture. + */ + void setMiscReg(int misc_reg, const TheISA::MiscReg &val, + ThreadID tid); + + uint64_t readIntReg(int reg_idx); + + TheISA::FloatReg readFloatReg(int reg_idx); + + TheISA::FloatRegBits readFloatRegBits(int reg_idx); + + void setIntReg(int reg_idx, uint64_t val); + + void setFloatReg(int reg_idx, TheISA::FloatReg val); + + void setFloatRegBits(int reg_idx, TheISA::FloatRegBits val); + + uint64_t readArchIntReg(int reg_idx, ThreadID tid); + + float readArchFloatReg(int reg_idx, ThreadID tid); + + uint64_t readArchFloatRegInt(int reg_idx, ThreadID tid); + + /** Architectural register accessors. Looks up in the commit + * rename table to obtain the true physical index of the + * architected register first, then accesses that physical + * register. + */ + void setArchIntReg(int reg_idx, uint64_t val, ThreadID tid); + + void setArchFloatReg(int reg_idx, float val, ThreadID tid); + + void setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid); + + /** Block PC is the start address of each inst block. + * In Edge architecture, fetch stage will maintain a + * inst-length PC used to fetch inst from memory while + * block PC will be maintained in commit stage indicating + * the block commit status. + * + */ + Addr getBlockPC(ThreadID tid); + + void setBlockPC(Addr val, ThreadID tid); + + /** Reads the commit PC of a specific thread. */ + Addr readPC(ThreadID tid); + + /** Sets the commit PC of a specific thread. 
*/ + void setPC(Addr new_PC, ThreadID tid); + + /** Reads the next PC of a specific thread. */ + Addr readNextPC(ThreadID tid); + + /** Sets the next PC of a specific thread. */ + void setNextPC(Addr val, ThreadID tid); + + /** Reads the next NPC of a specific thread. */ + Addr readNextNPC(ThreadID tid); + + /** Sets the next NPC of a specific thread. */ + void setNextNPC(Addr val, ThreadID tid); + + + /** Initiates a squash of all in-flight instructions for a given + * thread. The source of the squash is an external update of + * state through the TC. + */ + void squashFromTC(ThreadID tid); + + /** Function to add instruction onto the head of the list of the + * instruction blocks. Used when new instruction blocks are fetched. + */ + ListIt addInstBlock(BlockPtr &block); + + /** Function to tell the CPU that an instruction block has completed. */ + void instBlockDone(ThreadID tid ); + + /** Add Instruction block to the CPU Remove List*/ + void addToBlockRemoveList(BlockPtr &block); + + /** Remove an instruction block from the front end of the list. There's + * no restriction on location of the instruction. + */ + void removeFrontBlock(BlockPtr &block); + + /** Remove all instruction blocks that are not currently in the ROB. */ + void removeBlocksNotInROB(ThreadID tid); + + /** Remove all instruction blocks younger than the given sequence number. */ + void removeBlocksUntil(const BlockSeqNum &seq_num, ThreadID tid); + + /** Removes the instruction blocks pointed to by the iterator. */ + inline void squashBlockIt(const ListIt &blockIt, ThreadID tid); + + /** Cleans up all instruction blocks on the remove list. */ + void cleanUpRemovedBlocks(); + + /** Get the block ID of head inst block in ROB. */ + TheISA::BlockID readHeadInstBlockID( ThreadID tid); + + /** Debug function to print all blocks on the list. */ + void dumpBlocks(); + + public: +#ifndef NDEBUG + int instcount; + int blockcount; +#endif + + /** List of all the instruction blocks in flight. 
*/ + std::list blockList; + + /** List of all the instruction blocks that will be removed at the end of this + * cycle. + */ + std::queue blockRemoveList; + +#ifdef DEBUG + /** Debug structure to keep track of the sequence numbers still in + * flight. + */ + std::set snList; +#endif + + /** Records if instructions need to be removed this cycle due to + * being retired or squashed. + */ + bool removeBlocksThisCycle; + + protected: + + /** Fetch stage of Edge CPU. */ + typename CPUPolicy::Fetch fetch; + + /** Map stage of Edge CPU. */ + typename CPUPolicy::Map map; + + /** Execute stage of Edge CPU. */ + typename CPUPolicy::Execute execute; + + /** Commit stage of Edge CPU. */ + typename CPUPolicy::Commit commit; + + /** Global register file of Edge CPU. */ + typename CPUPolicy::GlobalRegFile globalRegFile; + + /** RoB of Edge CPU. */ + typename CPUPolicy::EdgeROB rob; + + /** Active Threads List */ + std::list activeThreads; + + /** Instruction Set Architecture related stuffs. */ + TheISA::ISA isa[Impl::MaxThreads]; + + public: + /** Enum to give each stage a specific index, so when calling + * activateStage() or deactivateStage(), they can specify which stage + * is being activated/deactivated. + */ + enum StageIdx { + FetchIdx, + MapIdx, + ExecuteIdx, + CommitIdx, + NumStages }; + + /** Typedefs from the Impl to get the structs that each of the + * time buffers should use. + */ + typedef typename CPUPolicy::TimeStruct TimeStruct; + + typedef typename CPUPolicy::Fetch2Map Fetch2Map; + + typedef typename CPUPolicy::Map2Execute Map2Execute; + + typedef typename CPUPolicy::Execute2Commit Execute2Commit; + + + /** The main time buffer to do backwards communication. */ + TimeBuffer timeBuffer; + + /** The fetch stage's instruction block queue. */ + TimeBuffer fetch2mapQueue; + + /** The map stage's instruction block queue. */ + TimeBuffer map2executeQueue; + + /** The commit stage's instruction block queue. 
*/ + TimeBuffer execute2commitQueue; + + private: + /** The activity recorder; used to tell if the CPU has any + * activity remaining or if it can go to idle and deschedule + * itself. + */ + ActivityRecorder activityRec; + + public: + /** Records that there was time buffer activity this cycle. */ + void activityThisCycle() { activityRec.activity(); } + + /** Changes a stage's status to active within the activity recorder. */ + void activateStage(const StageIdx idx) + { activityRec.activateStage(idx); } + + /** Changes a stage's status to inactive within the activity recorder. */ + void deactivateStage(const StageIdx idx) + { activityRec.deactivateStage(idx); } + + /** Wakes the CPU, rescheduling the CPU if it's not already active. */ + void wakeCPU(); + +#if FULL_SYSTEM + virtual void wakeup(); +#endif + + /** Gets a free thread id. Use if thread ids change across system. */ + ThreadID getFreeTid(); + + public: + /** Returns a pointer to a thread context. */ + ThreadContext * + tcBase(ThreadID tid) + { + return thread[tid]->getTC(); + } + + /** The global sequence number counter. */ + BlockSeqNum globalSeqNum; + +#if USE_CHECKER + /** Pointer to the checker, which can dynamically verify + * instruction results at run time. This can be set to NULL if it + * is not being used. + */ + Checker *checker; +#endif + +#if FULL_SYSTEM + /** Pointer to the system. */ + System *system; +#endif + + /** Event to call process() on once draining has completed. */ + Event *drainEvent; + + /** Counter of how many stages have completed draining. */ + int drainCount; + + /** Pointers to all of the threads in the CPU. */ + std::vector thread; + + /** Whether or not the CPU should defer its registration. */ + bool deferRegistration; + + /** Is there a context switch pending? */ + bool contextSwitch; + + /** Threads Scheduled to Enter CPU */ + std::list cpuWaitList; + + /** The cycle that the CPU was last running, used for statistics. 
*/ + Tick lastRunningCycle; + + /** The cycle that the CPU was last activated by a new thread*/ + Tick lastActivatedCycle; + + /** Mapping for system thread id to cpu id */ + std::map threadMap; + + /** Available thread ids in the cpu*/ + std::vector tids; + + /** CPU read function, forwards read to LSQ. */ + template + Fault read(RequestPtr &req, T &data, int load_idx) + { + this->execute.ldstQueue.read(req, data, load_idx); + return NoFault; + } + + /** CPU write function, forwards write to LSQ. */ + template + Fault write(RequestPtr &req, T &data, int store_idx) + { + this->execute.ldstQueue.write(req, data, store_idx); + return NoFault; + } + + Addr lockAddr; + + /** Temporary fix for the lock flag, works in the UP case. */ + bool lockFlag; + + /** Statistics statements. */ + + /** Stat for total number of times the CPU is descheduled. */ + Stats::Scalar timesIdled; + /** Stat for total number of cycles the CPU spends descheduled. */ + Stats::Scalar idleCycles; + /** Stat for the number of committed blocks per thread. */ + Stats::Vector committedBlocks; + /** Stat for the total number of committed blocks. */ + Stats::Scalar totalCommittedBlocks; + /** Stat for the CPI per thread. */ + Stats::Formula cpi; + /** Stat for the total CPI. */ + Stats::Formula totalCpi; + /** Stat for the IPC per thread. */ + Stats::Formula ipc; + /** Stat for the total IPC. */ + Stats::Formula totalIpc; +}; + +#endif // __CPU_EDGE_CPU_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/cpu.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/cpu.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,1452 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include "config/full_system.hh" +#include "config/the_isa.hh" +#include "config/use_checker.hh" +#include "cpu/activity.hh" +#include "cpu/simple_thread.hh" +#include "cpu/thread_context.hh" +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/cpu.hh" +#include "cpu/edge/thread_context.hh" +#include "enums/MemoryMode.hh" +#include "sim/core.hh" +#include "sim/stat_control.hh" + +#if FULL_SYSTEM +#include "cpu/quiesce_event.hh" +#include "sim/system.hh" +#else +#include "sim/process.hh" +#endif + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + +#if THE_ISA == ALPHA_ISA +#include "arch/alpha/osfpal.hh" +#endif + +class BaseCPUParams; + +using namespace TheISA; +using namespace std; + +BaseEdgeCPU::BaseEdgeCPU(BaseCPUParams *params) + : BaseCPU(params) +{ + edge_tracer = params->edge_tracer; +} + +void +BaseEdgeCPU::regStats() +{ + BaseCPU::regStats(); +} + +template +EdgeCPU::TickEvent::TickEvent(EdgeCPU *c) + : Event(CPU_Tick_Pri), cpu(c) +{ +} + +template +void +EdgeCPU::TickEvent::process() +{ + cpu->tick(); +} + +template +const char * +EdgeCPU::TickEvent::description() const +{ + return "EdgeCPU tick"; +} + +template +EdgeCPU::ActivateThreadEvent::ActivateThreadEvent() + : Event(CPU_Switch_Pri) +{ +} + +template +void +EdgeCPU::ActivateThreadEvent::init(int thread_num, + EdgeCPU *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; +} + +template +void +EdgeCPU::ActivateThreadEvent::process() +{ + cpu->activateThread(tid); +} + +template +const char * +EdgeCPU::ActivateThreadEvent::description() const +{ + return "EdgeCPU \"Activate Thread\""; +} + +template +EdgeCPU::DeallocateContextEvent::DeallocateContextEvent() + : Event(CPU_Tick_Pri), tid(0), remove(false), cpu(NULL) +{ +} + +template +void +EdgeCPU::DeallocateContextEvent::init(int thread_num, + EdgeCPU *thread_cpu) +{ + tid = thread_num; + cpu = thread_cpu; + remove = false; +} + +template +void +EdgeCPU::DeallocateContextEvent::process() +{ + cpu->deactivateThread(tid); 
+ if (remove) + cpu->removeThread(tid); +} + +template +const char * +EdgeCPU::DeallocateContextEvent::description() const +{ + return "EdgeCPU \"Deallocate Context\""; +} + +/** Constructs the CPU: builds the pipeline stages, register file, ROB and + * time buffers from params, then wires them together in the body. + */ +template +EdgeCPU::EdgeCPU(SimpleEdgeCPUParams *params) + : BaseEdgeCPU(params), + itb(params->itb), + dtb(params->dtb), + tickEvent(this), + +#ifndef NDEBUG + instcount(0), + blockcount(0), +#endif + + removeBlocksThisCycle(false), + fetch(this, params), + map(this, params), + execute(this, params), + commit(this,params), + + globalRegFile(this, params->numPhysIntRegs, + params->numPhysFloatRegs), + + rob(this, + params->numROBEntries, params->squashWidth, + params->smtROBPolicy, params->smtROBThreshold, + params->numThreads), + + timeBuffer(params->backComSize, params->forwardComSize), + fetch2mapQueue(params->backComSize, params->forwardComSize), + map2executeQueue(params->backComSize, params->forwardComSize), + execute2commitQueue(params->backComSize, params->forwardComSize), + + activityRec(name(), NumStages, + params->backComSize + params->forwardComSize, + params->activity), + + // Init the global sequence number to be 1. + globalSeqNum(1), + +#if FULL_SYSTEM + system(params->system), +#endif // FULL_SYSTEM + + // signalDrained() tests drainEvent before drain() has assigned it, + // so it must start out as NULL rather than uninitialized. + drainEvent(NULL), + drainCount(0), + deferRegistration(params->defer_registration) +{ + if (!deferRegistration) { + _status = Running; + } else { + _status = Idle; + } + +#if USE_CHECKER + if (params->checker) { + BaseCPU *temp_checker = params->checker; + //checker = dynamic_cast *>(temp_checker); + checker = dynamic_cast*> (temp_checker); +#if FULL_SYSTEM + checker->setSystem(params->system); +#endif + } else { + checker = NULL; + } +#endif // USE_CHECKER + +#if !FULL_SYSTEM + thread.resize(numThreads); + tids.resize(numThreads); +#endif + + // The stages also need their CPU pointer setup. However this + // must be done at the upper level CPU because they have pointers + // to the upper level CPU, and not this FullEdgeCPU.
+ + // Set up Pointers to the activeThreads list for each stage + fetch.setActiveThreads(&activeThreads); + map.setActiveThreads(&activeThreads); + execute.setActiveThreads(&activeThreads); + commit.setActiveThreads(&activeThreads); + + // Give each of the stages the time buffer they will use. + fetch.setTimeBuffer(&timeBuffer); + map.setTimeBuffer(&timeBuffer); + execute.setTimeBuffer(&timeBuffer); + commit.setTimeBuffer(&timeBuffer); + + // Also setup each of the stages' queues. + fetch.setFetchQueue(&fetch2mapQueue); + + map.setMapQueue(&map2executeQueue); + map.setFetchQueue(&fetch2mapQueue); + + execute.setExecuteQueue(&execute2commitQueue); + execute.setMapQueue(&map2executeQueue); + + commit.setExecuteQueue(&execute2commitQueue); + commit.setFetchQueue(&fetch2mapQueue); + commit.setMapQueue(&map2executeQueue); + commit.setExecuteStage(&execute); + +#if !FULL_SYSTEM + ThreadID active_threads = params->workload.size(); + + if (active_threads > Impl::MaxThreads) { + panic("Workload Size too large. Increase the 'MaxThreads'" + "constant in your EdgeCPU impl. file (e.g. cpu/edge/impl.hh) or " + "edit your workload size."); + } +#else + ThreadID active_threads = 1; +#endif + + //Make Sure That this a Valid Architeture + assert(params->numPhysIntRegs >= numThreads * TheISA::NumIntRegs); + assert(params->numPhysFloatRegs >= numThreads * TheISA::NumFloatRegs); + + for (ThreadID tid = 0; tid < numThreads; tid++) { + activateThreadEvent[tid].init(tid, this); + deallocateContextEvent[tid].init(tid, this); + } + // Setup the ROB for whichever stages need it. + commit.setROB(&rob); + + lastRunningCycle = curTick; + + lastActivatedCycle = -1; + + contextSwitch = false; + DPRINTF(EdgeCPU, "Creating EdgeCPU object.\n"); + + // Setup any thread state. + this->thread.resize(this->numThreads); + + for (ThreadID tid = 0; tid < this->numThreads; ++tid) { +#if FULL_SYSTEM + // SMT is not supported in FS mode yet. 
+ assert(this->numThreads == 1); + this->thread[tid] = new Thread(this, 0); +#else + if (tid < params->workload.size()) { + DPRINTF(EdgeCPU, "Workload[%i] process is %#x", + tid, this->thread[tid]); + this->thread[tid] = new typename EdgeCPU::Thread( + (typename Impl::CPU *)(this), + tid, params->workload[tid]); + } else { + //Allocate Empty thread so M5 can use later + //when scheduling threads to CPU + Process* dummy_proc = NULL; + + this->thread[tid] = new typename EdgeCPU::Thread( + (typename Impl::CPU *)(this), + tid, dummy_proc); + } +#endif // !FULL_SYSTEM + ThreadContext *tc; + // Setup the TC that will serve as the interface to the threads/CPU. + EdgeThreadContext *edge_tc = new EdgeThreadContext; + tc = edge_tc; + + // If we're using a checker, then the TC should be the + // CheckerThreadContext. +#if USE_CHECKER + if (params->checker) { + tc = new CheckerThreadContext >( + edge_tc, this->checker); + } +#endif + + edge_tc->cpu = (typename Impl::CPU *)(this); + assert(edge_tc->cpu); + edge_tc->thread = this->thread[tid]; + +#if FULL_SYSTEM + // Setup quiesce event. + this->thread[tid]->quiesceEvent = new EndQuiesceEvent(tc); +#endif + // Give the thread the TC. + this->thread[tid]->tc = tc; + // Add the TC to the CPU's list of TC's. + this->threadContexts.push_back(tc); + } + + for (ThreadID tid = 0; tid < this->numThreads; tid++) + this->thread[tid]->setFuncExeInst(0); + + lockAddr = 0; + lockFlag = false; + +} + +template +EdgeCPU::~EdgeCPU() +{ +} + +template +void +EdgeCPU::regStats() +{ + BaseEdgeCPU::regStats(); + + // Register any of the EdgeCPU's stats here. 
+ timesIdled + .name(name() + ".timesIdled") + .desc("Number of times that the entire CPU went into an idle state and" + " unscheduled itself") + .prereq(timesIdled); + + idleCycles + .name(name() + ".idleCycles") + .desc("Total number of cycles that the CPU has spent unscheduled due " + "to idling") + .prereq(idleCycles); + + committedBlocks + .init(numThreads) + .name(name() + ".committedInstBlocks") + .desc("Number of Instruction Blocks Simulated"); + + totalCommittedBlocks + .name(name() + ".committedInstBlocks_total") + .desc("Number of Instruction Blocks Simulated"); + + cpi + .name(name() + ".cpi") + .desc("CPI: Cycles Per Instruction") + .precision(6); + cpi = numCycles / committedBlocks; + + totalCpi + .name(name() + ".cpi_total") + .desc("CPI: Total CPI of All Threads") + .precision(6); + totalCpi = numCycles / totalCommittedBlocks; + + ipc + .name(name() + ".ipc") + .desc("IPC: Instructions Per Cycle") + .precision(6); + ipc = committedBlocks / numCycles; + + totalIpc + .name(name() + ".ipc_total") + .desc("IPC: Total IPC of All Threads") + .precision(6); + totalIpc = totalCommittedBlocks / numCycles; + + this->fetch.regStats(); + this->map.regStats(); + this->execute.regStats(); + this->commit.regStats(); +} + +template +Port * +EdgeCPU::getPort(const std::string &if_name, int idx) +{ + if (if_name == "dcache_port") + return execute.getDcachePort(); + else if (if_name == "icache_port") + return fetch.getIcachePort(); + else + panic("No Such Port\n"); +} + +template +void +EdgeCPU::tick() +{ + DPRINTF(EdgeCPU, "\n\nEdgeCPU: Ticking main, EdgeCPU.\n"); + + ++numCycles; + +// activity = false; + + //Tick each of the stages + fetch.tick(); + map.tick(); + execute.tick(); + commit.tick(); + +#if !FULL_SYSTEM + doContextSwitch(); +#endif + + // Now advance the time buffers + timeBuffer.advance(); + + fetch2mapQueue.advance(); + map2executeQueue.advance(); + execute2commitQueue.advance(); + + activityRec.advance(); + + if (removeBlocksThisCycle) { + 
cleanUpRemovedBlocks(); + } + + if (!tickEvent.scheduled()) { + if (_status == SwitchedOut || + getState() == SimObject::Drained) { + DPRINTF(EdgeCPU, "Switched out!\n"); + // increment stat + lastRunningCycle = curTick; + } else if (!activityRec.active() || _status == Idle) { + DPRINTF(EdgeCPU, "Idle!\n"); + lastRunningCycle = curTick; + timesIdled++; + } else { + schedule(tickEvent, nextCycle(curTick + ticks(1))); + DPRINTF(EdgeCPU, "Scheduling next tick!\n"); + } + } + +#if !FULL_SYSTEM + updateThreadPriority(); +#endif +} + +template +void +EdgeCPU::init() +{ + BaseCPU::init(); + + // Set inSyscall so that the CPU doesn't squash when initially + // setting up registers. + for (ThreadID tid = 0; tid < numThreads; ++tid) + thread[tid]->inSyscall = true; + +#if FULL_SYSTEM + for (ThreadID tid = 0; tid < numThreads; tid++) { + ThreadContext *src_tc = threadContexts[tid]; + TheISA::initCPU(src_tc, src_tc->contextId()); + } +#endif + + // Clear inSyscall. + for (int tid = 0; tid < numThreads; ++tid) + thread[tid]->inSyscall = false; + + // Initialize stages. 
+ fetch.initStage(); + //map.initStage(); + execute.initStage(); + commit.initStage(); + + commit.setThreads(thread); + +} + +template +void +EdgeCPU::activateThread(ThreadID tid) +{ + list::iterator isActive = + std::find(activeThreads.begin(), activeThreads.end(), tid); + + DPRINTF(EdgeCPU, "[tid:%i]: Calling activate thread.\n", tid); + + if (isActive == activeThreads.end()) { + DPRINTF(EdgeCPU, "[tid:%i]: Adding to active threads list\n", + tid); + + activeThreads.push_back(tid); + } +} + +template +void +EdgeCPU::deactivateThread(ThreadID tid) +{ + //Remove From Active List, if Active + list::iterator thread_it = + std::find(activeThreads.begin(), activeThreads.end(), tid); + + DPRINTF(EdgeCPU, "[tid:%i]: Calling deactivate thread.\n", tid); + + if (thread_it != activeThreads.end()) { + DPRINTF(EdgeCPU,"[tid:%i]: Removing from active threads list\n", + tid); + activeThreads.erase(thread_it); + } +} + +template +Counter +EdgeCPU::totalInstructionBlocks() const +{ + Counter total(0); + + ThreadID size = thread.size(); + for (ThreadID i = 0; i < size; i++) + total += thread[i]->numBlock; + + return total; +} + +template +Counter +EdgeCPU::totalInstructions() const +{ + Counter total(0); + + ThreadID size = thread.size(); + for (ThreadID i = 0; i < size; i++) + total += thread[i]->numInst; + + return total; +} + +template +void +EdgeCPU::activateContext(ThreadID tid, int delay) +{ + // Needs to set each stage to running as well. + if (delay){ + DPRINTF(EdgeCPU, "[tid:%i]: Scheduling thread context to activate " + "on cycle %d\n", tid, curTick + ticks(delay)); + scheduleActivateThreadEvent(tid, delay); + } else { + activateThread(tid); + } + + if (lastActivatedCycle < curTick) { + scheduleTickEvent(delay); + + // Be sure to signal that there's some activity so the CPU doesn't + // deschedule itself. 
+ activityRec.activity(); + fetch.wakeFromQuiesce(); + + lastActivatedCycle = curTick; + + _status = Running; + } +} + +/** Deactivates thread tid (and removes it when remove is set). With a + * non-zero delay the work is deferred to a scheduled event and false is + * returned; with a zero delay it is done immediately and true is returned. + */ +template +bool +EdgeCPU::deallocateContext(ThreadID tid, bool remove, int delay) +{ + // Schedule removal of thread data from CPU + if (delay){ + DPRINTF(EdgeCPU, "[tid:%i]: Scheduling thread context to deallocate " + "on cycle %d\n", tid, curTick + ticks(delay)); + scheduleDeallocateContextEvent(tid, remove, delay); + return false; + } else { + deactivateThread(tid); + if (remove) + removeThread(tid); + return true; + } +} + +/** Schedules deactivation of thread tid one cycle out, unschedules the + * tick event when no other thread remains active, and marks the CPU Idle. + */ +template +void +EdgeCPU::suspendContext(ThreadID tid) +{ + DPRINTF(EdgeCPU,"[tid: %i]: Suspending Thread Context.\n", tid); + bool deallocated = deallocateContext(tid, false, 1); + // If this was the last thread then unschedule the tick event. + if ((activeThreads.size() == 1 && !deallocated) || + activeThreads.size() == 0) + unscheduleTickEvent(); + _status = Idle; +} + +/** Halts thread tid by requesting deallocation with removal. */ +template +void +EdgeCPU::haltContext(ThreadID tid) +{ + //For now, this is the same as deallocate + DPRINTF(EdgeCPU,"[tid:%i]: Halt Context called. Deallocating\n", tid); + deallocateContext(tid, true, 1); +} + +/** Not implemented: traces the request and panics. */ +template +void +EdgeCPU::insertThread(ThreadID tid) +{ + // The format string consumes one argument; pass tid so the trace + // line names the thread, and terminate the line. + DPRINTF(EdgeCPU,"[tid:%i] Initializing thread into CPU.\n", tid); + panic("insertThread method Unimplemented yet!\n"); +} + +/** Not implemented: traces the request and panics. */ +template +void +EdgeCPU::removeThread(ThreadID tid) +{ + DPRINTF(EdgeCPU,"[tid:%i] Removing thread context from CPU.\n", tid); + panic("removeThread method unimplemented yet!\n"); +} + + +template +void +EdgeCPU::activateWhenReady(ThreadID tid) +{ + DPRINTF(EdgeCPU,"[tid:%i]: Checking if resources are available for incoming" + "(e.g.
PhysRegs/ROB/IQ/LSQ) \n", + tid); + + panic("activeWhenReady method unimplemented yet!\n"); +} + +#if FULL_SYSTEM +template +Fault +EdgeCPU::hwrei(ThreadID tid) +{ + return NoFault; +} + +template +Fault +EdgeCPU::getInterrupts() +{ + // Check if there are any outstanding interrupts + return this->interrupts->getInterrupt(this->threadContexts[0]); +} + +template +void +EdgeCPU::processInterrupts(Fault interrupt) +{ + // Check for interrupts here. For now can copy the code that + // exists within isa_fullsys_traits.hh. Also assume that thread 0 + // is the one that handles the interrupts. + // @todo: Possibly consolidate the interrupt checking code. + // @todo: Allow other threads to handle interrupts. + + assert(interrupt != NoFault); + this->interrupts->updateIntrInfo(this->threadContexts[0]); + + DPRINTF(EdgeCPU, "Interrupt %s being handled\n", interrupt->name()); + this->trap(interrupt, 0); +} + +template +void +EdgeCPU::updateMemPorts() +{ + // Update all ThreadContext's memory ports (Functional/Virtual + // Ports) + ThreadID size = thread.size(); + for (ThreadID i = 0; i < size; ++i) + thread[i]->connectMemPorts(thread[i]->getTC()); +} +#endif + +template +void +EdgeCPU::trap(Fault fault, ThreadID tid) +{ + // Pass the thread's TC into the invoke method. + fault->invoke(this->threadContexts[tid]); +} + +#if !FULL_SYSTEM + +template +void +EdgeCPU::syscall(int64_t callnum, ThreadID tid) +{ + DPRINTF(EdgeCPU, "[tid:%i] Executing syscall().\n\n", tid); + + DPRINTF(Activity,"Activity: syscall() called.\n"); + + // Temporarily increase this by one to account for the syscall + // instruction. + ++(this->thread[tid]->funcExeInst); + + // Execute the actual syscall. + this->thread[tid]->syscall(callnum); + + // Decrease funcExeInst by one as the normal commit will handle + // incrementing it. 
+ --(this->thread[tid]->funcExeInst); +} + +#endif + +template +void +EdgeCPU::serialize(std::ostream &os) +{ + SimObject::State so_state = SimObject::getState(); + SERIALIZE_ENUM(so_state); + BaseCPU::serialize(os); + nameOut(os, csprintf("%s.tickEvent", name())); + tickEvent.serialize(os); + + // Use SimpleThread's ability to checkpoint to make it easier to + // write out the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + ThreadID size = thread.size(); + for (ThreadID i = 0; i < size; i++) { + nameOut(os, csprintf("%s.xc.%i", name(), i)); + temp.copyTC(thread[i]->getTC()); + temp.serialize(os); + } +} + +template +void +EdgeCPU::unserialize(Checkpoint *cp, const std::string §ion) +{ + SimObject::State so_state; + UNSERIALIZE_ENUM(so_state); + BaseCPU::unserialize(cp, section); + tickEvent.unserialize(cp, csprintf("%s.tickEvent", section)); + + // Use SimpleThread's ability to checkpoint to make it easier to + // read in the registers. Also make this static so it doesn't + // get instantiated multiple times (causes a panic in statistics). + static SimpleThread temp; + + ThreadID size = thread.size(); + for (ThreadID i = 0; i < size; i++) { + temp.copyTC(thread[i]->getTC()); + temp.unserialize(cp, csprintf("%s.xc.%i", section, i)); + thread[i]->getTC()->copyArchRegs(temp.getTC()); + } +} + +template +unsigned int +EdgeCPU::drain(Event *drain_event) +{ + DPRINTF(EdgeCPU, "Switching out\n"); + + // If the CPU isn't doing anything, then return immediately. + if (_status == Idle || _status == SwitchedOut) { + return 0; + } + + drainCount = 0; + fetch.drain(); + + // Wake the CPU and record activity so everything can drain out if + // the CPU was not able to immediately drain. 
+ if (getState() != SimObject::Drained) { + // A bit of a hack...set the drainEvent after all the drain() + // calls have been made, that way if all of the stages drain + // immediately, the signalDrained() function knows not to call + // process on the drain event. + drainEvent = drain_event; + + wakeCPU(); + activityRec.activity(); + + return 1; + } else { + return 0; + } +} + +template +void +EdgeCPU::resume() +{ + fetch.resume(); + map.resume(); + execute.resume(); + commit.resume(); + + changeState(SimObject::Running); + + if (_status == SwitchedOut || _status == Idle) + return; + +#if FULL_SYSTEM + assert(system->getMemoryMode() == Enums::timing); +#endif + + if (!tickEvent.scheduled()) + schedule(tickEvent, nextCycle()); + _status = Running; +} + +template +void +EdgeCPU::signalDrained() +{ + if (++drainCount == NumStages) { + if (tickEvent.scheduled()) + tickEvent.squash(); + + changeState(SimObject::Drained); + + BaseCPU::switchOut(); + + if (drainEvent) { + drainEvent->process(); + drainEvent = NULL; + } + } + assert(drainCount <= 5); +} + +template +void +EdgeCPU::switchOut() +{ + fetch.switchOut(); + map.switchOut(); + execute.switchOut(); + commit.switchOut(); + + blockList.clear(); + while (!blockRemoveList.empty()) { + blockRemoveList.pop(); + } + + _status = SwitchedOut; +#if USE_CHECKER + if (checker) + checker->switchOut(); +#endif + if (tickEvent.scheduled()) + tickEvent.squash(); +} + +template +void +EdgeCPU::takeOverFrom(BaseCPU *oldCPU) +{ + // Flush out any old data from the time buffers. + for (int i = 0; i < timeBuffer.getSize(); ++i) { + timeBuffer.advance(); + + fetch2mapQueue.advance(); + map2executeQueue.advance(); + execute2commitQueue.advance(); + + } + + activityRec.reset(); + + BaseCPU::takeOverFrom(oldCPU, fetch.getIcachePort(), NULL); // Fix me! 
+ + fetch.takeOverFrom(); + map.takeOverFrom(); + execute.takeOverFrom(); + commit.takeOverFrom(); + + assert(!tickEvent.scheduled()); + + // @todo: Figure out how to properly select the tid to put onto + // the active threads list. + ThreadID tid = 0; + + list::iterator isActive = + std::find(activeThreads.begin(), activeThreads.end(), tid); + + if (isActive == activeThreads.end()) { + //May Need to Re-code this if the delay variable is the delay + //needed for thread to activate + DPRINTF(EdgeCPU, "Adding Thread %i to active threads list\n", + tid); + + activeThreads.push_back(tid); + } + + // Set all statuses to active, schedule the CPU's tick event. + // @todo: Fix up statuses so this is handled properly + ThreadID size = threadContexts.size(); + for (ThreadID i = 0; i < size; ++i) { + ThreadContext *tc = threadContexts[i]; + if (tc->status() == ThreadContext::Active && _status != Running) { + _status = Running; + schedule(tickEvent, nextCycle()); + } + } + if (!tickEvent.scheduled()) + schedule(tickEvent, nextCycle()); +} + +template +TheISA::MiscReg +EdgeCPU::readMiscRegNoEffect(int misc_reg, ThreadID tid) +{ + return this->isa[tid].readMiscRegNoEffect(misc_reg); +} + +template +TheISA::MiscReg +EdgeCPU::readMiscReg(int misc_reg, ThreadID tid) +{ + return this->isa[tid].readMiscReg(misc_reg, tcBase(tid)); +} + +template +void +EdgeCPU::setMiscRegNoEffect(int misc_reg, + const TheISA::MiscReg &val, ThreadID tid) +{ + this->isa[tid].setMiscRegNoEffect(misc_reg, val); +} + +template +void +EdgeCPU::setMiscReg(int misc_reg, + const TheISA::MiscReg &val, ThreadID tid) +{ + this->isa[tid].setMiscReg(misc_reg, val, tcBase(tid)); +} + +template +uint64_t +EdgeCPU::readIntReg(int reg_idx) +{ + return globalRegFile.readIntReg(reg_idx); +} + +template +FloatReg +EdgeCPU::readFloatReg(int reg_idx) +{ + return globalRegFile.readFloatReg(reg_idx); +} + +template +FloatRegBits +EdgeCPU::readFloatRegBits(int reg_idx) +{ + return globalRegFile.readFloatRegBits(reg_idx); +} + 
+// Register writes: forward to the global register file.
+template <class Impl>
+void
+EdgeCPU<Impl>::setIntReg(int reg_idx, uint64_t val)
+{
+    globalRegFile.setIntReg(reg_idx, val);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setFloatReg(int reg_idx, FloatReg val)
+{
+    globalRegFile.setFloatReg(reg_idx, val);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
+{
+    globalRegFile.setFloatRegBits(reg_idx, val);
+}
+
+// Architectural register accessors. The tid argument is accepted but not
+// used: every access goes to the single global register file. Float
+// registers are addressed after the TheISA::NumIntRegs integer registers.
+template <class Impl>
+uint64_t
+EdgeCPU<Impl>::readArchIntReg(int reg_idx, ThreadID tid)
+{
+    return globalRegFile.readIntReg(reg_idx);
+}
+
+template <class Impl>
+float
+EdgeCPU<Impl>::readArchFloatReg(int reg_idx, ThreadID tid)
+{
+    return globalRegFile.readFloatReg(reg_idx + TheISA::NumIntRegs);
+}
+
+template <class Impl>
+uint64_t
+EdgeCPU<Impl>::readArchFloatRegInt(int reg_idx, ThreadID tid)
+{
+    return globalRegFile.readFloatRegBits(reg_idx + TheISA::NumIntRegs);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setArchIntReg(int reg_idx, uint64_t val, ThreadID tid)
+{
+    // Keep the conversion to PhysRegIndex explicit, as in the original.
+    PhysRegIndex phys_reg = reg_idx;
+
+    globalRegFile.setIntReg(phys_reg, val);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setArchFloatReg(int reg_idx, float val, ThreadID tid)
+{
+    globalRegFile.setFloatReg(reg_idx + TheISA::NumIntRegs, val);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setArchFloatRegInt(int reg_idx, uint64_t val, ThreadID tid)
+{
+    globalRegFile.setFloatRegBits(reg_idx + TheISA::NumIntRegs, val);
+}
+
+// PC accessors: the commit stage owns the PC state. getBlockPC/readPC
+// and setBlockPC/setPC forward to the same commit-stage calls.
+template <class Impl>
+uint64_t
+EdgeCPU<Impl>::getBlockPC(ThreadID tid)
+{
+    return commit.readPC(tid);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setBlockPC(Addr val, ThreadID tid)
+{
+    commit.setPC(val, tid);
+}
+
+template <class Impl>
+uint64_t
+EdgeCPU<Impl>::readPC(ThreadID tid)
+{
+    return commit.readPC(tid);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setPC(Addr new_PC, ThreadID tid)
+{
+    commit.setPC(new_PC, tid);
+}
+
+template <class Impl>
+uint64_t
+EdgeCPU<Impl>::readNextPC(ThreadID tid)
+{
+    return commit.readNextPC(tid);
+}
+
+template <class Impl>
+void
+EdgeCPU<Impl>::setNextPC(uint64_t val, ThreadID tid)
+{
+    commit.setNextPC(val, tid);
+}
+
+// @todo: Next NPC should not be handled through fetch
+// stage.
+template +uint64_t +EdgeCPU::readNextNPC(ThreadID tid) +{ + return fetch.readNextNPC(tid); +} + +template +void +EdgeCPU::setNextNPC(uint64_t val, ThreadID tid) +{ + fetch.setNextNPC(val, tid); +} + +template +void +EdgeCPU::squashFromTC(ThreadID tid) +{ + this->thread[tid]->inSyscall = true; + this->commit.generateTCEvent(tid); +} + +template +typename EdgeCPU::ListIt +EdgeCPU::addInstBlock(BlockPtr &block) +{ + blockList.push_back(block); + + DPRINTF(EdgeCPU, "Adding inst block[Bid:%lli] into block list.\n", + block->getBlockID()); + + return --(blockList.end()); +} + +template +void +EdgeCPU::instBlockDone(ThreadID tid ) +{ + + thread[tid]->numBlock++; + //thread[tid]->numInsts++; + thread[tid]->numBlocks++; + + committedBlocks[tid]++; + + totalCommittedBlocks++; + + // Check for inst-block-count-based events. + comInstEventQueue[tid]->serviceEvents(thread[tid]->numBlock); +} + +template +void +EdgeCPU::addToBlockRemoveList(BlockPtr &block) +{ + removeBlocksThisCycle = true; + + blockRemoveList.push(block->getBlockListIt()); +} + +template +void +EdgeCPU::removeFrontBlock(BlockPtr &block) +{ + DPRINTF(EdgeCPU, "Removing committed block [tid:%i] PC %#x " + "[id:%lli]\n", + block->getTid(), block->getStartPC(), block->getBlockID()); + + removeBlocksThisCycle = true; + + // Remove the front instruction. 
+ blockRemoveList.push(block->getBlockListIt()); +} + + +template +void +EdgeCPU::removeBlocksNotInROB(ThreadID tid) +{ + DPRINTF(EdgeCPU, "Thread %i: Deleting inst blocks from inst block" + " list.\n", tid); + + ListIt end_it; + + bool rob_empty = false; + + if (blockList.empty()) { + return; + } else if (rob.isEmpty(/*tid*/)) { + DPRINTF(EdgeCPU, "ROB is empty, squashing all inst blocks.\n"); + end_it = blockList.begin(); + rob_empty = true; + } else { + end_it = (rob.readTailInstBlock(tid))->getBlockListIt(); + DPRINTF(EdgeCPU, "ROB is not empty, squashing inst blocks not in ROB.\n"); + } + + removeBlocksThisCycle = true; + + ListIt inst_block_it = blockList.end(); + + inst_block_it--; + + // Walk through the instruction list, removing any instructions + // that were inserted after the given instruction iterator, end_it. + while (inst_block_it != end_it) { + assert(!blockList.empty()); + + squashBlockIt(inst_block_it, tid); + + inst_block_it--; + } + + // If the ROB was empty, then we actually need to remove the first + // instruction as well. 
+ if (rob_empty) { + squashBlockIt(inst_block_it, tid); + } +} + +template +void +EdgeCPU::removeBlocksUntil(const BlockSeqNum &seq_num, ThreadID tid) +{ + assert(!blockList.empty()); + + removeBlocksThisCycle = true; + + ListIt block_iter = blockList.end(); + + block_iter--; + + DPRINTF(EdgeCPU, "Deleting blocks from block " + "list that are from [tid:%i] and above [id:%lli] (end=%lli).\n", + tid, seq_num, (*block_iter)->getBlockID()); + + while ((*block_iter)->getBlockID() > seq_num) { + + bool break_loop = (block_iter == blockList.begin()); + + squashBlockIt(block_iter, tid); + + block_iter--; + + if (break_loop) + break; + } +} + +template +inline void +EdgeCPU::squashBlockIt(const ListIt &blockIt, ThreadID tid) +{ + if ((*blockIt)->getTid() == tid) { + DPRINTF(EdgeCPU, "Squashing block, " + "[tid:%i] [id:%lli] PC %#x\n", + (*blockIt)->getTid(), + (*blockIt)->getBlockID(), + (*blockIt)->getStartPC()); + + // Mark it as squashed. + (*blockIt)->setSquashed(); + + // @todo: Formulate a consistent method for deleting + // inst blocks from the inst block list + // Remove the instruction from the list. 
+ blockRemoveList.push(blockIt); + } +} + +template +void +EdgeCPU::cleanUpRemovedBlocks() +{ + while (!blockRemoveList.empty()) { + DPRINTF(EdgeCPU, "Removing block, " + "[tid:%i] [Bid:%lli] start PC %#x, current ref count is %i.\n", + (*blockRemoveList.front())->getTid(), + (*blockRemoveList.front())->getBlockID(), + (*blockRemoveList.front())->getStartPC(), + (*blockRemoveList.front())->getCount()); + + ListIt it = blockRemoveList.front(); + + (*it)->removeAllInsts(); + + DPRINTF(EdgeCPU, "Current ref count is %i.\n", + (*blockRemoveList.front())->getCount()); + + blockList.erase(it); + + blockRemoveList.pop(); + } + + removeBlocksThisCycle = false; +} + +template +TheISA::BlockID +EdgeCPU::readHeadInstBlockID( ThreadID tid) +{ + return rob.readHeadInstBlockID(tid); +} +/* +template +void +EdgeCPU::removeAllInsts() +{ + instList.clear(); +} +*/ +template +void +EdgeCPU::dumpBlocks() +{ + int num = 0; + + ListIt block_list_it = blockList.begin(); + + cprintf("Dumping block List\n"); + + while (block_list_it != blockList.end()) { + cprintf("Block[id:%lli] start at 0x%lli\nBlock has %i insts\n", + num, (*block_list_it)->getBlockID(), + (*block_list_it)->getStartPC(), + (*block_list_it)->getNumInst()); +#ifndef NDEBUG + (*block_list_it)->dumpOutput(); +#endif + block_list_it++; + ++num; + } +} +/* +template +void +EdgeCPU::wakeDependents(DynInstPtr &inst) +{ + iew.wakeDependents(inst); +} +*/ +template +void +EdgeCPU::wakeCPU() +{ + if (activityRec.active() || tickEvent.scheduled()) { + DPRINTF(Activity, "CPU already running.\n"); + return; + } + + DPRINTF(Activity, "Waking up CPU\n"); + + idleCycles += tickToCycles((curTick - 1) - lastRunningCycle); + numCycles += tickToCycles((curTick - 1) - lastRunningCycle); + + schedule(tickEvent, nextCycle()); +} + +#if FULL_SYSTEM +template +void +EdgeCPU::wakeup() +{ + if (this->thread[0]->status() != ThreadContext::Suspended) + return; + + this->wakeCPU(); + + DPRINTF(Quiesce, "Suspended Processor woken\n"); + 
this->threadContexts[0]->activate(); +} +#endif + +template +ThreadID +EdgeCPU::getFreeTid() +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + if (!tids[tid]) { + tids[tid] = true; + return tid; + } + } + + return InvalidThreadID; +} + +template +void +EdgeCPU::doContextSwitch() +{ + if (contextSwitch) { + + //ADD CODE TO DEACTIVE THREAD HERE (???) + + ThreadID size = cpuWaitList.size(); + for (ThreadID tid = 0; tid < size; tid++) { + activateWhenReady(tid); + } + + if (cpuWaitList.size() == 0) + contextSwitch = true; + } +} + +template +void +EdgeCPU::updateThreadPriority() +{ + if (activeThreads.size() > 1) { + //DEFAULT TO ROUND ROBIN SCHEME + //e.g. Move highest priority to end of thread list + list::iterator list_begin = activeThreads.begin(); + list::iterator list_end = activeThreads.end(); + + unsigned high_thread = *list_begin; + + activeThreads.erase(list_begin); + + activeThreads.push_back(high_thread); + } +} + +/* +template +void +EdgeCPU::updateOldestBlockPointer(){ + if((++oldestBlockPointer) == Impl::FrameNum) + oldestBlockPointer = 0; +} + +template +void +EdgeCPU::updateYoungestBlockPointer(){ + if((++youngestBlockPointer) == Impl::FrameNum) + youngestBlockPointer = 0; +} +*/ + +// Forward declaration of EdgeCPU. +template class EdgeCPU; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/cpu_builder.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/cpu_builder.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,86 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include + +#include "config/full_system.hh" +#include "config/use_checker.hh" +#include "cpu/edge/cpu.hh" +#include "cpu/edge/impl.hh" +#include "params/SimpleEdgeCPU.hh" + +class SimpleEdgeCPU : public EdgeCPU +{ + public: + SimpleEdgeCPU(SimpleEdgeCPUParams *p) + : EdgeCPU(p) + { } +}; + +SimpleEdgeCPU * +SimpleEdgeCPUParams::create() +{ +#if FULL_SYSTEM + // Full-system only supports a single thread for the moment. + ThreadID actual_num_threads = 1; +#else + if (workload.size() > numThreads) { + fatal("Workload Size (%i) > Max Supported Threads (%i) on This CPU", + workload.size(), numThreads); + } else if (workload.size() == 0) { + fatal("Must specify at least one workload!"); + } + + // In non-full-system mode, we infer the number of threads from + // the workload if it's not explicitly specified. + ThreadID actual_num_threads = + (numThreads >= workload.size()) ? numThreads : workload.size(); +#endif + + numThreads = actual_num_threads; + + // Default smtFetchPolicy to "RoundRobin", if necessary. + std::string round_robin_policy = "RoundRobin"; + std::string single_thread = "SingleThread"; + + if (actual_num_threads > 1 && single_thread.compare(smtFetchPolicy) == 0) + smtFetchPolicy = round_robin_policy; + else + smtFetchPolicy = smtFetchPolicy; + + instShiftAmt = 2; + + return new SimpleEdgeCPU(this); +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/cpu_policy.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/cpu_policy.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009
+ *
+ */
+
+#ifndef __CPU_EDGE_CPU_POLICY_HH__
+#define __CPU_EDGE_CPU_POLICY_HH__
+
+#include "cpu/edge/global_regfile.hh"
+#include "cpu/edge/fetch.hh"
+#include "cpu/edge/map.hh"
+#include "cpu/edge/execute.hh"
+#include "cpu/edge/commit.hh"
+#include "cpu/edge/inst_queue.hh"
+#include "cpu/edge/lsq_unit.hh"
+#include "cpu/edge/lsq.hh"
+#include "cpu/edge/rob.hh"
+#include "cpu/edge/mem_dep_unit.hh"
+#include "cpu/edge/store_set.hh"
+#include "cpu/edge/bpred_unit.hh"
+#include "cpu/edge/comm.hh"
+
+/**
+ * Policy struct that names the concrete types making up the simple EDGE
+ * CPU: the four pipeline stages, the supporting structures, and the
+ * structs exchanged between stages.
+ *
+ * NOTE(review): the template parameter list of this struct and the
+ * template arguments of every typedef below appear to have been lost
+ * when this patch was extracted (several typedefs alias a name to
+ * itself as written). Restore them from the original change set before
+ * applying; the exact argument lists cannot be confirmed from here.
+ */
+template
+struct SimpleEdgeCPUPolicy
+{
+    // Pipeline stages.
+    typedef SimpleEdgeFetch Fetch;
+    typedef SimpleEdgeMap Map;
+    typedef SimpleEdgeExecute Execute;
+    typedef SimpleEdgeCommit Commit;
+
+    // Supporting structures used by the stages.
+    typedef EdgePhysRegFile GlobalRegFile;
+    typedef MemDepUnit MemDepUnit;
+    typedef InstructionQueue IQ;
+    typedef LSQ LSQ;
+    typedef LSQUnit LSQUnit;
+    typedef EdgeROB EdgeROB;
+    typedef BPredUnit BPredUnit;
+
+    // Communication structs passed between stages.
+    typedef CommFetch2Map Fetch2Map;
+    typedef CommMap2Execute Map2Execute;
+    typedef CommExecute2Commit Execute2Commit;
+    typedef CommIssue2Execute Issue2Execute;
+
+    // Struct carried by the time buffer.
+    typedef TimeBufStruct TimeStruct;
+};
+
+#endif //__CPU_EDGE_CPU_POLICY_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/dep_graph.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/dep_graph.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,484 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_DEP_GRAPH_HH__ +#define __CPU_EDGE_DEP_GRAPH_HH__ + +#include "cpu/edge/comm.hh" + +class EdgeCPUImpl; +template class InstructionQueue; +typedef InstructionQueue IQ; + +/** Node in a linked list. Each node contains a live producers + * which is followed by all of its live consumers using a list + * data structure. 
+ */ +template +class ProducerEntry +{ + public: + ProducerEntry() + : producer(NULL), canForward(false), blockID(0) + { } + + ~ProducerEntry() + { + producer = NULL; + consumers.clear(); + } + + /** Producer of this entry. */ + DynInstPtr producer; + + /** Flag indicating whether this producer can forward or not. */ + bool canForward; + + /** Block id of this entry. */ + TheISA::BlockID blockID; + + /** All the consumers dependent on the producer. */ + std::list consumers; +}; + +/** Array of linked list that maintains the dependencies between + * producing instructions and consuming instructions. Each linked + * list represents a single physical register, having the future + * producer of the register's value, and all consumers waiting on that + * value on the list. The head node of each linked list represents + * the producing instruction of that register. Instructions are put + * on the list upon reaching the IQ, and are removed from the list + * either when the producer completes, or the instruction is squashed. +*/ +template +class DependencyGraph +{ + public: + typedef ProducerEntry DepEntry; + typedef typename std::list::iterator ListIt; + + /** Default construction. Must call resize() prior to use. */ + DependencyGraph() + : instQueuePtr(NULL), numEntries(0), memAllocCounter(0), + nodesTraversed(0), nodesRemoved(0) + { } + + ~DependencyGraph(); + + /** Resize the dependency graph to have num_entries registers. */ + void resize(int num_entries); + + /** Clears all of the linked lists. */ + void reset(); + + /** Set inst queue pointer of this graph. */ + void setInstQueuePtr(IQ * ptr ) { + instQueuePtr = ptr; + } + + /** Inserts a producer to the graph. */ + void insertProducer(PhysRegIndex idx, DynInstPtr &new_inst); + + /** Inserts a consumer to the graph. Return TRUE means the inst can be issued immediatly + * while FALSE means the inst has been added into the graph, and should wait for dependency. 
+ */ + bool insertConsumer(PhysRegIndex idx, DynInstPtr &new_inst); + + /** Clears the producing instruction. */ + void clearInst(PhysRegIndex idx); + + /** Remove instructions younger than remove_id. */ + void remove(PhysRegIndex idx, TheISA::BlockID remove_id); + + /** Complete an entry in reg dep graph. Forwarding values from completed write + * to already added reads and mark this entry as able to farward. + */ + void complete(PhysRegIndex idx, DynInstPtr complete_inst); + + /** Commit an entry and remove it.*/ + void commit(PhysRegIndex idx, DynInstPtr commit_inst ); + + /** Checks if there are any dependents on a specific register. */ + bool empty(PhysRegIndex idx) { return !dependGraph[idx].empty(); } + + /** Debugging function to dump out the dependency graph. + */ + void dump(); + + private: + /** Array of linked lists. Each linked list is a list of all the + * instructions that depend upon a given register. The actual + * register's index is used to index into the graph; ie all + * instructions in flight that are dependent upon r34 will be + * in the linked list of dependGraph[34]. + */ + std::vector < std::list < DepEntry* > > dependGraph; + + IQ *instQueuePtr; + + /** Forwarding from producer to consumer. */ + void forward(DynInstPtr &producer, DynInstPtr& consumer); + + /** Number of linked lists; identical to the number of registers. */ + int numEntries; + + // Debug variable, remove when done testing. + unsigned memAllocCounter; + + public: + // Debug variable, remove when done testing. + uint64_t nodesTraversed; + // Debug variable, remove when done testing. 
+ uint64_t nodesRemoved; +}; + +template +DependencyGraph::~DependencyGraph() +{ + dependGraph.clear(); +} + +template +void +DependencyGraph::resize(int num_entries) +{ + numEntries = num_entries; + dependGraph.resize(numEntries); +} + +template +void +DependencyGraph::reset() +{ + DepEntry * ptr = NULL; + // Clear the dependency graph + + for (int i = 0; i < numEntries; ++i) { + + while (!dependGraph[i].empty()) { + memAllocCounter--; + + ptr = dependGraph[i].front(); + ptr->producer = NULL; + (ptr->consumers).clear(); + + delete ptr; + dependGraph[i].pop_front(); + } + } +} + +template +void +DependencyGraph::insertProducer(PhysRegIndex idx, + DynInstPtr &new_inst) +{ + // Only reg-write inst can be producer. + assert(new_inst->isGRegWR() && new_inst->isGRegWriteValid()); + assert(instQueuePtr); + + // Create an entry, add it to tail of the list. + DepEntry *new_entry = new DepEntry; + + new_entry->producer = new_inst; + new_entry->blockID = new_inst->getBlockID(); + + DPRINTF(EdgeRegDep, "Creating a depend entry, adding it into producer list.\n" ); + + if (!dependGraph[idx].empty()) { + // The younger block should be in the tail of the list. + assert((dependGraph[idx].back())->blockID < new_inst->getBlockID()); + } + + dependGraph[idx].push_back(new_entry); + + ++memAllocCounter; +} + +template +bool +DependencyGraph::insertConsumer(PhysRegIndex idx, + DynInstPtr &new_inst) +{ + // Only reg-read inst should be the consumer + assert(new_inst->isGRegWR() && new_inst->isGRegReadValid()); + assert(instQueuePtr); + + if (dependGraph[idx].empty()){ + DPRINTF(EdgeRegDep, "Read inst has no producer.\n"); + return true; + } + + ListIt curr = dependGraph[idx].end(); + ListIt begin = dependGraph[idx].begin(); + TheISA::BlockID bid = new_inst->getBlockID(); + + --curr; + // Find the last old write entry. 
+ do{ + + if ( (*curr)->blockID < bid ) { + + if ( !(*curr)->canForward ) { + DPRINTF(EdgeRegDep, "Adding consumer to dep graph.\n"); + + ((*curr)->consumers).push_back(new_inst); + return false; + } else { + DPRINTF(EdgeRegDep, "Forwarding from write inst. \n"); + + this->forward((*curr)->producer, new_inst); + return true; + } + } + + }while ( (curr--) != begin); + // Here means no producer entry matchs. + return true; +} + +template +void +DependencyGraph::forward(DynInstPtr &producer, + DynInstPtr &consumer) +{ + // Reg-write can be producer and Reg-read can be consumer + assert(producer->isGRegWR() && producer->isGRegWriteValid()); + assert(consumer->isGRegWR() && consumer->isGRegReadValid()); + + DPRINTF(EdgeRegDep, "Forwarding from producer[Bid:%lli][Iid:%lli]" + " to consumer[Bid:%lli][Iid:%lli].\n", + producer->getBlockID(), + producer->getInstID(), + consumer->getBlockID(), + consumer->getInstID()); + + consumer->setRegForward(producer->getIntIQOperand(0)); +} + +template +void +DependencyGraph::remove(PhysRegIndex idx, + TheISA::BlockID remove_id) +{ + if(dependGraph[idx].empty()){ + DPRINTF(EdgeRegDep, "Nothing to remove in reg dep graph.\n"); + return; + } + + ListIt curr = dependGraph[idx].end(); + ListIt begin = dependGraph[idx].begin(); + --curr; + + do{ + if ( (*curr)->blockID <= remove_id){ + // Find a entry is not supposed to be removed. + // Check its consumer list to find consuemrs that + // should be removed. + while ( !( (*curr)->consumers ).empty() ){ + if ( ( (*curr)->consumers ).back()->getBlockID() > remove_id ) { + // Find a consumer that should be removed. + ((*curr)->consumers).pop_back(); + continue; + } + // Because this list is sequenced in block age, + // we should stop search consumer list here. + break; + } + // We can hit an older entry only once, so break here. + break; + } + + // Find a younger entry, remove it. 
+ (*curr)->producer = NULL; + ((*curr)->consumers).clear(); + delete (*curr); + dependGraph[idx].erase(curr); + --memAllocCounter; + + }while((curr--) != begin ); + + return; +} + +template +void +DependencyGraph::complete(PhysRegIndex idx, + DynInstPtr complete_inst ) +{ + // Only reg-write inst can complete in reg dep graph. + assert(complete_inst->isGRegWR() && complete_inst->isGRegWriteValid()); + // Only completed reg-write inst can reach here. + assert(complete_inst->isWriteRegExecuted()); + // Something should be places into list. + assert(!dependGraph[idx].empty()); + // Inst queue pointer should have been initialized. + assert(instQueuePtr); + + ListIt curr = dependGraph[idx].begin(); + ListIt end = dependGraph[idx].end(); + + DPRINTF(EdgeRegDep, "Complete reg-write to reg[%i] in reg dep graph.\n", + idx); + + while ( curr != end ) { + if ( (*curr)->producer->seqNum == complete_inst->seqNum ) { + assert((*curr)->blockID == complete_inst->getBlockID()); + + if(complete_inst->isNullified()){ + // Wake consumers without forwarding + while(!((*curr)->consumers).empty()){ + DynInstPtr consumer = (*curr)->consumers.front(); + + consumer->setCanIssue(); + consumer->setReadRegIssuing(); + this->instQueuePtr->addIfReady(consumer); + ((*curr)->consumers).pop_front(); + } + // commit it immediatly. + this->commit(idx, complete_inst); + return; + } + + // Matched entry, set it as able to forward. + (*curr)->canForward = true; + // Wake consumers + while(!((*curr)->consumers).empty()){ + DynInstPtr consumer = (*curr)->consumers.front(); + + this->forward((*curr)->producer, consumer); + consumer->setCanIssue(); + consumer->setReadRegIssuing(); + this->instQueuePtr->addIfReady(consumer); + ((*curr)->consumers).pop_front(); + } + // Only one inst can be matched. 
+ return; + } + ++curr; + } + + assert(0&&"Complete non existed inst in reg dep graph."); +} + +template +void +DependencyGraph::commit(PhysRegIndex idx, + DynInstPtr commit_inst) +{ + // Only reg-write inst can complete in reg dep graph. + assert(commit_inst->isGRegWR() && commit_inst->isGRegWriteValid()); + // Only reg-write inst that has been both in executed and block + // completed status could be commited in dep reg graph. + assert( commit_inst->isWriteRegExecuted() && + (commit_inst->isNullified() || commit_inst->isBlockCompleted())); + + ListIt curr = dependGraph[idx].begin(); + ListIt end = dependGraph[idx].end(); + + while(curr != end ){ + if ( (*curr)->producer->seqNum == commit_inst->seqNum ){ + // Consumer list should be empty because it should + // have been cleaned when the entry complete. + assert(((*curr)->consumers).empty()); + + (*curr)->producer = NULL; + delete (*curr); + dependGraph[idx].erase(curr); + --memAllocCounter; + + // We are supposed to find only one entry in this list, + // so break here. + + return; + } + // The committed entry should always in the head + // of entry list in EDGE architecture. However, I don't + // intend to enforce this due to some special concerns. 
+ DPRINTF(EdgeRegDep, "Commit inst is not in the head of entry list.\n" ); + ++curr; + } + + assert(0&&"Commit non-existed inst in reg dep map."); +} + +template +void +DependencyGraph::dump() +{ + ListIt curr; + ListIt end; + + for (int i = 0; i < numEntries; ++i) + { + if ( dependGraph[i].empty() ){ + cprintf("dependGraph[%i] has no producer.\n"); + continue; + } + + curr = dependGraph[i].begin(); + end = dependGraph[i].end(); + + while ( curr != end) { + + if ((*curr)->producer) { + cprintf("dependGraph[%i]: producer: %#x [Bid:%lli][Iid:%lli] consumer: ", + i, (*curr)->producer->readPC(), + (*curr)->producer->getBlockID(), + (*curr)->producer->getInstID()); + } else { + cprintf("dependGraph[%i]: Invalid entry: consumer:", i); + } + + typename std::list::iterator c_curr = (curr->consumers).begin(); + typename std::list::iterator c_end = (curr->consumers).end(); + + while (c_curr != c_end ) { + + cprintf("%#x [Bid:%lli][Iid:%lli] ", + (*c_curr)->readPC(), + (*c_curr)->getBlockID(), + (*c_curr)->getInstID()); + + ++c_curr; + } + + cprintf("\n"); + } + } + cprintf("memAllocCounter: %i\n", memAllocCounter); +} + +#endif // __CPU_EDGE_DEP_GRAPH_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/dyn_inst.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/dyn_inst.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,555 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#ifndef __CPU_EDGE_DYN_INST_HH__ +#define __CPU_EDGE_DYN_INST_HH__ + +#include "arch/isa_traits.hh" +#include "config/the_isa.hh" +#include "cpu/edge/base_dyn_inst.hh" +#include "cpu/inst_seq.hh" +#include "cpu/edge/cpu.hh" +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/static_inst.hh" + +class Packet; + +template +class SimpleEdgeDynInst : public BaseEdgeDynInst +{ + public: + /** Typedef for the CPU. */ + typedef typename Impl::CPU CPU; + + /** Binary machine instruction type. */ + typedef TheISA::MachInst MachInst; + /** Extended machine instruction type. */ + typedef TheISA::ExtMachInst ExtMachInst; + /** Logical register index type. */ + typedef TheISA::RegIndex RegIndex; + /** Integer register index type. */ + typedef TheISA::IntReg IntReg; + typedef TheISA::FloatReg FloatReg; + typedef TheISA::FloatRegBits FloatRegBits; + /** Misc register index type. */ + typedef TheISA::MiscReg MiscReg; + + typedef TheISA::ConsumerID ConsumerID; + typedef TheISA::OpSize OpSize; + typedef TheISA::BlockID BlockID; + typedef TheISA::ChunkID ChunkID; + typedef TheISA::InstID InstID; + typedef TheISA::PredStatus PredStatus; + typedef TheISA::DataflowTokenType DataflowTokenType; + + /// Operand of this instruction + struct Operand { + // We only need one 64-bit buffer to store operand + OpSize int_value; + //double float_value; + bool valid; + }; + + /// Instruction result + struct Result { + // All of the result will be set into this 64-bit buffer + // no matter it's an integer or float + OpSize int_result; + //double float_result; + bool valid; + }; + + struct PredicationResult{ + PredStatus pred_status; + bool valid; + }; + + struct Consumer{ + uint8_t type; + uint8_t subtype; + uint32_t id; + }; + + /// Size of instruction block and instruction chunk + enum{ + MaxBlockSize = TheISA::MaxBlockSize, // How many chunks a block can hold + ChunkSize = TheISA::ChunkSize // How many instructions a chunk can hold + }; + + enum { + MaxInstConsumers = 
TheISA::MaxInstConsumers, + MaxInstOperands = TheISA::MaxInstOperands, + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, + MaxInstDestRegs = TheISA::MaxInstDestRegs + }; + + public: + /** BaseDynInst constructor given a binary instruction. */ + SimpleEdgeDynInst(EDGEStaticInstPtr staticInst, Addr PC, Addr NPC, + Addr Pred_PC, Addr Pred_NPC, + InstSeqNum seq_num, CPU *cpu, TheISA::BlockStatus blockstatus); + + /** BaseDynInst constructor given a binary instruction. */ + SimpleEdgeDynInst(ExtMachInst inst, Addr PC, Addr NPC, + Addr Pred_PC, Addr Pred_NPC, + InstSeqNum seq_num, CPU *cpu, TheISA::BlockStatus blockstatus); + + /** Executes the instruction.*/ + Fault execute(); + + /** Initiates the access. Only valid for memory operations. */ + Fault initiateAcc(); + + /** Completes the access. Only valid for memory operations. */ + Fault completeAcc(PacketPtr pkt); + + private: + /** Initializes variables. */ + void initVars(); + + protected: + + /// Block ID this instruction belongs to + BlockID _blockID; + + /// Chunk ID this instruction belongs to + ChunkID _chunkID; + + /// Instruction ID inside a chunk + InstID _instID; + + /// Instruction position + uint32_t _ETNum; + + /// Instruction position + uint32_t _slotNum; + + /// Data-flow token type indicates the output type of this inst + DataflowTokenType _dataflowTokenType; + + PredicationResult _predicationResult; + + Consumer _consumer[MaxInstConsumers]; + + /// See operand() + Operand _operand[MaxInstOperands]; + /// Execute result of this inst. + // This should be more sophiscated than just define an member variable here ... + Result _result; + + /** Number of ready operands. */ + uint8_t readyOperands; + + /** Data structure for load forwarding. */ + bool isForwarded[Impl::MaxByteNum]; + uint8_t forwardedData[Impl::MaxByteNum]; + + /** Data structure for register forwarding. */ + bool isRegForwarded; + OpSize regForwardedData; + + public: + /** Reads a miscellaneous register. 
*/ + MiscReg readMiscRegNoEffect(int misc_reg) + { + return this->cpu->readMiscRegNoEffect(misc_reg, this->threadNumber); + } + + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. + */ + MiscReg readMiscReg(int misc_reg) + { + return this->cpu->readMiscReg(misc_reg, this->threadNumber); + } + + /** Sets a misc. register. */ + void setMiscRegNoEffect(int misc_reg, const MiscReg &val) + { + this->instResult.integer = val; + return this->cpu->setMiscRegNoEffect(misc_reg, val, this->threadNumber); + } + + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. + */ + void setMiscReg(int misc_reg, const MiscReg &val) + { + return this->cpu->setMiscReg(misc_reg, val, + this->threadNumber); + } + + /** Reads a miscellaneous register. */ + TheISA::MiscReg readMiscRegOperandNoEffect(const EDGEStaticInst *si, int idx) + { + return this->cpu->readMiscRegNoEffect( + si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag, + this->threadNumber); + } + + /** Reads a misc. register, including any side-effects the read + * might have as defined by the architecture. + */ + TheISA::MiscReg readMiscRegOperand(const EDGEStaticInst *si, int idx) + { + return this->cpu->readMiscReg( + si->srcRegIdx(idx) - TheISA::Ctrl_Base_DepTag, + this->threadNumber); + } + + /** Sets a misc. register. */ + void setMiscRegOperandNoEffect(const EDGEStaticInst * si, int idx, const MiscReg &val) + { + this->instResult.integer = val; + return this->cpu->setMiscRegNoEffect( + si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag, + val, this->threadNumber); + } + + /** Sets a misc. register, including any side-effects the write + * might have as defined by the architecture. 
+ */ + void setMiscRegOperand(const EDGEStaticInst *si, int idx, + const MiscReg &val) + { + return this->cpu->setMiscReg( + si->destRegIdx(idx) - TheISA::Ctrl_Base_DepTag, + val, this->threadNumber); + } + + /** Get frame number of this instruction. */ + int getFrameID() + { + return this->instBlock->getFrameID(); + } + + uint8_t getConsumerType( uint8_t idx) const { + return _consumer[idx].type; + } + + uint8_t getConsumerSubType ( uint8_t idx ) const { + return _consumer[idx].subtype; + } + uint32_t getConsumerID ( uint8_t idx ) const { + return _consumer[idx].id; + } + + void setConsumerType ( uint8_t idx, uint8_t type ) { + _consumer[idx].type = type; + } + + void setConsumerSubType ( uint8_t idx, uint8_t sub_type ) { + _consumer[idx].subtype = sub_type; + } + + void setConsumerID ( uint8_t idx, uint32_t id ) { + _consumer[idx].id = id; + } + + OpSize getIntIQOperand(int i ) const { + assert(_operand[i].valid); + return _operand[i].int_value; + } + + double getFloatIQOperand(int i) { + assert(_operand[i].valid); + TheISA::checkFloat64(_operand[i].int_value); + // Reinterpret the operand to double + double * fp_result = reinterpret_cast(&(_operand[i].int_value)); + DPRINTF(EdgeIQ, "Reinterpret int operand %llx to double %f\n", + _operand[i].int_value, *fp_result); + return *fp_result; + } + + float getFloat32IQOperand(int i) { + assert(_operand[i].valid); + TheISA::checkFloat32(_operand[i].int_value); + // Reinterpret the operand to float + float * fp_result = reinterpret_cast(&(_operand[i].int_value)); + DPRINTF(EdgeIQ, "Reinterpret int operand %llx to float %f\n", + _operand[i].int_value, *fp_result); + return *fp_result; + } + + ConsumerID getRawConsumerID(uint8_t idx) const + { + return this->staticInst->getRawConsumerID(idx); + } + + uint32_t getNumConsumers() const + { + return this->staticInst->getNumConsumers(); + } + + uint32_t getNumOperands() const + { + return this->staticInst->getNumOperands(); + } + + void setIntResult( OpSize result ) + { + 
_result.int_result = result; + _result.valid = true; + } + + void setFloatResult( double result ) { + //Reinterpret the result to int + + OpSize * raw_result = reinterpret_cast(&result); + _result.int_result = *raw_result; + _result.valid = true; + + DPRINTF(EdgeIQ, "Reinterpret double result %lf to raw bit %llx\n", result, *raw_result); + } + + void setFloat32Result( float result ) { + //Reinterpret the result to int + + uint32_t * raw_result = reinterpret_cast(&result); + _result.int_result = (OpSize)(*raw_result); + _result.valid = true; + + DPRINTF(EdgeIQ, "Reinterpret float 32 result %lf to raw bit %llx\n", result, *raw_result); + } + + void setPredStatus(PredStatus status ); + + void setIntOperand(int i, OpSize value) { + _operand[i].int_value = value; + _operand[i].valid = true; + } + + // Set float operand is the same as set Int operand + // because we have the same buffer + void setFloatOperand(int i, OpSize value) { + _operand[i].int_value = value; + _operand[i].valid = true; + } + + void setForward(int i, uint8_t data){ + isForwarded[i] = true; + forwardedData[i] = data; + } + + void setRegForward(OpSize forward_data){ + isRegForwarded = true; + regForwardedData = forward_data; + } + + void clearForward(int i) { + isForwarded[i] = false; + forwardedData[i] = 0; + } + + void clearForward() { + for(int i = 0; i < Impl::MaxByteNum; ++i){ + isForwarded[i] = false; + forwardedData[i] = 0; + } + } + + bool isForward(int i){ + return isForwarded[i]; + } + + bool isForward(){ + for(int i = 0;i < Impl::MaxByteNum; ++i){ + if(isForwarded[i]) + return true; + } + return false; + } + + uint8_t getForwardedData(int i){ + assert(isForwarded[i]); + return forwardedData[i]; + } + void markOperandReady(); + void markPredReady(); + + int getReadyOperands() const { return readyOperands; } + + PredStatus getPredStatus() const { + if (_predicationResult.valid ) return _predicationResult.pred_status; + return TheISA::NoPred; + } + OpSize getIntResult() const{ + 
assert(_result.valid); + + if(_result.valid) + return _result.int_result; + + return 0; + } + OpSize getFloatResult() const{ + assert(_result.valid); + + if(_result.valid) + return _result.int_result; + return 0; + } + + /// Set EDGE Inst block related status + void setInstID( InstID id) { _instID = id; } + + void setChunkID(ChunkID id) { _chunkID = id; } + + void setBlockID(BlockID id) { _blockID = id; } + + void setPosition(uint32_t ETNum, uint32_t slotNum) + { + _ETNum = ETNum; + _slotNum = slotNum; + } + + void setEDGEInstStatus(InstID inst_id, ChunkID chunk_id, BlockID block_id) + { + _instID = inst_id; + _chunkID = chunk_id; + _blockID = block_id; + } + InstID getInstID() const { return _instID; } + + ChunkID getChunkID() const { return _chunkID; } + + BlockID getBlockID() const { return _blockID; } + + uint32_t getETNum() const { return _ETNum; } + + uint32_t getSlotNum() const { return _slotNum; } + + void setDataflowTokenType( DataflowTokenType type ) + { + _dataflowTokenType = type; + } + + DataflowTokenType getDataflowTokenType() const { return _dataflowTokenType; } + +#if FULL_SYSTEM + /** Calls hardware return from error interrupt. */ + Fault hwrei(); + /** Traps to handle specified fault. */ + void trap(Fault fault); + bool simPalCheck(int palFunc); +#else + /** Calls a syscall. */ + void syscall(int64_t callnum); +#endif + + public: + + // The register accessor methods provide the index of the + // instruction's operand (e.g., 0 or 1), not the architectural + // register index, to simplify the implementation of register + // renaming. We find the architectural register index by indexing + // into the instruction's own operand index table. Note that a + // raw pointer to the StaticInst is provided instead of a + // ref-counted StaticInstPtr to redice overhead. This is fine as + // long as these methods don't copy the pointer into any long-term + // storage (which is pretty hard to imagine they would have reason + // to do). 
+ + uint64_t readIntRegOperand(const EDGEStaticInst *si, int idx) + { + return this->cpu->readIntReg(this->_srcRegIdx[idx]); + } + + FloatReg readFloatRegOperand(const EDGEStaticInst *si, int idx) + { + return this->cpu->readFloatReg(this->_srcRegIdx[idx]); + } + + FloatRegBits readFloatRegOperandBits(const EDGEStaticInst *si, int idx) + { + return this->cpu->readFloatRegBits(this->_srcRegIdx[idx]); + } + + /** @todo: Make results into arrays so they can handle multiple dest + * registers. + */ + void setIntRegOperand(const EDGEStaticInst *si, int idx, uint64_t val) + { + this->cpu->setIntReg(this->_destRegIdx[idx], val); + BaseEdgeDynInst::setIntRegOperand(si, idx, val); + } + + void setFloatRegOperand(const EDGEStaticInst *si, int idx, FloatReg val) + { + this->cpu->setFloatReg(this->_destRegIdx[idx], val); + BaseEdgeDynInst::setFloatRegOperand(si, idx, val); + } + + void setFloatRegOperandBits(const EDGEStaticInst *si, int idx, + FloatRegBits val) + { + this->cpu->setFloatRegBits(this->_destRegIdx[idx], val); + BaseEdgeDynInst::setFloatRegOperandBits(si, idx, val); + } + +#if THE_ISA == MIPS_ISA + uint64_t readRegOtherThread(int misc_reg) + { + panic("MIPS MT not defined for Edge CPU.\n"); + return 0; + } + + void setRegOtherThread(int misc_reg, const TheISA::MiscReg &val) + { + panic("MIPS MT not defined for Edge CPU.\n"); + } +#endif + + public: + /** Calculates EA part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. + */ + Fault calcEA() + { + return this->staticInst->eaCompInst()->execute(this, this->traceData); + } + + /** Does the memory access part of a memory instruction. Currently unused, + * though it may be useful in the future if we want to split + * memory operations into EA calculation and memory access parts. 
+ */ + Fault memAccess() + { + return this->staticInst->memAccInst()->execute(this, this->traceData); + } +}; + +#endif // __CPU_EDGE_DYN_INST_HH__ + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/dyn_inst.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/dyn_inst.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include "cpu/edge/dyn_inst_impl.hh" +#include "cpu/edge/impl.hh" + +// Force instantiation of BaseO3DynInst for all the implementations that +// are needed. +template class SimpleEdgeDynInst; diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/dyn_inst_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/dyn_inst_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,315 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "base/cp_annotate.hh" +#include "cpu/edge/dyn_inst.hh" + +template +SimpleEdgeDynInst::SimpleEdgeDynInst(EDGEStaticInstPtr staticInst, + Addr PC, Addr NPC, + Addr Pred_PC, Addr Pred_NPC, + InstSeqNum seq_num, CPU *cpu, TheISA::BlockStatus blockstatus) + : BaseEdgeDynInst(staticInst, PC, NPC, 0, + Pred_PC, Pred_NPC, 0, seq_num, cpu, blockstatus) +{ + initVars(); +} + +template +SimpleEdgeDynInst::SimpleEdgeDynInst(ExtMachInst inst, + Addr PC, Addr NPC, + Addr Pred_PC, Addr Pred_NPC, + InstSeqNum seq_num, CPU *cpu, TheISA::BlockStatus blockstatus) + : BaseEdgeDynInst(inst, PC, NPC, 0, + Pred_PC, Pred_NPC, 0, seq_num, cpu, blockstatus) +{ + initVars(); +} + +template +void +SimpleEdgeDynInst::initVars() +{ + // Make sure to have the renamed register entries set to the same + // as the normal register entries. It will allow the IQ to work + // without any modifications. + for (int i = 0; i < this->staticInst->numDestRegs(); i++) { + this->_destRegIdx[i] = this->staticInst->destRegIdx(i); + } + + for (int i = 0; i < this->staticInst->numSrcRegs(); i++) { + this->_srcRegIdx[i] = this->staticInst->srcRegIdx(i); + this->_readySrcRegIdx[i] = 0; + } + + for ( int i = 0; i < getNumConsumers(); i ++ ) { + _consumer[i].id = 0; + _consumer[i].subtype = TheISA::InvalidSubType; + _consumer[i].type = TheISA::InvalidType; + } + + for ( int i = 0; i < Impl::MaxByteNum ; i++) { + isForwarded[i] = false; + forwardedData[i] = 0; + } + + isRegForwarded = false; + regForwardedData = 0; + + _result.valid = false; + + readyOperands = 0; + + _predicationResult.pred_status = TheISA::NoPred; + + _predicationResult.valid = false; + + // All of the inst output will be defaulted to general + // unless they receive NULL or EXCEPTION token. 
+ _dataflowTokenType = TheISA::General; +} + +template +void +SimpleEdgeDynInst::setPredStatus( PredStatus status ) +{ + _predicationResult.pred_status = status; + _predicationResult.valid = true; + + if ( status == TheISA::PredTrue && + this->staticInst->getPredication()== + TheISA::PredUponTrue ) { + this->setPredMatched(); + } + + if ( status == TheISA::PredFalse && + this->staticInst->getPredication()== + TheISA::PredUponFalse ) { + this->setPredMatched(); + } +} + +template +void +SimpleEdgeDynInst::markPredReady() +{ + assert(this->staticInst->getPredication() == TheISA::PredUponTrue || + this->staticInst->getPredication() == TheISA::PredUponFalse ); + + DPRINTF(EdgeIQ, "Mark pred ready on inst[Bid:%lli][Iid:%lli].\n", + this->getBlockID(), this->getInstID()); + + if ( readyOperands == getNumOperands() ) { + if ( _predicationResult.valid && this->isPredMatched() ) { + DPRINTF(EdgeIQ, "Pred ready on inst[Bid:%lli][Iid:%lli].\n", + this->getBlockID(), this->getInstID()); + this->setCanIssue(); + return; + } + } + return; +} + +template +void +SimpleEdgeDynInst::markOperandReady() +{ + DPRINTF(EdgeIQ, "readyOperands = %d, this inst has %d operands\n", + readyOperands, getNumOperands() ); + assert(readyOperands <= getNumOperands() ); + + if ( ++readyOperands == getNumOperands() ) { + + if ( _predicationResult.valid && this->isPredMatched() ) { + this->setCanIssue(); + return; + } + + if ( this->staticInst->getPredication() == TheISA::Disable || \ + this->staticInst->getPredication() == TheISA::Reserved ) { + this->setCanIssue(); + return; + } + } + + return; +} + +template +Fault +SimpleEdgeDynInst::execute() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. 
+ bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + // If there's already a fault storing in this inst, it means the producer + // of this inst has generated some kind of fault, so just keep the + // fault and don't need to call execute. The fault will be propagate to + // the consumers in wake dependent process. + if ( this->fault != NoFault ) { + // Don't execute, set the result to 0 directly + this->setIntResult(0); + } else { + if ( isRegForwarded && (!this->isExecutingWriteReg()) ) { + DPRINTF(EdgeIQ, "Reg-read result forwarded from write.\n"); + + assert(this->staticInst->isGRegWR() && + this->staticInst->isGRegReadValid()); + + this->setIntResult(regForwardedData); + + if ( this->traceData ) { + this->traceData->setData(regForwardedData); + } + + } else { + this->fault = this->staticInst->execute(this, this->traceData); + } + } + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +SimpleEdgeDynInst::initiateAcc() +{ + // @todo: Pretty convoluted way to avoid squashing from happening + // when using the TC during an instruction's execution + // (specifically for instructions that have side-effects that use + // the TC). Fix this. + bool in_syscall = this->thread->inSyscall; + this->thread->inSyscall = true; + + // If there's already a fault storing in this inst, it means the producer + // of this inst has generated some kind of fault, so just keep the + // fault and don't need to call execute. The fault will be propagate to + // the consumers in wake dependent process. 
+ if ( this->fault != NoFault ) { + // Don't execute, set the result to 0 directly + this->setIntResult(0); + } else { + this->fault = this->staticInst->initiateAcc(this, this->traceData); + } + + this->thread->inSyscall = in_syscall; + + return this->fault; +} + +template +Fault +SimpleEdgeDynInst::completeAcc(PacketPtr pkt) +{ + // If there's already a fault storing in this inst, it means the producer + // of this inst has generated some kind of fault, so just keep the + // fault and don't need to call execute. The fault will be propagate to + // the consumers in wake dependent process. + if ( this->fault != NoFault ) { + // Don't execute, set the result to 0 directly + this->setIntResult(0); + } else { + this->fault = this->staticInst->completeAcc(pkt, this, this->traceData); + } + + return this->fault; +} + +#if FULL_SYSTEM +template +Fault +SimpleEdgeDynInst::hwrei() +{ +#if THE_ISA == ALPHA_ISA + // Can only do a hwrei when in pal mode. + if (!(this->readPC() & 0x3)) + return new AlphaISA::UnimplementedOpcodeFault; + + // Set the next PC based on the value of the EXC_ADDR IPR. + this->setNextPC(this->cpu->readMiscRegNoEffect(AlphaISA::IPR_EXC_ADDR, + this->threadNumber)); + if (CPA::available()) { + ThreadContext *tc = this->cpu->tcBase(this->threadNumber); + CPA::cpa()->swAutoBegin(tc, this->readNextPC()); + } + + // Tell CPU to clear any state it needs to if a hwrei is taken. + this->cpu->hwrei(this->threadNumber); +#else + +#endif + // FIXME: XXX check for interrupts? XXX + return NoFault; +} + +template +void +SimpleEdgeDynInst::trap(Fault fault) +{ + this->cpu->trap(fault, this->threadNumber); +} + +template +bool +SimpleEdgeDynInst::simPalCheck(int palFunc) +{ +#if THE_ISA != ALPHA_ISA + panic("simPalCheck called, but PAL only exists in Alpha!\n"); +#endif + return this->cpu->simPalCheck(palFunc, this->threadNumber); +} +#else +template +void +SimpleEdgeDynInst::syscall(int64_t callnum) +{ + // HACK: check CPU's nextPC before and after syscall. 
If it + // changes, update this instruction's nextPC because the syscall + // must have changed the nextPC. + Addr cpu_next_pc = this->cpu->readNextPC(this->threadNumber); + this->cpu->syscall(callnum, this->threadNumber); + Addr new_next_pc = this->cpu->readNextPC(this->threadNumber); + if (cpu_next_pc != new_next_pc) { + this->setNextPC(new_next_pc); + } +} +#endif + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/execute.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/execute.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,572 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_EXE_HH__ +#define __CPU_EDGE_EXE_HH__ + +#include +#include +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "config/full_system.hh" +#include "cpu/edge/comm.hh" +#include "cpu/edge/lsq.hh" + +class SimpleEdgeCPUParams; +class FUPool; + +template +class SimpleEdgeExecute +{ + private: + //Typedefs from Impl + typedef typename Impl::CPUPol CPUPol; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + typedef typename Impl::CPU CPU; + + typedef typename CPUPol::IQ IQ; + typedef typename CPUPol::LSQ LSQ; + + typedef typename CPUPol::TimeStruct TimeStruct; + typedef typename CPUPol::Map2Execute Map2Execute; + typedef typename CPUPol::Execute2Commit Execute2Commit; + typedef typename CPUPol::Issue2Execute Issue2Execute; + + typedef TheISA::BlockID BlockID; + typedef typename std::map::iterator ExeBlockIt; + + friend class Impl::CPU; + friend class CPUPol::IQ; + + public: + /** Overall Execute stage status. Used to determine if the CPU can + * deschedule itself due to a lack of activity. + */ + enum Status { + Active, + Inactive + }; + + /** Status for Issue, Execute, and Writeback stages. */ + enum StageStatus { + Running, + Blocked, + Idle, + StartSquash, + Squashing, + Unblocking + }; + + private: + /** Overall stage status. 
*/ + Status _status; + /** Dispatch status. */ + StageStatus dispatchStatus[Impl::MaxThreads]; + /** Execute status. */ + StageStatus exeStatus; + /** Writeback status. */ + StageStatus wbStatus; + + public: + /** Constructs a SimpleEdgeExecute with the given parameters. */ + SimpleEdgeExecute(CPU *_cpu, SimpleEdgeCPUParams *params); + + /** Returns the name of the SimpleEdgeExecute stage. */ + std::string name() const; + + /** Registers statistics. */ + void regStats(); + + /** Initializes stage; sends back the number of free IQ and LSQ entries. */ + void initStage(); + + /** Returns the dcache port. */ + Port *getDcachePort() { return ldstQueue.getDcachePort(); } + + /** Sets main time buffer used for backwards communication. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + + /** Sets time buffer to pass on instructions to commit. */ + void setExecuteQueue(TimeBuffer *iq_ptr); + + /** Sets time buffer to receive instructions from map. */ + void setMapQueue(TimeBuffer * iq_ptr); + + /** Sets pointer to list of active threads. */ + void setActiveThreads(std::list *at_ptr); + + /** Sets pointer to the scoreboard. */ + //void setScoreboard(Scoreboard *sb_ptr); + + /** Drains Execute stage. */ + bool drain(); + + /** Resumes execution after a drain. */ + void resume(); + + /** Completes switch out of Execute stage. */ + void switchOut(); + + /** Takes over from another CPU's thread. */ + void takeOverFrom(); + + /** Returns if Execute is switched out. */ + bool isSwitchedOut() { return switchedOut; } + + /** Squashes instructions in Execute for a specific thread. */ + void squash(ThreadID tid); + + /** Wakes all dependents of a completed instruction. */ + void wakeDependents(DynInstPtr &inst); + + /** Tells memory dependence unit that a memory instruction needs to be + * rescheduled. It will re-execute once replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &inst); + + /** Re-executes all rescheduled memory instructions. 
*/ + void replayMemInst(DynInstPtr &inst); + + /** Sends an inst block to commit through the time buffer. */ + //void instToCommit(DynInstPtr &inst); + void instBlockToCommit(BlockPtr &block); + + /** Inserts unused inst blocks of a thread into the skid buffer. */ + void skidInsert(ThreadID tid); + + /** Returns the max of the number of entries in all of the skid buffers. */ + int skidCount(); + + /** Returns if all of the skid buffers are empty. */ + bool skidsEmpty(); + + /** Updates overall Execute status based on all of the stages' statuses. */ + void updateStatus(); + + /** Resets entries of the IQ and the LSQ. */ + void resetEntries(); + + /** Tells the CPU to wakeup if it has descheduled itself due to no + * activity. Used mainly by the LdWritebackEvent. + */ + void wakeCPU(); + + /** Reports to the CPU that there is activity this cycle. */ + void activityThisCycle(); + + /** Tells CPU that the Execute stage is active and running. */ + inline void activateStage(); + + /** Tells CPU that the Execute stage is inactive and idle. */ + inline void deactivateStage(); + + /** Returns if the LSQ has any stores to writeback. */ + bool hasStoresToWB() { return ldstQueue.hasStoresToWB(); } + + /** Returns if the LSQ has any stores to writeback for the given thread. 
*/ + bool hasStoresToWB(ThreadID tid) { return ldstQueue.hasStoresToWB(tid); } + + /** Increments the outstanding-writeback count; clears ableToIssue when the count reaches wbMax. DEBUG builds also record sn in wbList. */ void incrWb(InstSeqNum &sn) + { + if (++wbOutstanding == wbMax) + ableToIssue = false; + DPRINTF(EdgeExe, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding <= wbMax); +#ifdef DEBUG + wbList.insert(sn); +#endif + } + + /** Decrements the outstanding-writeback count; sets ableToIssue again if the count was at wbMax before the decrement. DEBUG builds also remove sn from wbList. */ void decrWb(InstSeqNum &sn) + { + if (wbOutstanding-- == wbMax) + ableToIssue = true; + DPRINTF(EdgeExe, "wbOutstanding: %i\n", wbOutstanding); + assert(wbOutstanding >= 0); +#ifdef DEBUG + assert(wbList.find(sn) != wbList.end()); + wbList.erase(sn); +#endif + } + +#ifdef DEBUG + /** Sequence numbers added by incrWb() and not yet erased by decrWb() (DEBUG builds only). */ std::set wbList; + + /** Prints every sequence number currently held in wbList. */ void dumpWb() + { + std::set::iterator wb_it = wbList.begin(); + while (wb_it != wbList.end()) { + cprintf("[sn:%lli]\n", + (*wb_it)); + wb_it++; + } + } +#endif + + /** Returns the current value of ableToIssue. */ bool canIssue() { return ableToIssue; } + + /** Cleared by incrWb() when wbOutstanding reaches wbMax and set again by decrWb(). */ bool ableToIssue; + + private: + /** Sends commit proper information for a squash due to a syscall + * requirement. + */ + void squashDueToSyscall(BlockPtr &inst_block, ThreadID tid); + + /** Sends commit proper information for a squash due to a branch + * mispredict. + */ + void squashDueToBranch(BlockPtr &inst_block, ThreadID tid); + + /** Sends commit proper information for a squash due to a memory order + * violation. + */ + void squashDueToMemOrder(BlockPtr &inst_block, ThreadID tid); + + /** Sends commit proper information for a squash due to memory becoming + * blocked (younger issued instructions must be retried). + */ + void squashDueToMemBlocked(BlockPtr &inst_block, ThreadID tid); + + /** Sets Dispatch to blocked, and signals back to other stages to block. */ + void block(ThreadID tid); + + /** Unblocks Dispatch if the skid buffer is empty, and signals back to + * other stages to unblock. + */ + void unblock(ThreadID tid); + + /** Determines proper actions to take given Dispatch's status. */ + void dispatch(ThreadID tid); + + /** Dispatches instructions to IQ and LSQ. */ + void dispatchInsts(ThreadID tid); + + /** Executes instructions. 
In the case of memory operations, it informs the + * LSQ to execute the instructions. Also handles any redirects that occur + * due to the executed instructions. + */ + void executeInstBlocks(); + + /** Writebacks instructions. In our model, the instruction's execute() + * function atomically reads registers, executes, and writes registers. + * Thus this writeback only wakes up dependent instructions, and informs + * the scoreboard of registers becoming ready. + */ + void writebackInstBlocks(); + + /** Detect the completed blocks. Completion of inst blocks will be judged + * according to the completion conditions of this block. Block completed + * means all the insts in this block have received its operands and all but + * the write-reg insts and store insts have been executed. Write-reg and + * store will write back after commit stage confirm the commit condition + * of this block. When we complete one block, we should wait the commit + * signal from commit stage to write back write-reg and store. Moreover, + * misprediction of one block will be detected once the block completes. + */ + void completeInstBlocks(); + + /** Returns the number of valid, non-squashed inst blocks coming from + * map to dispatch. + */ + unsigned validInstBlocksFromMap(); + + /** Reads the stall signals. */ + void readStallSignals(ThreadID tid); + + /** Checks if any of the stall conditions are currently true. */ + bool checkStall(ThreadID tid); + + /** Processes inputs and changes state accordingly. */ + void checkSignalsAndUpdate(ThreadID tid); + + /** Removes inst blocks from map from a thread's inst block list. */ + void emptyMapInstBlocks(ThreadID tid); + + /** Sorts inst blocks coming from rename into lists separated by thread. */ + void sortInstBlocks(); + + /** Get the current frame id for dispatching inst blocks into inst queue. */ + int getCurrFrameID(); + + /** Decrease the frame id if an inst block is squashed. 
*/ + void decCurrFrameID(); + + public: + /** Ticks Execute stage, causing Dispatch, the IQ, the LSQ, Execute, and + * Writeback to run for one cycle. + */ + void tick(); + + private: + /** Updates execution stats based on the inst blocks. */ + void updateExeInstBlockStats(BlockPtr &inst_block); + + /** Pointer to main time buffer used for backwards communication. */ + TimeBuffer *timeBuffer; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toFetch; + + /** Wire to write information heading to previous stages. */ + typename TimeBuffer::wire toMap; + + /** Wire to get commit's output from backwards time buffer. */ + typename TimeBuffer::wire fromCommit; + + /** Issue stage queue. */ + TimeBuffer issueToExecQueue; + + /** Wire to read information from the issue stage time queue. */ + typename TimeBuffer::wire fromIssue; + + /** + * Time buffer used to pass inst blocks from Execute on to commit. + */ + TimeBuffer *execute2commitQueue; + + /** Time buffer used to receive inst blocks from map. */ TimeBuffer *map2executeQueue; + + /** Wire to write information heading to commit. */ + typename TimeBuffer::wire toCommit; + + /** Wire to get inst blocks from map stage time buffer. */ + typename TimeBuffer::wire fromMap; + + /** Queue of all inst blocks coming from map this cycle. */ + std::queue instBlocks[Impl::MaxThreads]; + + /** Holds the inst blocks currently executing, keyed by block ID. */ + std::map instBlocksInExecuting; + + /** Current frame id for dispatching inst blocks to inst queue. */ + int currFrameID; + + /** Skid buffer between Map and Execute. */ + std::queue skidBuffer[Impl::MaxThreads]; + + private: + /** CPU pointer. */ + CPU *cpu; + + /** Records if Execute has written to the time buffer this cycle, so that the + * CPU can deschedule itself if there is no activity. + */ + bool wroteToTimeBuffer; + + /** Source of possible stalls. */ + struct Stalls { + bool commit; + }; + + /** Stages that are telling Execute to stall. 
*/ + Stalls stalls[Impl::MaxThreads]; + + /** Debug function to print instructions that are issued this cycle. */ + void printAvailableInsts(); + + public: + /** Instruction queue. */ + IQ instQueue; + + /** Load / store queue. */ + LSQ ldstQueue; + + /** Pointer to the functional unit pool. */ + FUPool *fuPool; + + /** Records if the LSQ needs to be updated on the next cycle, so that + * Execute knows if there will be activity on the next cycle. + */ + bool updateLSQNextCycle; + + private: + /** Records if there is a fetch redirect on this cycle for each thread. */ + bool fetchRedirect[Impl::MaxThreads]; + + /** Records if the queues have been changed (inserted or issued insts), + * so that Execute knows to broadcast the updated amount of free entries. + */ + bool updatedQueues; + + /** Commit to Execute delay, in ticks. */ + unsigned commitToExecuteDelay; + + /** Map to Execute delay, in ticks. */ + unsigned mapToExecuteDelay; + + /** + * Issue to execute delay, in ticks. What this actually represents is + * the amount of time it takes for an instruction to wake up, be + * scheduled, and sent to a FU for execution. + */ + unsigned issueToExecuteDelay; + + /** Width of dispatch, in instructions. */ + unsigned dispatchWidth; + + /** Width of issue, in instructions. */ + unsigned issueWidth; + + /** Index into queue of inst blocks being written back. */ + unsigned wbNumInstBlock; + + /** Cycle number within the queue of inst blocks being written back. + * Used in case there are too many inst blocks writing back at the current + * cycle and writebacks need to be scheduled for the future. See comments + * in instBlockToCommit(). + */ + unsigned wbCycle; + + /** Number of instructions in flight that will writeback. */ + + /** Number of instructions in flight that will writeback. */ + int wbOutstanding; + + /** Writeback width. 
*/ + unsigned wbWidth; + + /** Writeback width * writeback depth, where writeback depth is + * the number of cycles of writing back instructions that can be + * buffered. */ + unsigned wbMax; + + /** Number of active threads. */ + ThreadID numThreads; + + /** Pointer to list of active threads. */ + std::list *activeThreads; + + /** Maximum size of the skid buffer. */ + unsigned skidBufferMax; + + /** Is this stage switched out. */ + bool switchedOut; + + /** Stat for total number of idle cycles. */ + Stats::Scalar executeIdleCycles; + /** Stat for total number of squashing cycles. */ + Stats::Scalar executeSquashCycles; + /** Stat for total number of blocking cycles. */ + Stats::Scalar executeBlockCycles; + /** Stat for total number of unblocking cycles. */ + Stats::Scalar executeUnblockCycles; + + /** Stat for total number of instructions dispatched. */ + Stats::Scalar executeDispatchedInsts; + /** Stat for total number of NOPs dispatched. */ + Stats::Scalar executeDispNopInsts; + /** Stat for total number of useful (non-NOP) insts dispatched. */ + Stats::Formula executeDispUsefulInsts; + /** Stat for total number of inst blocks dispatched. */ + Stats::Scalar executeDispatchedInstBlocks; + /** Stat for total number of squashed instructions dispatch skips. */ + Stats::Scalar executeDispSquashedInsts; + /** Stat for total number of squashed inst blocks dispatch skips. */ + Stats::Scalar executeDispSquashedInstBlocks; + /** Stat for total number of dispatched load instructions. */ + Stats::Scalar executeDispLoadInsts; + /** Stat for total number of dispatched store instructions. */ + Stats::Scalar executeDispStoreInsts; + /** Stat for total number of dispatched speculative inst blocks. */ + Stats::Formula executeDispSpecInstBlocks; + /** Stat for total number of dispatched non speculative inst blocks. */ + Stats::Scalar executeDispNonSpecInstBlocks; + /** Stat for total number of dispatched non speculative insts. 
*/ + Stats::Scalar executeDispNonSpecInsts; + /** Stat for total number of dispatched speculative insts. */ + Stats::Formula executeDispSpecInsts; + + /** Stat for number of times the IQ becomes full. */ + Stats::Scalar executeIQFullEvents; + /** Stat for number of times the LSQ becomes full. */ + Stats::Scalar executeLSQFullEvents; + /** Stat for number of times the max number of in-flight inst blocks is reached. */ + Stats::Scalar executeInFlightFullEvents; + /** Stat for total number of memory ordering violation events. */ + Stats::Scalar memOrderViolationEvents; + /** Stat for total number of load blocked events. */ + Stats::Scalar memLoadBlockedEvents; + /** Stat for total number of mispredicted branches detected at execute. */ + Stats::Scalar branchMispredicts; + + /** Stat for total number of executed instructions. */ + Stats::Scalar executeExecutedInsts; + /** Stat for total number of executed inst blocks. */ + Stats::Scalar executeExecutedInstBlocks; + /** Number of executed memory references. */ + Stats::Scalar executeExecutedRefs; + /** Stat for total number of executed load instructions. */ + Stats::Scalar executeExecLoadInsts; + /** Number of executed store instructions. */ + Stats::Formula executeExecStoreInsts; + /** Stat for total number of squashed instructions skipped at execute. */ + Stats::Scalar executeExecSquashedInsts; + /** Stat for total number of squashed inst blocks skipped at execute. */ + Stats::Scalar executeExecSquashedInstBlocks; + /** Stat for total number of executed reg-read instructions. */ + Stats::Scalar executeExecRegReads; + /** Stat for total number of executed reg-write instructions. */ + Stats::Scalar executeExecRegWrites; + /** Number of executed register accesses. */ + Stats::Formula executeExecRegAcc; + /** Stat for total number of nullified instructions. */ + Stats::Formula executeExecNullifiedInsts; + /** Stat for total number of nullified stores. 
*/ + Stats::Scalar executeExecNullifiedStores; + /** Stat for total number of nullified loads. */ + Stats::Scalar executeExecNullifiedLoads; + /** Stat for total number of nullified reg-writes. */ + Stats::Scalar executeExecNullifiedWrites; + /** Stat for total number of nullified branches. */ + Stats::Scalar executeExecNullifiedBranches; + /** Stat for total number of nullified normal insts. */ + Stats::Scalar executeExecNullifiedNormals; + /** Number of executed instructions per available cycle. */ + Stats::Formula executeExecRate; + /** Number of cycles that have available insts. */ + Stats::Scalar executeExecAvailCycles; + + /** Number of inst blocks sent to commit. */ + Stats::Scalar executeInstBlocksToCommit; + + /** Distribution of in-flight blocks. */ + Stats::Distribution executeInflightInstBlocks; +}; + +#endif // __CPU_EDGE_EXE_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/execute.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/execute.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/execute_impl.hh" +#include "cpu/edge/inst_queue.hh" + +template class SimpleEdgeExecute; diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/execute_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/execute_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,1897 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include + +#include "base/timebuf.hh" +#include "config/the_isa.hh" +#include "cpu/edge/fu_pool.hh" +#include "cpu/edge/execute.hh" +#include "params/SimpleEdgeCPU.hh" + +using namespace std; + +template +SimpleEdgeExecute::SimpleEdgeExecute(CPU *_cpu, SimpleEdgeCPUParams *params) + : issueToExecQueue(params->backComSize, params->forwardComSize), + cpu(_cpu), + instQueue(_cpu, this, params), + ldstQueue(_cpu, this, params), + fuPool(params->fuPool), + commitToExecuteDelay(params->commitToExecuteDelay), + mapToExecuteDelay(params->mapToExecuteDelay), + issueToExecuteDelay(params->issueToExecuteDelay), + dispatchWidth(params->dispatchWidth), + issueWidth(params->issueWidth), + wbOutstanding(0), + wbWidth(params->wbWidth), + numThreads(params->numThreads), + switchedOut(false) +{ + _status = Active; + exeStatus = Running; + wbStatus = Idle; + + // Setup wire to read instructions coming from issue. + fromIssue = issueToExecQueue.getWire(-issueToExecuteDelay); + + // Instruction queue needs the queue between issue and execute. 
+ instQueue.setIssueToExecuteQueue(&issueToExecQueue); + + for (ThreadID tid = 0; tid < numThreads; tid++) { + dispatchStatus[tid] = Running; + stalls[tid].commit = false; + fetchRedirect[tid] = false; + } + + wbMax = wbWidth * params->wbDepth; + + updateLSQNextCycle = false; + + ableToIssue = true; + + skidBufferMax = (3 * (mapToExecuteDelay * params->mapWidth)) + issueWidth; + +} + +template +std::string +SimpleEdgeExecute::name() const +{ + return cpu->name() + ".execute"; +} + +template +void +SimpleEdgeExecute::regStats() +{ + using namespace Stats; + + instQueue.regStats(); + ldstQueue.regStats(); + + executeIdleCycles + .name(name() + ".executeIdleCycles") + .desc("Number of cycles execute is idle"); + + executeSquashCycles + .name(name() + ".executeSquashCycles") + .desc("Number of cycles execute is squashing"); + + executeBlockCycles + .name(name() + ".executeBlockCycles") + .desc("Number of cycles execute is blocking"); + + executeUnblockCycles + .name(name() + ".executeUnblockCycles") + .desc("Number of cycles execute is unblocking"); + + executeDispatchedInsts + .name(name() + ".executeDispatchedInsts") + .desc("Number of instructions dispatched to IQ"); + + executeDispNopInsts + .name(name() + ".executeDispatchedNopInsts") + .desc("Number of NOPs dispatched to IQ"); + + executeDispUsefulInsts + .name(name() + ".executeDispatchedUsefulInsts") + .desc("Number of instructions without NOP dispatched to IQ"); + executeDispUsefulInsts = executeDispatchedInsts - executeDispNopInsts; + + executeDispatchedInstBlocks + .name(name() + ".executeDispatchedInstBlocks") + .desc("Number of inst blocks dispatched to IQ"); + + executeDispSquashedInsts + .name(name() + ".executeDispSquashedInsts") + .desc("Number of squashed insts skipped by dispatch"); + + executeDispSquashedInstBlocks + .name(name() + ".executeDispSquashedInstBlocks") + .desc("Number of squashed inst blocks skipped by dispatch"); + + executeDispLoadInsts + .name(name() + ".executeDispLoadInsts") + 
.desc("Number of dispatched load instructions"); + + executeDispStoreInsts + .name(name() + ".executeDispStoreInsts") + .desc("Number of dispatched store instructions"); + + executeDispNonSpecInstBlocks + .name(name() + ".executeDispNonSpecInstBlocks") + .desc("Number of dispatched non-speculative inst blocks"); + + executeDispSpecInstBlocks + .name(name() + ".executeDispSpecInstBlocks") + .desc("Number of dispatched speculative inst blocks"); + executeDispSpecInstBlocks = executeDispatchedInstBlocks + - executeDispNonSpecInstBlocks; + + executeDispNonSpecInsts + .name(name() + ".executeDispNonSpecInsts") + .desc("Number of dispatched non-speculative instructions"); + + executeDispSpecInsts + .name(name() + ".executeDispSpecInsts") + .desc("Number of dispatched speculative instructions"); + executeDispSpecInsts = executeDispatchedInsts - executeDispNonSpecInsts; + + executeIQFullEvents + .name(name() + ".executeIQFullEvents") + .desc("Number of times the IQ has become full, causing a stall"); + + executeLSQFullEvents + .name(name() + ".executeLSQFullEvents") + .desc("Number of times the LSQ has become full, causing a stall"); + + executeInFlightFullEvents + .name(name() + ".executeInflightFullEvents") + .desc("Number of times the max number of in-flight inst blocks reached, causing a stall"); + + memOrderViolationEvents + .name(name() + ".memOrderViolationEvents") + .desc("Number of memory order violations"); + + memLoadBlockedEvents + .name(name() + ".memLoadBlockedEvents") + .desc("Number of load blocked times"); + + branchMispredicts + .name(name() + ".branchMispredicts") + .desc("Number of inst block mispredicts detected at execute"); + + executeExecutedInsts + .name(name() + ".executeExecutedInsts") + .desc("Number of executed instructions"); + + executeExecutedInstBlocks + .name(name() + ".executeExecutedInstBlocks") + .desc("Number of executed inst blocks"); + + executeExecutedRefs + .name(name() + ".EXEC:refs") + .desc("number of memory reference insts 
executed"); + + executeExecLoadInsts + .name(name() + ".executeExecLoadInsts") + .desc("Number of load instructions executed"); + + executeExecStoreInsts + .name(name() + ".EXEC:stores") + .desc("Number of stores executed"); + executeExecStoreInsts = executeExecutedRefs - executeExecLoadInsts; + + executeExecSquashedInsts + .name(name() + ".executeExecSquashedInsts") + .desc("Number of squashed instructions skipped in execute"); + + executeExecSquashedInstBlocks + .name(name() + ".executeExecSquashedInstBlocks") + .desc("Number of squashed inst blocks skipped in execute"); + + executeExecRegReads + .name(name() + ".EXEC:reg-reads") + .desc("Number of register read executed"); + + executeExecRegWrites + .name(name() + ".EXEC:reg-writes") + .desc("Number of register write executed"); + + executeExecRegAcc + .name(name() + ".EXEC:reg-Acc") + .desc("Number of register accesses executed"); + executeExecRegAcc = executeExecRegReads + executeExecRegWrites; + + executeExecAvailCycles + .name(name() + ".EXEC:availableCycles") + .desc("Number of available cycles"); + + executeExecRate + .name(name() + ".EXEC:rate") + .desc("Inst execution rate"); + executeExecRate = executeExecutedInsts / executeExecAvailCycles; + + executeExecNullifiedInsts + .name(name() + ".EXEC:nullifiedInsts") + .desc("Number of nullified insts"); + executeExecNullifiedInsts = executeExecNullifiedNormals + + executeExecNullifiedBranches + + executeExecNullifiedLoads + + executeExecNullifiedStores + + executeExecNullifiedWrites; + + executeExecNullifiedStores + .name(name() + ".EXEC:nullified-stores") + .desc("Number of nullified stores"); + + executeExecNullifiedLoads + .name(name() + ".EXEC:nullified-loads") + .desc("Number of nullified loads"); + + executeExecNullifiedWrites + .name(name() + ".EXEC:nullified-writes") + .desc("Number of nullified writes"); + + executeExecNullifiedBranches + .name(name() + ".EXEC:nullified-branches") + .desc("Number of nullified branches"); + + 
executeExecNullifiedNormals + .name(name() + ".EXEC:nullified-normals") + .desc("Number of nullified normals"); + + executeInstBlocksToCommit + .name(name() + ".instBlocksToCommit") + .desc("Number of inst blocks sent to commit"); + + executeInflightInstBlocks + .init(0,Impl::MaxInFlightBlockNum,1) + .name(name() + ".EXE: In-flightInstBlocks") + .desc("Number of inst blocks in flight each cycle") + .flags(Stats::pdf) + ; +} + +template +void +SimpleEdgeExecute::initStage() +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + toMap->executeInfo[tid].usedIQ = true; + toMap->executeInfo[tid].freeIQEntries = + instQueue.numFreeEntries(tid); + + toMap->executeInfo[tid].usedLSQ = true; + toMap->executeInfo[tid].freeLSQEntries = + ldstQueue.numFreeEntries(tid); + } + + cpu->activateStage(CPU::ExecuteIdx); + + // Init frame id to 0 + currFrameID = 0; +} + +template +void +SimpleEdgeExecute::setTimeBuffer(TimeBuffer *tb_ptr) +{ + timeBuffer = tb_ptr; + + // Setup wire to read information from time buffer, from commit. + fromCommit = timeBuffer->getWire(-commitToExecuteDelay); + + // Setup wire to write information back to previous stages. + toMap = timeBuffer->getWire(0); + + toFetch = timeBuffer->getWire(0); + + // Instruction queue also needs main time buffer. + instQueue.setTimeBuffer(tb_ptr); +} + +template +void +SimpleEdgeExecute::setMapQueue(TimeBuffer *rq_ptr) +{ + map2executeQueue = rq_ptr; + + // Setup wire to read information from the map queue. + fromMap = map2executeQueue->getWire(-mapToExecuteDelay); +} + +template +void +SimpleEdgeExecute::setExecuteQueue(TimeBuffer *iq_ptr) +{ + execute2commitQueue = iq_ptr; + + // Setup wire to write inst blocks to commit. 
+ toCommit = execute2commitQueue->getWire(0); +} + +template +void +SimpleEdgeExecute::setActiveThreads(list *at_ptr) +{ + activeThreads = at_ptr; + + ldstQueue.setActiveThreads(at_ptr); + instQueue.setActiveThreads(at_ptr); +} + +template +bool +SimpleEdgeExecute::drain() +{ + // Execute is ready to drain at any time. + cpu->signalDrained(); + return true; +} + +template +void +SimpleEdgeExecute::resume() +{ +} + +template +void +SimpleEdgeExecute::switchOut() +{ + // Clear any state. + switchedOut = true; + assert(instBlocks[0].empty()); + assert(skidBuffer[0].empty()); + + instQueue.switchOut(); + ldstQueue.switchOut(); + fuPool->switchOut(); + + for (ThreadID tid = 0; tid < numThreads; tid++) { + while (!instBlocks[tid].empty()) + instBlocks[tid].pop(); + while (!skidBuffer[tid].empty()) + skidBuffer[tid].pop(); + } +} + +template +void +SimpleEdgeExecute::takeOverFrom() +{ + // Reset all state. + _status = Active; + exeStatus = Running; + wbStatus = Idle; + switchedOut = false; + + instQueue.takeOverFrom(); + ldstQueue.takeOverFrom(); + fuPool->takeOverFrom(); + + initStage(); + cpu->activityThisCycle(); + + for (ThreadID tid = 0; tid < numThreads; tid++) { + dispatchStatus[tid] = Running; + stalls[tid].commit = false; + fetchRedirect[tid] = false; + } + + updateLSQNextCycle = false; + + for (int i = 0; i < issueToExecQueue.getSize(); ++i) { + issueToExecQueue.advance(); + } +} + +template +void +SimpleEdgeExecute::squash(ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%i]: Squashing all Blocks.\n", tid); + + // Tell the IQ to start squashing. + instQueue.squash(tid); + + // Tell the LDSTQ to start squashing. 
+ ldstQueue.squash(fromCommit->commitInfo[tid].doneBlockID, tid); + + ExeBlockIt block_it = instBlocksInExecuting.begin(); + ExeBlockIt end = instBlocksInExecuting.end(); + + while ( block_it != end ) { + if ( block_it->first > fromCommit->commitInfo[tid].doneBlockID ) { + DPRINTF(EdgeExe, "[tid:%i]: Removing inst blocks[Bid:%lli] in executing data structure.\n", + tid, block_it->first); + + executeExecSquashedInsts += (block_it->second->getNumAllInst()); + + // See Nicolai M Josuttis_The C++ Standard Library + instBlocksInExecuting.erase(block_it++); + // Decrease the frame id to ensure the consistency in IQ frames. + decCurrFrameID(); + + executeExecSquashedInstBlocks ++; + } else { + ++ block_it; + } + } + + updatedQueues = true; + + // Clear the skid buffer in case it has any data in it. + DPRINTF(EdgeExe, "[tid:%i]: Removing skidbuffer Blocks until [id:%i].\n", + tid, fromCommit->commitInfo[tid].doneBlockID); + + while (!skidBuffer[tid].empty()) { + + toMap->executeInfo[tid].dispatched++; + + skidBuffer[tid].pop(); + } + + emptyMapInstBlocks(tid); + +} + +template +void +SimpleEdgeExecute::squashDueToSyscall(BlockPtr &inst_block, ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%i]: Squashing due to syscall, PC: %#x " + "[id:%i].\n", tid, inst_block->getStartPC(), inst_block->getBlockID()); + + assert(inst_block->isNeedSyscall()); + + toCommit->squash[tid] = true; + toCommit->squashedSeqNum[tid] = inst_block->getBlockID(); + toCommit->nextPC[tid] = inst_block->getBranchTarget(); + + toCommit->branchMispredict[tid] = false; + + toCommit->includeSquashInstBlock[tid] = false; + + wroteToTimeBuffer = true; +} + +template +void +SimpleEdgeExecute::squashDueToBranch(BlockPtr &inst_block, ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%i]: Squashing from a specific block, PC: %#x " + "[id:%i].\n", tid, inst_block->getStartPC(), inst_block->getBlockID()); + + toCommit->squash[tid] = true; + toCommit->squashedSeqNum[tid] = inst_block->getBlockID(); + toCommit->mispredPC[tid] = 
inst_block->getStartPC(); + toCommit->branchMispredict[tid] = true; + + // Pass the block's exit ID and exit type on to commit (assigned once). + toCommit->exitID = inst_block->getExitID(); + toCommit->exitType = inst_block->getExitType(); + + // The branch counts as taken when its target differs from the next block PC. + toCommit->branchTaken[tid] = + inst_block->getNextBlockPC() != inst_block->getBranchTarget(); + + toCommit->nextPC[tid] = inst_block->getBranchTarget(); + + toCommit->includeSquashInstBlock[tid] = false; + + wroteToTimeBuffer = true; +} + +template +void +SimpleEdgeExecute::squashDueToMemOrder(BlockPtr &inst_block, ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%i]: Squashing from a specific block, " + "PC: %#x [id:%i].\n", tid, inst_block->getStartPC(), inst_block->getBlockID()); + + toCommit->squash[tid] = true; + toCommit->squashedSeqNum[tid] = inst_block->getBlockID(); + toCommit->nextPC[tid] = inst_block->getNextBlockPC(); + + toCommit->branchMispredict[tid] = false; + + toCommit->includeSquashInstBlock[tid] = false; + + wroteToTimeBuffer = true; +} + +template +void +SimpleEdgeExecute::squashDueToMemBlocked(BlockPtr &inst_block, ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%i]: Memory blocked, squashing this inst block, " + "PC: %#x [id:%i].\n", tid, inst_block->getStartPC(), inst_block->getBlockID()); + + toCommit->squash[tid] = true; + toCommit->squashedSeqNum[tid] = inst_block->getBlockID(); + toCommit->nextPC[tid] = inst_block->getStartPC(); + + toCommit->branchMispredict[tid] = false; + + // Must include the broadcasted SN in the squash. 
+ toCommit->includeSquashInstBlock[tid] = true; + + ldstQueue.setLoadBlockedHandled(tid); + + wroteToTimeBuffer = true; + + // Set the inst block as squashed + inst_block->setSquashed(); + +} + +template +void +SimpleEdgeExecute::block(ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%u]: Blocking.\n", tid); + + if (dispatchStatus[tid] != Blocked && + dispatchStatus[tid] != Unblocking) { + toMap->executeBlock[tid] = true; + wroteToTimeBuffer = true; + } + + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidInsert(tid); + + dispatchStatus[tid] = Blocked; +} + +template +void +SimpleEdgeExecute::unblock(ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%i]: Reading instructions out of the skid " + "buffer %u.\n",tid, tid); + + // If the skid buffer is empty, signal back to previous stages to unblock. + // Also switch status to running. + if (skidBuffer[tid].empty()) { + toMap->executeUnblock[tid] = true; + wroteToTimeBuffer = true; + DPRINTF(EdgeExe, "[tid:%i]: Done unblocking.\n",tid); + dispatchStatus[tid] = Running; + } +} + +// +// @todo: Operand network should be placed here. +// +template +void +SimpleEdgeExecute::wakeDependents(DynInstPtr &inst) +{ + instQueue.wakeDependents(inst); +} + +template +void +SimpleEdgeExecute::rescheduleMemInst(DynInstPtr &inst) +{ + instQueue.rescheduleMemInst(inst); +} + +template +void +SimpleEdgeExecute::replayMemInst(DynInstPtr &inst) +{ + instQueue.replayMemInst(inst); +} + +template +int +SimpleEdgeExecute::getCurrFrameID() +{ + if ( (++currFrameID) >= Impl::MaxFrameNum ) { + currFrameID = 0; + } + + return currFrameID; +} + +template +void +SimpleEdgeExecute::decCurrFrameID() +{ + if ( (--currFrameID) < 0 ){ + currFrameID = Impl::MaxFrameNum - 1; + } +} + +template +void +SimpleEdgeExecute::instBlockToCommit(BlockPtr &inst_block) +{ + // Right now, most of this code is inherited from the O3 model. 
+ // It works right however, we should re-consider all of this + // codes when I am free. + while ((*execute2commitQueue)[wbCycle].instBlocks[wbNumInstBlock]) { + ++wbNumInstBlock; + if (wbNumInstBlock == wbWidth) { + ++wbCycle; + wbNumInstBlock = 0; + } + + assert((wbCycle * wbWidth + wbNumInstBlock) <= wbMax); + } + + DPRINTF(EdgeExe, "Current wb cycle: %i, width: %i, numInstB lock: %i\nwbActual:%i\n", + wbCycle, wbWidth, wbNumInstBlock, wbCycle * wbWidth + wbNumInstBlock); + // Add finished inst block to queue to commit. + (*execute2commitQueue)[wbCycle].instBlocks[wbNumInstBlock] = inst_block; + (*execute2commitQueue)[wbCycle].size++; + + executeInstBlocksToCommit ++; + +} + +template +unsigned +SimpleEdgeExecute::validInstBlocksFromMap() +{ + unsigned inst_count = 0; // blocks from map that are not squashed + + for (int i=0; i<fromMap->size; i++) { + if (!fromMap->instBlocks[i]->isSquashed()) + inst_count++; + } + + return inst_count; +} + +template +void +SimpleEdgeExecute::skidInsert(ThreadID tid) +{ + BlockPtr inst_block = NULL; + + while (!instBlocks[tid].empty()) { + inst_block = instBlocks[tid].front(); + + instBlocks[tid].pop(); + + DPRINTF(EdgeExe,"[tid:%i]: Inserting [id:%lli] PC:%#x into " + "dispatch skidBuffer %i\n",tid, inst_block->getBlockID(), + inst_block->getStartPC(),tid); + + skidBuffer[tid].push(inst_block); + } + + assert(skidBuffer[tid].size() <= skidBufferMax && + "Skidbuffer Exceeded Max Size"); +} + +template +int +SimpleEdgeExecute::skidCount() +{ + int max=0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + unsigned thread_count = skidBuffer[tid].size(); + if (max < thread_count) + max = thread_count; + } + + return max; +} + +template +bool +SimpleEdgeExecute::skidsEmpty() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!skidBuffer[tid].empty()) + return false; 
+ } + + return true; +} + +template +void +SimpleEdgeExecute::updateStatus() +{ + bool any_unblocking = false; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (dispatchStatus[tid] == Unblocking) { + any_unblocking = true; + break; + } + } + + // If there are no ready instructions waiting to be scheduled by the IQ, + // and there's no stores waiting to write back, and dispatch is not + // unblocking, then there is no internal activity for the Execute stage. + if (_status == Active && !instQueue.hasReadyInsts() && + !ldstQueue.willWB() && !any_unblocking) { + DPRINTF(EdgeExe, "Execute switching to idle\n"); + + deactivateStage(); + + _status = Inactive; + } else if (_status == Inactive && (instQueue.hasReadyInsts() || + ldstQueue.willWB() || + any_unblocking)) { + // Otherwise there is internal activity. Set to active. + DPRINTF(EdgeExe, "Execute switching to active\n"); + + activateStage(); + + _status = Active; + } +} + +template +void +SimpleEdgeExecute::resetEntries() +{ + instQueue.resetEntries(); + ldstQueue.resetEntries(); +} + +template +void +SimpleEdgeExecute::readStallSignals(ThreadID tid) +{ + if (fromCommit->commitBlock[tid]) { + stalls[tid].commit = true; + } + + if (fromCommit->commitUnblock[tid]) { + assert(stalls[tid].commit); + stalls[tid].commit = false; + } +} + +template +bool +SimpleEdgeExecute::checkStall(ThreadID tid) +{ + bool ret_val(false); + + if (stalls[tid].commit) { + DPRINTF(EdgeExe,"[tid:%i]: Stall from Commit stage detected.\n",tid); + ret_val = true; + } else if (instQueue.isFull(tid)) { + DPRINTF(EdgeExe,"[tid:%i]: Stall: IQ is full.\n",tid); + ret_val = true; + } else if (ldstQueue.isFull(tid)) { + DPRINTF(EdgeExe,"[tid:%i]: Stall: LSQ is full\n",tid); + + if (ldstQueue.numLoads(tid) > 0 ) { + + DPRINTF(EdgeExe,"[tid:%i]: LSQ oldest load: [sn:%i] \n", + tid,ldstQueue.getLoadHeadSeqNum(tid)); + } + + if 
(ldstQueue.numStores(tid) > 0) { + + DPRINTF(EdgeExe,"[tid:%i]: LSQ oldest store: [sn:%i] \n", + tid,ldstQueue.getStoreHeadSeqNum(tid)); + } + + ret_val = true; + } else if (ldstQueue.isStalled(tid)) { + DPRINTF(EdgeExe,"[tid:%i]: Stall: LSQ stall detected.\n",tid); + ret_val = true; + } + + return ret_val; +} + +template +void +SimpleEdgeExecute::checkSignalsAndUpdate(ThreadID tid) +{ + // Check if there's a squash signal, squash if there is + // Check stall signals, block if there is. + // If status was Blocked + // if so then go to unblocking + // If status was Squashing + // check if squashing is not high. Switch to running this cycle. + + readStallSignals(tid); + + if (fromCommit->commitInfo[tid].squash) { + squash(tid); + + if (dispatchStatus[tid] == Blocked || + dispatchStatus[tid] == Unblocking) { + toMap->executeUnblock[tid] = true; + wroteToTimeBuffer = true; + } + + dispatchStatus[tid] = Squashing; + + fetchRedirect[tid] = false; + return; + } + + if (fromCommit->commitInfo[tid].robSquashing) { + DPRINTF(EdgeExe, "[tid:%i]: ROB is still squashing.\n", tid); + + dispatchStatus[tid] = Squashing; + + emptyMapInstBlocks(tid); + wroteToTimeBuffer = true; + return; + } + + if (checkStall(tid)) { + block(tid); + dispatchStatus[tid] = Blocked; + return; + } + + if (dispatchStatus[tid] == Blocked) { + // Status from previous cycle was blocked, but there are no more stall + // conditions. Switch over to unblocking. + DPRINTF(EdgeExe, "[tid:%i]: Done blocking, switching to unblocking.\n", + tid); + + dispatchStatus[tid] = Unblocking; + + unblock(tid); + + return; + } + + if (dispatchStatus[tid] == Squashing) { + // Switch status to running if rename isn't being told to block or + // squash this cycle. 
+ DPRINTF(EdgeExe, "[tid:%i]: Done squashing, switching to running.\n", + tid); + + dispatchStatus[tid] = Running; + + return; + } +} + +template +void +SimpleEdgeExecute::sortInstBlocks() +{ + int blocks_from_map = fromMap->size; + +#ifdef DEBUG + for (ThreadID tid = 0; tid < numThreads; tid++) + assert(instBlocks[tid].empty()); +#endif + + for (int i = 0; i < blocks_from_map; ++i) { + instBlocks[(fromMap->instBlocks[i])->getTid()].push(fromMap->instBlocks[i]); + } + +} + +template +void +SimpleEdgeExecute::emptyMapInstBlocks(ThreadID tid) +{ + DPRINTF(EdgeExe, "[tid:%i]: Removing incoming map blocks\n", tid); + + while (!instBlocks[tid].empty()) { + instBlocks[tid].pop(); + } +} + +template +void +SimpleEdgeExecute::wakeCPU() +{ + cpu->wakeCPU(); +} + +template +void +SimpleEdgeExecute::activityThisCycle() +{ + DPRINTF(Activity, "Activity this cycle.\n"); + cpu->activityThisCycle(); +} + +template +inline void +SimpleEdgeExecute::activateStage() +{ + DPRINTF(Activity, "Activating stage.\n"); + cpu->activateStage(CPU::ExecuteIdx); +} + +template +inline void +SimpleEdgeExecute::deactivateStage() +{ + DPRINTF(Activity, "Deactivating stage.\n"); + cpu->deactivateStage(CPU::ExecuteIdx); +} + +template +void +SimpleEdgeExecute::dispatch(ThreadID tid) +{ + // If status is Running or idle, + // call dispatchInsts() + // If status is Unblocking, + // buffer any instructions coming from rename + // continue trying to empty skid buffer + // check if stall conditions have passed + + if (dispatchStatus[tid] == Blocked) { + ++executeBlockCycles; + + } else if (dispatchStatus[tid] == Squashing) { + ++executeSquashCycles; + } + + // Dispatch should try to dispatch as many instructions as its bandwidth + // will allow, as long as it is not currently blocked. 
+ if (dispatchStatus[tid] == Running || + dispatchStatus[tid] == Idle) { + DPRINTF(EdgeExe, "[tid:%i] Not blocked, so attempting to run " + "dispatch.\n", tid); + + dispatchInsts(tid); + } else if (dispatchStatus[tid] == Unblocking) { + // Make sure that the skid buffer has something in it if the + // status is unblocking. + assert(!skidsEmpty()); + + // If the status was unblocking, then instructions from the skid + // buffer were used. Remove those instructions and handle + // the rest of unblocking. + dispatchInsts(tid); + + ++executeUnblockCycles; + + if (validInstBlocksFromMap()) { + // Add the current inputs to the skid buffer so they can be + // reprocessed when this stage unblocks. + skidInsert(tid); + } + + unblock(tid); + } +} + +template +void +SimpleEdgeExecute::dispatchInsts(ThreadID tid) +{ + // Obtain instructions from skid buffer if unblocking, or queue from map + // otherwise. + std::queue &inst_blocks_to_dispatch = + dispatchStatus[tid] == Unblocking ? + skidBuffer[tid] : instBlocks[tid]; + + int inst_blocks_to_add = inst_blocks_to_dispatch.size(); + + BlockPtr inst_block; + int dis_num_inst_block = 0; + + // Loop through the instructions, putting them in the instruction + // queue. + for ( ; dis_num_inst_block < inst_blocks_to_add && + dis_num_inst_block < dispatchWidth; + ++dis_num_inst_block) { + + inst_block = inst_blocks_to_dispatch.front(); + + if (dispatchStatus[tid] == Unblocking) { + DPRINTF(EdgeExe, "[tid:%i]: Issue: Examining instruction from skid " + "buffer\n", tid); + } + + // Make sure there's a valid instruction block there. + assert(inst_block); + + DPRINTF(EdgeExe, "[tid:%i]: Issue: Adding PC %#x [id:%lli] [tid:%i] to " + "IQ.\n", + tid, inst_block->getStartPC(), + inst_block->getBlockID(), + inst_block->getTid()); + + // Be sure to mark these instructions as ready so that the + // commit stage can go ahead and execute them, and mark + // them as issued so the IQ doesn't reprocess them. + + // Check for squashed instructions. 
+ if (inst_block->isSquashed()) { + DPRINTF(EdgeExe, "[tid:%i]: Issue: Squashed instruction encountered, " + "not adding to IQ.\n", tid); + + ++executeDispSquashedInstBlocks; + + executeDispSquashedInsts += inst_block->getNumAllInst(); + + inst_blocks_to_dispatch.pop(); + + toMap->executeInfo[tid].dispatched++; + + continue; + } + + // Check for full conditions. + if (instQueue.numFreeEntries(tid) < inst_block->getNumInst() ) { + + DPRINTF(EdgeExe, "[tid:%i]: Issue: Not enough IQ entries for inst block. " + "IQ has %i freeentries, while %i insts to dispatch.\n", + tid,instQueue.numFreeEntries(tid), + inst_block->getNumInst()); + + // Call function to start blocking. + block(tid); + + // Set unblock to false. Special case where we are using + // skidbuffer (unblocking) instructions but then we still + // get full in the IQ. + toMap->executeUnblock[tid] = false; + + ++executeIQFullEvents; + break; + + } else if (ldstQueue.numFreeEntries(tid) < inst_block->getNumMemRefInst() ) { + + DPRINTF(EdgeExe, "[tid:%i]: Issue: Not enough LSQ entries for inst block\n",tid); + + // Call function to start blocking. + block(tid); + + // Set unblock to false. Special case where we are using + // skidbuffer (unblocking) instructions but then we still + // get full in the IQ. + toMap->executeUnblock[tid] = false; + + ++executeLSQFullEvents; + break; + } else if ( instBlocksInExecuting.size() >= Impl::MaxInFlightBlockNum ) { + + DPRINTF(EdgeExe, "[tid:%i]: Issue: Max num of inflight blocks is reached. \n",tid); + + block(tid); + + toMap->executeUnblock[tid] = false; + + ++executeInFlightFullEvents; + break; + } + + int dis_num_inst_in_block = 0; + int dis_num_mem_ref_inst_in_block = 0; + int num_inst_in_block = inst_block->getNumInst(); + int num_mem_ref_inst_in_block = inst_block->getNumMemRefInst(); + + // Allocate a frame to this inst block. 
+ inst_block->setFrameID ( this->getCurrFrameID() ); + DPRINTF(EdgeExe,"Issue to frame [id:%i].\n", inst_block->getFrameID()); + + // Put instructions of the dispatching inst block into + // IQ and LSQ respectively. + while ( dis_num_inst_in_block < num_inst_in_block) { + + DynInstPtr inst = inst_block->getInst(); + if ( inst->isNop() ) { + + // Add nop to inst queue for simplicity of wakup process + instQueue.insert(inst); + + executeDispNopInsts ++; + } else { + + // All inst should be inserted into inst queue. + instQueue.insert(inst); + } + + toMap->executeInfo[tid].dispatched++; + dis_num_inst_in_block ++; + + executeDispatchedInsts ++; + } + + while ( dis_num_mem_ref_inst_in_block < num_mem_ref_inst_in_block ) { + + DynInstPtr inst = inst_block->getMemRefInst(); + + assert(inst->isMemRef()); + + instQueue.buildMemDependent(inst); + + if ( inst->isLoad() ) { + + // Add load inst into ldst queue + ldstQueue.insertLoad(inst); + executeDispLoadInsts ++; + + } else if ( inst->isStore() ) { + + // Add store inst into ldst queue + ldstQueue.insertStore(inst); + executeDispStoreInsts ++; + + } + + dis_num_mem_ref_inst_in_block ++; + + } + + //Add dispatched inst block into executing list + inst_block->setExecuting(); + + //ldstQueue.setStoreMask(inst_block); + + instBlocksInExecuting.insert( + std::make_pair(inst_block->getBlockID(), inst_block)); + + inst_blocks_to_dispatch.pop(); + + ++ executeDispatchedInstBlocks; + + if (inst_block->isNonSpeculative() ){ + executeDispNonSpecInstBlocks ++; + executeDispNonSpecInsts += num_inst_in_block; + } + } + + if (!inst_blocks_to_dispatch.empty()) { + + DPRINTF(EdgeExe,"[tid:%i]: Issue: Bandwidth Full. 
Blocking.\n", tid); + + block(tid); + toMap->executeUnblock[tid] = false; + + } + + if (dispatchStatus[tid] == Idle && dis_num_inst_block) { + dispatchStatus[tid] = Running; + + updatedQueues = true; + } + +} + +template +void +SimpleEdgeExecute::printAvailableInsts() +{ + int inst = 0; + + std::cout << "Available Instructions: "; + + while (fromIssue->insts[inst]) { + + if (inst%3==0) std::cout << "\n\t"; + + std::cout << "PC: " << fromIssue->insts[inst]->readPC() + << " TN: " << fromIssue->insts[inst]->threadNumber + << " SN: " << fromIssue->insts[inst]->seqNum << " | "; + + inst++; + + } + + std::cout << "\n"; +} + +template +void +SimpleEdgeExecute::executeInstBlocks() +{ + wbNumInstBlock = 0; + wbCycle = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + fetchRedirect[tid] = false; + } + + // Uncomment this if you want to see all available instructions. + //printAvailableInsts(); + + // Execute any instructions that are available. + int insts_to_execute = fromIssue->size; + + if ( insts_to_execute > 0 ) { + ++executeExecAvailCycles; + } + + int inst_num = 0; + + for (; inst_num < insts_to_execute; + ++inst_num) { + + DPRINTF(EdgeExe, "Execute: Executing instructions from IQ.\n"); + + // Get instructions from IQ to execute. + DynInstPtr inst = instQueue.getInstToExecute(); + + // Can never execute nop inst. + assert(!inst->isNop()); + + DPRINTF(EdgeExe, "Execute: Processing PC %#x, [Tid:%i][Bid:%i] [Iid:%i].\n", + inst->readPC(), inst->threadNumber, + inst->getBlockID(), inst->getInstID()); + + // Insts are from inst blocks, so maybe they can never + // be set as squashed.. + // But I still reserve this codes for consistency with + // M5 and future use. 
+ if (inst->isSquashed()) { + DPRINTF(EdgeExe, "Execute: Instruction was squashed.\n"); + + executeExecSquashedInsts ++; + + continue; + } + + Fault fault = NoFault; + bool can_wakeup = true; + + // Execute instructions. + // Note that if the instruction faults, it will be handled + // at the commit stage. + + // Note that the data token type will be set during wakeup + // if the inst is the propagator rather than the token + // generator + + // @todo: Arithmatic, reg-op and control insts could generate + // fault, need handling. + + if (inst->isMemRef()) { + DPRINTF(EdgeExe, "Execute: Mem-ref instructions, calculating address for memory " + "reference.\n"); + + // Tell the LDSTQ to execute this instruction (if it is a load). + if (inst->isLoad()) { + + // If load inst has been nullified, ignore it. + if ( !inst->isNullified() ) { + + // For load insts, wake dependents of consumers will be handled + // in the write-back event of load inst. + // if load has been nullified, wakeup its + // consumer and propagate the token as well. + can_wakeup = false; + + fault = ldstQueue.executeLoad(inst); + } else { + // @todo: Maybe we should propagate the null token rather than + // ignore it. + DPRINTF(EdgeExe, "Nullified load, ignore it\n"); + + executeExecNullifiedLoads ++; + + inst->setIntResult(0); + //inst->setExecuted(); + } + + // Do not mark load inst as executed here. + // It will be marked as executed once upon + // its writeback event happens. + //inst->setExecuted(); + + // If load caused a fault, we should set its output token + // to exception and set a fake result to wake up dependence. + if ( fault != NoFault ) { + DPRINTF(EdgeExe, "Load has fault!\n"); + + // A execption token generated. + inst->setDataflowTokenType ( TheISA::Exception ); + inst->setIntResult(0); + + // If the load got fault it will never write-back, + // so wake its consumers to propagate the fault. 
+ can_wakeup = true; + } + + executeExecLoadInsts ++; + + } else if (inst->isStore()) { + + if ( !inst->isNullified()) { + fault = ldstQueue.executeStore(inst); + } else { + DPRINTF(EdgeExe, "Nullified store, ignore it\n"); + + // If store is nullified, some operations should be taken in + // LSQ so as to avoid write-back while committing. + ldstQueue.nullifyStore(inst); + + executeExecNullifiedStores ++; + } + + // Mark store inst as executed. + inst->setExecuted(); + + // Get current block of this store inst in executing + ExeBlockIt block_it = instBlocksInExecuting.find(inst->getBlockID()); + + // Increment executed number of stores in this block + (block_it->second)->incReceivedStore(); + + // This is for output debug + #ifndef NDEBUG + (block_it->second)->setStoreInstIt(inst->getInstQueueIt()); + #endif + + DPRINTF(EdgeExe, "Set store mask with lsid %i\n", inst->staticInst->getLSID() ); + + // Set store mask for block completion conditions + (block_it->second)->setStoreMask(inst->staticInst->getLSID()); + + if (fault != NoFault) { + // If the instruction faulted, then we need to set the fault + // to its block class so that commit can handle it. + + // There's no need for changing the store data token type + // to execption because it's the leaf of whole graph. + DPRINTF(EdgeExe, "Store has fault %s! [Bid:%lli][Iid:%lli]\n", + fault->name(), inst->getBlockID(), inst->getInstID() ); + + (block_it->second)->setFault(fault); + + inst->setExecuted(); + + activityThisCycle(); + + } else { + + // Store recevie fault from other insts. + + fault = inst->getFault(); + + if ( fault != NoFault ) { + + (block_it->second)->setFault(fault ); + + DPRINTF(EdgeExe, "Store receive fault %s [Bid%lli][Iid%lli], set it to block.\n", + fault->name(), inst->getBlockID(), inst->getInstID() ); + } + } + } else { + + panic("Unexpected memory type!\n"); + + } + + executeExecutedRefs ++; + + }else if ( inst->isGRegWR()) { + + // Executing register read. 
+ if ( inst->isReadRegIssued() && !inst->isReadRegExecuted() ) { + assert(inst->isGRegReadValid()); + + // Reg read will never be nullified, no need to handle it. + + DPRINTF(EdgeExe, "Executing reg-read.\n" ); + + inst->setExecutingReadReg(); + inst->execute(); + + // Mark reg read as executed. + inst->setReadRegExecuted(); + + executeExecRegReads ++; + } else if ( inst->isWriteRegIssued() && !inst->isWriteRegExecuted() ) { + assert(inst->isGRegWriteValid()); + + // Write reg will always be leaf in graph and + // it will be executed during write back phase, + // so if it has been nullified, we will handle + // it in write-back phase. + // Write back will be casted after commit stages comfirmed it. + DPRINTF(EdgeExe, "Executing reg-write.\n"); + + // Mark the inst as executed for forwarding. + inst->setWriteRegExecuted(); + inst->setCanCommit(); + + // Complete it in reg dep graph. + instQueue.completeInRegDepGraph(inst); + + // Get current block in executing + ExeBlockIt block_it = instBlocksInExecuting.find(inst->getBlockID()); + + // Increment executed number of writes in this block + (block_it->second)->incReceivedWrite(); + + // This is for output debug + #ifndef NDEBUG + (block_it->second)->setWriteInstIt(inst->getInstQueueIt()); + #endif + + fault = inst->getFault(); + + if ( fault != NoFault ) { + + (block_it->second)->setFault(fault ); + + DPRINTF(EdgeExe, "Reg-write has fault %s [Bid%lli][Iid%lli], set it to block.\n", + fault->name(), inst->getBlockID(), inst->getInstID() ); + } + + // Reg-write has no consumer, no need to wakeup. 
+ can_wakeup = false; + + executeExecRegWrites ++; + + if ( inst->isNullified() ) executeExecNullifiedWrites ++; + + } else { + panic("Don't know how to execute register inst.\n"); + } + + } else if (inst->isControl()) { + + ExeBlockIt block_it = instBlocksInExecuting.find(inst->getBlockID()); + + if ( !inst->isNullified() ) { + // Execute pc control inst + inst->execute(); + + // branch target has been set in execute of this control inst + //(block_it->second)->setBranchTarget(inst->blockBranchTarget()); + (block_it->second)->incReceivedExit(); + (block_it->second)->recordExitID(inst->staticInst->getEXIT()); + (block_it->second)->recordExitType(inst->staticInst->isCall(), + inst->staticInst->isReturn(),inst->staticInst->isIndirectCtrl()); + + if (inst->isSyscall()) { + + DPRINTF(EdgeExe, "Syscall inst, mark block as need syscall.\n"); + (block_it->second)->setNeedSyscall(); + + } + }else { + // Nullified control inst, ignore it. + DPRINTF(EdgeExe, "Control inst has been nullified\n"); + executeExecNullifiedBranches ++; + } + + inst->setExecuted(); + + fault = inst->getFault(); + if ( fault != NoFault ) { + + // Control inst receive fault from others, mark it in block + (block_it->second)->setFault(fault ); + + DPRINTF(EdgeExe, "Control has fault %s [Bid%lli][Iid%lli], set it to block.\n", + fault->name(), inst->getBlockID(), inst->getInstID() ); + } + } else { + + if ( !inst->isNullified() ) { + + inst->execute(); + + } else { + + // For consistency + inst->setIntResult(0); + DPRINTF(EdgeExe, "Normal inst nullified\n"); + + executeExecNullifiedNormals ++; + + } + + inst->setExecuted(); + + } + + // Wakeup consumers and propagate token with respect to + // different situations. + if ( can_wakeup ) { + + instQueue.wakeDependents(inst); + + } + + ThreadID tid = inst->threadNumber; + + if (ldstQueue.violation(tid)) { + + assert(inst->isMemRef()); + // If there was an ordering violation, then get the + // DynInst that caused the violation. 
Note that this + // clears the violation signal. + DynInstPtr violator; + violator = ldstQueue.getMemDepViolator(tid); + + DPRINTF(EdgeExe, "LDSTQ detected a violation. Violator PC: " + "%#x, inst PC: %#x. Addr is: %#x.\n", + violator->readPC(), inst->readPC(), inst->physEffAddr); + + //fetchRedirect[tid] = true; + + // Tell the instruction queue that a violation has occured. + instQueue.violation(inst, violator); + + // Record faults in corresponding inst blocks. + ExeBlockIt block_it = instBlocksInExecuting.find(inst->getBlockID()); + (block_it->second)->setFault(); + + // Squash. + squashDueToMemOrder(block_it->second,tid); + + ++memOrderViolationEvents; + } else if (ldstQueue.loadBlocked(tid) && + !ldstQueue.isLoadBlockedHandled(tid)) { + + //fetchRedirect[tid] = true; + + DPRINTF(EdgeExe, "Load operation couldn't execute because the " + "memory system is blocked. PC: %#x [sn:%lli]\n", + inst->readPC(), inst->seqNum); + + ExeBlockIt block_it = instBlocksInExecuting.find(inst->getBlockID()); + (block_it->second)->setFault(); + + squashDueToMemBlocked(block_it->second, tid); + } + + executeExecutedInsts ++; + + } + + // Update and record activity if we processed any instructions. + if (inst_num) { + if (exeStatus == Idle) { + exeStatus = Running; + } + + updatedQueues = true; + + cpu->activityThisCycle(); + } + + // Need to reset this in case a writeback event needs to write into the + // iew queue. That way the writeback event will write into the correct + // spot in the queue. + wbNumInstBlock = 0; + +} + +template +void +SimpleEdgeExecute::completeInstBlocks() +{ + // Check for completed blocks. + ExeBlockIt block_it = instBlocksInExecuting.begin(); + ExeBlockIt end = instBlocksInExecuting.end(); + + while ( block_it != end ) { + // Once the block completed, mark it and move it to commit. + // Remember the reg write insts and mem store insts in this + // block will be still existed in LSQ and IQ unless there's a + // commit signal coming from commit stage. 
Remove the inst + // block from in-executing structure when it really commits. + BlockPtr inst_block = block_it->second; + + // The inst block has been marked as completed, pass it. + if ( inst_block->isCompleted() ) { + ++block_it; + continue; + } + + if ( inst_block->isSquashed() ) { + + DPRINTF(EdgeExe, "Block[Bid:%lli@%#x is squashed.\n", + inst_block->getBlockID(), + inst_block->getStartPC()); + + inst_block->setCompleted(); + + instBlockToCommit(inst_block); + + } else if ( inst_block->isNop() ) { + + DPRINTF(EdgeExe, "Block[Bid:%lli]@%#x is a nop inst block, sent it to commit directly.\n", + inst_block->getBlockID(), + inst_block->getStartPC()); + + inst_block->setCompleted(); + + instBlockToCommit(inst_block); + + } else if ( inst_block->isCompletionCondSatisfied() + && inst_block->isNonSpeculative() ) { + + DPRINTF(EdgeExe, "Block[Bid:%lli]@%#x completed, send it to commit.\n", + inst_block->getBlockID(), + inst_block->getStartPC() ); + + inst_block->setCompleted(); + + instQueue.complete( inst_block ); + + if ( inst_block->misPredicted() ) { + DPRINTF(EdgeExe, "Miss prediction detected!\n"); + DPRINTF(EdgeExe, "Block[id:%lli]@%#x: real target: %#x, miss predicted target: %#x\n", + inst_block->getBlockID(), + inst_block->getStartPC(), + inst_block->getBranchTarget(), + inst_block->getPredBranchTarget() ); + squashDueToBranch(inst_block, inst_block->getTid()); + branchMispredicts ++; + } else if (inst_block->isNeedSyscall()){ + // If a block is mispredicted, it will squash as well. + // So for syscall blocks, no need to call this. 
+ DPRINTF(EdgeExe, "Syscall is required by this block.\n"); + squashDueToSyscall(inst_block, inst_block->getTid()); + } + + instBlockToCommit(inst_block); + + executeExecutedInstBlocks ++; + + }else if ( inst_block->isCompletionCondSatisfied()){ + DPRINTF(EdgeExe, "Inst block[Bid:%lli] completed in speculative status, waiting.\n", inst_block->getBlockID()); + + executeExecutedInstBlocks ++; // NOTE(review): block is not marked completed here, so this stat is bumped again on every later pass - confirm intended + } + + ++ block_it; + + } +} + +template +void +SimpleEdgeExecute::writebackInstBlocks() +{ +// Write back of a specific inst block will be +// implemented through writeback func in IQ +// and writebackStore in LSQ for write reg and +// store respectively. +#if 0 + for (int inst_block_num = 0; inst_block_num < wbWidth && + toCommit->instBlocks[inst_block_num]; inst_block_num++) { + BlockPtr inst_block = toCommit->instBlocks[inst_block_num]; + ThreadID tid = inst_block->getTid(); + + DPRINTF(EdgeExe, "Sending instruction block to commit, [id:%lli] PC %#x.\n", + inst_block->getTid(), inst_block->getStartPC()); + + executeInstBlocksToCommit[tid]++; + + if (!inst_block->isSquashed() && inst_block->isExecuted() && !inst_block->isFault()) { + writebackCount[tid]++; + } + + decrWb(inst_block->getBlockID()); + } +#endif +} + +template +void +SimpleEdgeExecute::tick() +{ + wbNumInstBlock = 0; + wbCycle = 0; + + wroteToTimeBuffer = false; + updatedQueues = false; + + sortInstBlocks(); + + // Free function units marked as being freed this cycle. + // Function units should be divided into ETs... + fuPool->processFreeUnits(); + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + // Check stall and squash signals, dispatch any instructions. + while (threads != end) { + ThreadID tid = *threads++; + + DPRINTF(EdgeExe,"Issue: Processing [tid:%i]\n",tid); + checkSignalsAndUpdate(tid); + + // Actually, I need to dispatch insts in one block. + // But I still want some integrity as a block class + // should hold all informations about inst blocks. 
+ + // dispatch() will extract insts from a block and arrange + // them with repect to ET organization. + dispatch(tid); + + } + + if (exeStatus != Squashing) { + + // Execute ready insts in IQ. Instructions + // in IQ could be from different inst blocks. + executeInstBlocks(); + + // Check for inst block completion condition. + completeInstBlocks(); + + // Have the instruction queue try to schedule any ready instructions. + // (In actuality, this scheduling is for instructions that will + // be executed next cycle.) + instQueue.scheduleReadyInsts(); + + // Also should advance its own time buffers if the stage ran. + // Not the best place for it, but this works (hopefully). + issueToExecQueue.advance(); + } + + bool broadcast_free_entries = false; + + if (updatedQueues || exeStatus == Running || updateLSQNextCycle) { + exeStatus = Idle; + updateLSQNextCycle = false; + + broadcast_free_entries = true; + } + + // Writeback any stores in commit stage. + ldstQueue.writebackStores(); + + // Check the committed load/store signals to see if there's a load + // or store to commit. Also check if it's being told to execute a + // nonspeculative instruction. + // This is pretty inefficient... + + threads = activeThreads->begin(); + while (threads != end) { + ThreadID tid = (*threads++); + BlockID commit_bid = fromCommit->commitInfo[tid].doneBlockID; + + DPRINTF(EdgeExe,"Processing [tid:%i]\n",tid); + + // Update structures based on instructions committed. + // Write back inst blocks and retire it. + if (commit_bid != 0 && + !fromCommit->commitInfo[tid].squash && + !fromCommit->commitInfo[tid].robSquashing) { + + // If block has fault, it will be informed by commit to squash. + // We'll never reach this in that circumstance. + + ldstQueue.commitStores(commit_bid,tid); + + ldstQueue.commitLoads(commit_bid,tid); + + updateLSQNextCycle = true; + + // This will write back write-reg insts. 
+ instQueue.writeBack(commit_bid, tid); + + // This will retire instructions in IQ belong to the committed + // inst block. + instQueue.commit(commit_bid,tid); + + assert(!instBlocksInExecuting.empty()); + assert(instBlocksInExecuting.size() <= Impl::MaxInFlightBlockNum); // same bound dispatchInsts() checks before inserting a block + int num = 0; + DPRINTF(EdgeBlockCount, "Ref count of Block[Bid:%lli] is %i. Num of stores is %i.\n", commit_bid, + instBlocksInExecuting[commit_bid]->getCount(), + instBlocksInExecuting[commit_bid]->getNumStoreInst()); + num = instBlocksInExecuting.erase(commit_bid ); + // We can just commit exactly 1 block. + assert(num == 1); + + if ( fromCommit->commitInfo[tid].needSyscall ) { + // This block need syscall, handle it. + + DPRINTF(EdgeExe, "Need syscall, handle it\n"); + + Fault fault = TheISA::genSyscallFault(); + fault->invoke(cpu->thread[tid]->getTC()); + } + } + + if (broadcast_free_entries) { + toFetch->executeInfo[tid].iqCount = + instQueue.getCount(tid); + toFetch->executeInfo[tid].ldstqCount = + ldstQueue.getCount(tid); + + toMap->executeInfo[tid].usedIQ = true; + toMap->executeInfo[tid].freeIQEntries = + instQueue.numFreeEntries(); + toMap->executeInfo[tid].usedLSQ = true; + toMap->executeInfo[tid].freeLSQEntries = + ldstQueue.numFreeEntries(tid); + + wroteToTimeBuffer = true; + } + + DPRINTF(EdgeExe, "[tid:%i], Dispatch dispatched %i instructions.\n", + tid, toMap->executeInfo[tid].dispatched); + } + + DPRINTF(EdgeExe, "IQ has %i free entries (Can schedule: %i). " + "LSQ has %i free entries.\n", + instQueue.numFreeEntries(), instQueue.hasReadyInsts(), + ldstQueue.numFreeEntries()); + + updateStatus(); + + if (wroteToTimeBuffer) { + DPRINTF(Activity, "Activity this cycle.\n"); + cpu->activityThisCycle(); + } + + executeInflightInstBlocks.sample(instBlocksInExecuting.size()); + +} + +template +void +SimpleEdgeExecute::updateExeInstBlockStats(BlockPtr &inst_block) +{ +// @todo: update statistics of execution phase. 
+#if 0 + ThreadID tid = inst_block->getTid(); + + executeExecutedInstBlocks++; +#endif +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/exetrace.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/exetrace.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,92 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#ifndef __CPU_EDGEEXETRACE_HH__ +#define __CPU_EDGEEXETRACE_HH__ + +#include "base/trace.hh" +#include "base/types.hh" +#include "cpu/edge/static_inst.hh" +#include "params/EdgeExeTracer.hh" +#include "cpu/edge/insttracer.hh" + +class ThreadContext; + + +namespace Trace { + +class EdgeExeTracerRecord : public EdgeInstRecord +{ + public: + EdgeExeTracerRecord(Tick _when, ThreadContext *_thread, + const EDGEStaticInstPtr _staticInst, Addr _pc, bool spec) + : EdgeInstRecord(_when, _thread, _staticInst, _pc, spec) + { + } + + void traceInst(EDGEStaticInstPtr inst, bool ran); + + void dump(); + virtual void dumpTicks(std::ostream &outs); +}; + +class EdgeExeTracer : public EdgeInstTracer +{ + public: + typedef EdgeExeTracerParams Params; + EdgeExeTracer(const Params *params) : EdgeInstTracer(params) + {} + + EdgeInstRecord * + getEdgeInstRecord(Tick when, ThreadContext *tc, + const EDGEStaticInstPtr staticInst, Addr pc) + { + if (!IsOn(ExecEnable)) + return NULL; + + if (!Trace::enabled) + return NULL; + + if (!IsOn(ExecSpeculative) && tc->misspeculating()) + return NULL; + + return new EdgeExeTracerRecord(when, tc, + staticInst, pc, tc->misspeculating()); + } +}; + +/* namespace Trace */ } + +#endif // __CPU_EDGEEXETRACE_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/exetrace.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/exetrace.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009
+ *
+ */
+
+// NOTE(review): the header name after this #include is missing in this
+// copy of the patch (angle-bracket text appears to have been stripped);
+// the code below uses setw/left/hex/dec -- confirm against the original.
+#include
+
+#include "base/loader/symtab.hh"
+#include "cpu/base.hh"
+#include "cpu/edge/exetrace.hh"
+#include "cpu/edge/static_inst.hh"
+#include "cpu/thread_context.hh"
+#include "config/the_isa.hh"
+#include "enums/OpClass.hh"
+
+using namespace std;
+using namespace TheISA;
+
+namespace Trace {
+
+/** Write the record's tick (`when`) followed by ": " to @p outs. */
+void
+EdgeExeTracerRecord::dumpTicks(ostream &outs)
+{
+    ccprintf(outs, "%7d: ", when);
+}
+
+/**
+ * Write one trace line for @p inst to Trace::output().
+ *
+ * Fields are emitted in this order, each guarded by its trace flag:
+ * tick (ExecTicks), CPU name, speculation marker (ExecSpeculative),
+ * thread id (ExecThread), the PC as symbol+offset or hex, and the
+ * disassembly. When @p ran is true the op class, result data,
+ * effective address, fetch sequence number and CP sequence number
+ * follow, each only if its flag is on and (where applicable) the
+ * value is marked valid.
+ *
+ * @param inst Instruction whose disassembly and op class are printed.
+ * @param ran  Whether to append the post-execution fields.
+ */
+void
+Trace::EdgeExeTracerRecord::traceInst(EDGEStaticInstPtr inst, bool ran)
+{
+    ostream &outs = Trace::output();
+
+    if (IsOn(ExecTicks))
+        dumpTicks(outs);
+
+    outs << thread->getCpuPtr()->name() << " ";
+
+    // "-" marks a misspeculated instruction, "+" a correct-path one.
+    if (IsOn(ExecSpeculative))
+        outs << (misspeculating ? "-" : "+") << " ";
+
+    if (IsOn(ExecThread))
+        outs << "T" << thread->threadId() << " : ";
+
+    // Prefer "@symbol+offset" when a symbol table is available and
+    // symbol printing is requested; otherwise fall back to the raw PC.
+    std::string sym_str;
+    Addr sym_addr;
+    if (debugSymbolTable
+        && IsOn(ExecSymbol)
+#if FULL_SYSTEM
+        && !inUserMode(thread)
+#endif
+        && debugSymbolTable->findNearestSymbol(PC, sym_str, sym_addr)) {
+        if (PC != sym_addr)
+            sym_str += csprintf("+%d", PC - sym_addr);
+        outs << "@" << sym_str;
+    }
+    else {
+        // NOTE(review): `hex` stays set on the stream after this
+        // statement; later fields switch back with `dec` explicitly.
+        outs << "0x" << hex << PC;
+    }
+
+    outs << " ";
+
+
+    outs << " : ";
+
+    //
+    //  Print decoded instruction
+    //
+
+    outs << setw(26) << left;
+    outs << inst->disassemble(PC, debugSymbolTable);
+
+    if (ran) {
+        outs << " : ";
+
+        if (IsOn(ExecOpClass)) {
+            outs << Enums::OpClassStrings[inst->opClass()] << " : ";
+        }
+
+        if (IsOn(ExecResult) && data_status != DataInvalid) {
+            ccprintf(outs, " D=%#018x", data.as_int);
+        }
+
+        if (IsOn(ExecEffAddr) && addr_valid)
+            outs << " A=0x" << hex << addr;
+
+        if (IsOn(ExecFetchSeq) && fetch_seq_valid)
+            outs << " FetchSeq=" << dec << fetch_seq;
+
+        if (IsOn(ExecCPSeq) && cp_seq_valid)
+            outs << " CPSeq=" << dec << cp_seq;
+    }
+
+    //
+    //  End of line...
+    //
+    outs << endl;
+}
+
+/** Emit this record by tracing its own staticInst with ran == true. */
+void
+Trace::EdgeExeTracerRecord::dump()
+{
+    //warn ("Trace::EdgeExeTracerRecord Unimplemented!\n");
+    traceInst(staticInst, true);
+
+}
+
+/* namespace Trace */ }
+
+////////////////////////////////////////////////////////////////////////
+//
+//  EdgeExeTracer Simulation Object
+//
+// Factory hook: builds the tracer from its parameter object.
+Trace::EdgeExeTracer *
+EdgeExeTracerParams::create()
+{
+    return new Trace::EdgeExeTracer(this);
+};
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/fetch.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/fetch.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,571 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec. 2009
+ *
+ */
+
+#ifndef __CPU_EDGE_FETCH_HH__
+#define __CPU_EDGE_FETCH_HH__
+
+#include "arch/utility.hh"
+#include "arch/predecoder.hh"
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+#include "config/the_isa.hh"
+#include "cpu/pc_event.hh"
+#include "mem/packet.hh"
+#include "mem/port.hh"
+#include "sim/eventq.hh"
+#include "base/bitfield.hh"
+//#include "cpu/edge/block.hh"
+
+class SimpleEdgeCPUParams;
+
+/**
+ * Fetch stage of the EDGE CPU model. It owns the I-cache port, the
+ * per-thread PCs and fetch status, the header words of the block being
+ * fetched, and the fetch statistics declared at the end of the class.
+ *
+ * NOTE(review): template parameter/argument lists (after `template`,
+ * `TimeBuffer` and `std::list`) are missing in this copy of the patch;
+ * the typedefs below show the parameter is named Impl -- confirm
+ * against the original.
+ */
+template
+class SimpleEdgeFetch
+{
+  public:
+    /** Typedefs from Impl. */
+    typedef typename Impl::CPUPol CPUPol;
+    typedef typename Impl::DynInst DynInst;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::EdgeBlock Block;
+    typedef typename Impl::EdgeBlockPtr BlockPtr;
+    typedef typename Impl::CPU CPU;
+    typedef typename Impl::EdgeBlock EdgeBlock;
+    typedef typename Impl::EdgeBlockPtr EdgeBlockPtr;
+
+    typedef typename CPUPol::Fetch2Map Fetch2Map;
+    typedef typename CPUPol::TimeStruct TimeStruct;
+    typedef typename CPUPol::BPredUnit BPredUnit;
+
+    /** Typedefs from ISA. */
+    typedef TheISA::MachInst MachInst;
+    typedef TheISA::ExtMachInst ExtMachInst;
+    typedef TheISA::BlockID BlockID;
+    typedef TheISA::ChunkID ChunkID;
+    typedef TheISA::InstID InstID;
+    typedef TheISA::HeaderInfo HeaderInfo;
+    typedef TheISA::ExitID ExitID;
+
+    /** IcachePort class for SimpleEdgeFetch.  Handles doing the
+     * communication with the cache/memory.
+     */
+    class IcachePort : public Port
+    {
+      protected:
+        /** Pointer to fetch. */
+        SimpleEdgeFetch *fetch;
+
+      public:
+        /** Constructor; names the port "<fetch name>-iport". */
+        IcachePort(SimpleEdgeFetch *_fetch)
+            : Port(_fetch->name() + "-iport", _fetch->cpu), fetch(_fetch)
+        { }
+
+        /** Set once the snoop range change has been forwarded. */
+        bool snoopRangeSent;
+
+        virtual void setPeer(Port *port);
+
+      protected:
+        /** Atomic version of receive.  Panics. */
+        virtual Tick recvAtomic(PacketPtr pkt);
+
+        /** Functional version of receive.  Only emits a debug message. */
+        virtual void recvFunctional(PacketPtr pkt);
+
+        /** Receives status change.  Other than range changing, panics. */
+        virtual void recvStatusChange(Status status);
+
+        /** Returns the address ranges of this device: an empty list,
+         * with snooping requested. */
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            bool &snoop)
+        { resp.clear(); snoop = true; }
+
+        /** Timing version of receive.  Handles setting fetch to the
+         * proper status to start fetching. */
+        virtual bool recvTiming(PacketPtr pkt);
+
+        /** Handles doing a retry of a failed fetch. */
+        virtual void recvRetry();
+    };
+
+
+  public:
+    /** Overall fetch status. Used to determine if the CPU can
+     * deschedule itself due to a lack of activity.
+     */
+    enum FetchStatus {
+        Active,
+        Inactive
+    };
+
+    /** Individual thread status. */
+    enum ThreadStatus {
+        Running,
+        Idle,
+        Squashing,
+        Blocked,
+        Fetching,
+        TrapPending,
+        QuiescePending,
+        SwitchOut,
+        IcacheWaitResponse,
+        IcacheWaitRetry,
+        IcacheAccessComplete
+    };
+
+    /** Fetching status for the EDGE ISA, distinguishing block-header
+     * fetching from normal instruction fetching. */
+    enum EdgeFetchStatus {
+        Head = 0,
+        Normal
+    };
+
+    /** Fetching policy. Add new policies here. */
+    enum FetchPriority {
+        SingleThread,
+        RoundRobin,
+        Branch,
+        IQ,
+        LSQ
+    };
+
+  private:
+    /** Fetch status. */
+    FetchStatus _status;
+
+    /** Edge Fetch status */
+    EdgeFetchStatus edgeFetchStatus[Impl::MaxThreads];
+
+    /** Per-thread status. */
+    ThreadStatus fetchStatus[Impl::MaxThreads];
+
+    /** Fetch policy. */
+    FetchPriority fetchPolicy;
+
+    /** List that has the threads organized by priority. */
+    std::list priorityList;
+
+  public:
+    /** SimpleEdgeFetch constructor. */
+    SimpleEdgeFetch(CPU *_cpu, SimpleEdgeCPUParams *params);
+
+    /** Returns the name of fetch. */
+    std::string name() const;
+
+    /** Registers statistics. */
+    void regStats();
+
+    /** Returns the icache port. */
+    Port *getIcachePort() { return icachePort; }
+
+    /** Sets the main backwards communication time buffer pointer. */
+    void setTimeBuffer(TimeBuffer *time_buffer);
+
+    /** Sets pointer to list of active threads. */
+    void setActiveThreads(std::list *at_ptr);
+
+    /** Sets pointer to time buffer used to communicate to the next stage. */
+    void setFetchQueue(TimeBuffer *fq_ptr);
+
+    /** Initialize stage. */
+    void initStage();
+
+    /** Tells the fetch stage that the Icache is set. */
+    void setIcache();
+
+    /** Processes cache completion event. */
+    void processCacheCompletion(PacketPtr pkt);
+
+    /** Begins the drain of the fetch stage. */
+    bool drain();
+
+    /** Resumes execution after a drain. */
+    void resume();
+
+    /** Tells fetch stage to prepare to be switched out. */
+    void switchOut();
+
+    /** Takes over from another CPU's thread. */
+    void takeOverFrom();
+
+    /** Checks if the fetch stage is switched out. */
+    bool isSwitchedOut() { return switchedOut; }
+
+    /** Tells fetch to wake up from a quiesce instruction. */
+    void wakeFromQuiesce();
+
+    /** Returns the fetch PC of thread 0.
+     * @todo: Probably remove this function as it returns only thread 0.
+     */
+    Addr readPC() { return PC[0]; }
+
+    /** Returns the PC of a specific thread. */
+    Addr readPC(ThreadID tid) { return PC[tid]; }
+
+    /** Returns the block PC of a specific thread. */
+    Addr readBlockPC(ThreadID tid) { return blockPC[tid]; }
+
+    /** Sets the PC of a specific thread. */
+    void setPC(Addr val, ThreadID tid) { PC[tid] = val; }
+
+    /** Reads the next PC of a specific thread. */
+    Addr readNextPC(ThreadID tid) { return nextPC[tid]; }
+
+    /** Sets the next PC of a specific thread. */
+    void setNextPC(Addr val, ThreadID tid) { nextPC[tid] = val; }
+
+    /** Reads the next NPC of a specific thread. */
+    Addr readNextNPC(ThreadID tid) { return nextNPC[tid]; }
+
+    /** Sets the next NPC of a specific thread. */
+    void setNextNPC(Addr val, ThreadID tid) { nextNPC[tid] = val; }
+
+  private:
+    /** Changes the status of this stage to active, and indicates this
+     * to the CPU.
+     */
+    inline void switchToActive();
+
+    /** Changes the status of this stage to inactive, and indicates
+     * this to the CPU.
+     */
+    inline void switchToInactive();
+
+    /**
+     * Looks up in the branch predictor to see if the next PC should be
+     * either next PC+=MachInst or a branch target.
+     * @param next_PC Next PC variable passed in by reference.  It is
+     * expected to be set to the current PC; it will be updated with what
+     * the next PC will be.
+     * @param next_NPC Used for ISAs which use delay slots.
+     * @return Whether or not a branch was predicted as taken.
+     */
+    bool lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, Addr &next_NPC);
+
+    /**
+     * Fetches the cache line that contains fetch_PC.  Puts the data
+     * into the class variable cacheData.
+     * @param fetch_PC The PC address that is being fetched from.
+     * @param ret_fault The fault reference that will be set to the result of
+     * the icache access.
+     * @param tid Thread id.
+     * @return A bool; any fault is reported through ret_fault.
+     * NOTE(review): the meaning of the bool is defined by the
+     * implementation, which is not part of this header -- confirm.
+     */
+    bool fetchCacheLine(Addr fetch_PC, Fault &ret_fault, ThreadID tid);
+
+    /** Squashes a specific thread and resets the PC. */
+    inline void doSquash(const Addr &new_PC, const Addr &new_NPC, ThreadID tid);
+
+    /** Squashes a specific thread and resets the PC.  Also tells the CPU to
+     * remove any instructions between fetch and map that should be squashed.
+     */
+    void squashFromMap(const Addr &new_PC, const Addr &new_NPC,
+                       const BlockID &seq_num, ThreadID tid);
+
+    /** Checks if a thread is stalled. */
+    bool checkStall(ThreadID tid) const;
+
+    /** Updates overall fetch stage status; to be called at the end of each
+     * cycle. */
+    FetchStatus updateFetchStatus();
+
+  public:
+    /** Squashes a specific thread and resets the PC. Also tells the CPU to
+     * remove any instructions that are not in the ROB. The source of this
+     * squash should be the commit stage.
+     */
+    void squash(const Addr &new_PC, const Addr &new_NPC,
+                const BlockID &seq_num, ThreadID tid);
+
+    /** Ticks the fetch stage, processing all inputs signals and fetching
+     * as many instructions as possible.
+     */
+    void tick();
+
+    /** Checks all input signals and updates the status as necessary.
+     *  @return: Returns if the status has changed due to input signals.
+     */
+    bool checkSignalsAndUpdate(ThreadID tid);
+
+    /** Does the actual fetching of instructions and passing them on to the
+     * next stage.
+     * @param status_change fetch() sets this variable if there was a status
+     * change (ie switching to IcacheMissStall).
+     */
+    void fetch(bool &status_change);
+
+    /** Align a PC to the start of an I-cache block: the address is
+     * first converted with TheISA::realPCToFetchPC, then the bits set
+     * in cacheBlkMask are cleared. */
+    Addr icacheBlockAlignPC(Addr addr)
+    {
+        addr = TheISA::realPCToFetchPC(addr);
+        return (addr & ~(cacheBlkMask));
+    }
+
+  private:
+    /** Handles retrying the fetch access. */
+    void recvRetry();
+
+    /** Returns the appropriate thread to fetch, given the fetch policy. */
+    ThreadID getFetchingThread(FetchPriority &fetch_priority);
+
+    /** Returns the appropriate thread to fetch using a round robin policy. */
+    ThreadID roundRobin();
+
+    /** Returns the appropriate thread to fetch using the IQ count policy. */
+    ThreadID iqCount();
+
+    /** Returns the appropriate thread to fetch using the LSQ count policy. */
+    ThreadID lsqCount();
+
+    /** Returns the appropriate thread to fetch using the branch count
+     * policy. */
+    ThreadID branchCount();
+
+    #if 0
+    /** Extract info from headerInfo */
+    void depackHeaderInfo(EdgeBlockPtr edgeBlock);
+    #endif
+
+    /** Construct the EDGE header: ORs mbits(inst, 31, 28), shifted
+     * right by (offset % 32), into header word
+     * HeaderInfoSize - 1 - offset/32.  Only legal while the thread is
+     * in the Head fetch state (asserted). */
+    void constructHeader(ExtMachInst inst, uint32_t offset, ThreadID tid) {
+        assert(edgeFetchStatus[tid] == Head );
+
+        headerInfo[TheISA::HeaderInfoSize - 1-offset/32] |=( mbits(inst, 31, 28) >> ( offset % 32 ));
+    }
+
+    /** Zeroes all HeaderInfoSize words of headerInfo. */
+    void clearHeaderInfo () {
+        for ( int i = 0; i < TheISA::HeaderInfoSize; i ++ )
+            headerInfo[i] = 0;
+    }
+
+  private:
+    /** Pointer to the EDGECPU. */
+    CPU *cpu;
+
+    /** Time buffer interface. */
+    TimeBuffer *timeBuffer;
+
+    /** Wire to get map's information from backwards time buffer. */
+    typename TimeBuffer::wire fromMap;
+
+    /** Wire to get execute's information from backwards time buffer. */
+    typename TimeBuffer::wire fromExecute;
+
+    /** Wire to get commit's information from backwards time buffer. */
+    typename TimeBuffer::wire fromCommit;
+
+    /** Internal fetch instruction queue. */
+    TimeBuffer *fetch2mapQueue;
+
+    //Might be annoying how this name is different than the queue.
+    /** Wire used to write any information heading to map. */
+    typename TimeBuffer::wire toMap;
+
+    /** Icache interface. */
+    IcachePort *icachePort;
+
+    /** BPredUnit. */
+    BPredUnit branchPred;
+
+    /** Predecoder. */
+    TheISA::Predecoder predecoder;
+
+    /** Current block ptr*/
+    BlockPtr curBlockPtr;
+
+    /** Per-thread fetch PC. */
+    Addr PC[Impl::MaxThreads];
+
+    /** Per-thread block PC (returned by readBlockPC()). */
+    Addr blockPC[Impl::MaxThreads];
+
+    /** Per-thread next PC. */
+    Addr nextPC[Impl::MaxThreads];
+
+    /** Per-thread next NPC. */
+    Addr nextNPC[Impl::MaxThreads];
+
+    /** Memory request used to access cache. */
+    RequestPtr memReq[Impl::MaxThreads];
+
+    /** Variable that tracks if fetch has written to the time buffer this
+     * cycle. Used to tell CPU if there is activity this cycle.
+     */
+    bool wroteToTimeBuffer;
+
+    /** Tracks how many instructions has been fetched this cycle. */
+    int numInst;
+
+    /** Source of possible stalls. */
+    struct Stalls {
+        bool map;
+        bool execute;
+        bool commit;
+    };
+
+    /** EDGE instruction header info*/
+    HeaderInfo headerInfo;
+
+    /** Instruction chunk offsets*/
+    Addr chunkOffset;
+    /** Chunk size in bytes*/
+    uint32_t chunkSize;
+    /** Max chunks in a instruction block */
+    uint32_t maxBlockSize;
+
+    /** Instruction ID of current instruction in one block*/
+    InstID instID;
+    /** Chunk ID in current inst block*/
+    ChunkID chunkID;
+    /** Block ID of current inst */
+    BlockID blockID;
+    /** Chunk number of current inst block*/
+    //uint32_t chunkNum;
+
+    /** Tracks which stages are telling fetch to stall. */
+    Stalls stalls[Impl::MaxThreads];
+
+    /** Map to fetch delay, in ticks. */
+    unsigned mapToFetchDelay;
+
+    /** Decode to fetch delay, in ticks.
+     * NOTE(review): the constructor's initializer list does not set
+     * this member -- confirm whether it is still used. */
+    unsigned decodeToFetchDelay;
+
+    /** Execute to fetch delay, in ticks. */
+    unsigned executeToFetchDelay;
+
+    /** Commit to fetch delay, in ticks. */
+    unsigned commitToFetchDelay;
+
+    /** The width of fetch in instructions. */
+    unsigned fetchWidth;
+
+    /** Is the cache blocked?  If so no threads can access it. */
+    bool cacheBlocked;
+
+    /** The packet that is waiting to be retried. */
+    PacketPtr retryPkt;
+
+    /** The thread that is waiting on the cache to tell fetch to retry. */
+    ThreadID retryTid;
+
+    /** Cache block size. */
+    int cacheBlkSize;
+
+    /** Mask of the offset bits within a cache block; its complement is
+     * applied in icacheBlockAlignPC() to get a cache block's address. */
+    Addr cacheBlkMask;
+
+    /** The cache line being fetched. */
+    uint8_t *cacheData[Impl::MaxThreads];
+
+    /** The PC of the cacheline that has been loaded. */
+    Addr cacheDataPC[Impl::MaxThreads];
+
+    /** Whether or not the cache data is valid. */
+    bool cacheDataValid[Impl::MaxThreads];
+
+    /** Size of instructions. */
+    int instSize;
+
+    /** Icache stall statistics. */
+    Counter lastIcacheStall[Impl::MaxThreads];
+
+    /** List of Active Threads */
+    std::list *activeThreads;
+
+    /** Number of threads. */
+    ThreadID numThreads;
+
+    /** Number of threads that are actively fetching. */
+    ThreadID numFetchingThreads;
+
+    /** Thread ID being fetched. */
+    ThreadID threadFetched;
+
+    /** Checks if there is an interrupt pending.  If there is, fetch
+     * must stop once it is not fetching PAL instructions.
+     */
+    bool interruptPending;
+
+    /** Is there a drain pending. */
+    bool drainPending;
+
+    /** Records if fetch is switched out. */
+    bool switchedOut;
+
+    // @todo: Consider making these vectors and tracking on a per thread basis.
+    /** Stat for total number of cycles stalled due to an icache miss. */
+    Stats::Scalar icacheStallCycles;
+    /** Stat for total number of fetched instructions. */
+    Stats::Scalar fetchedInsts;
+    /** Stat for total number of fetched NOP insts. */
+    Stats::Scalar fetchedNopInsts;
+    /** Stat for total number of fetched MOV3/4 insts. */
+    Stats::Scalar fetchedMovInsts;
+    /** Stat for total number of fetched blocks. */
+    Stats::Scalar fetchedBlocks;
+    /** Stat for total number of fetched instruction chunks. */
+    Stats::Scalar fetchedChunks;
+    /** Stat for total number of NOP blocks fetched. */
+    Stats::Scalar fetchedNopBlocks;
+    /** Stat for total number of cycles spent fetching. */
+    Stats::Scalar fetchCycles;
+    /** Stat for total number of cycles spent squashing. */
+    Stats::Scalar fetchSquashCycles;
+    /** Stat for total number of squashed inst blocks. */
+    Stats::Scalar fetchSquashBlocks;
+    /** Stat for total number of panic times due to block-misaligned prediction.*/
+    Stats::Scalar fetchMisAlignedTimes;
+    /** Stat for total number of cycles spent blocked due to other stages in
+     * the pipeline.
+     */
+    Stats::Scalar fetchIdleCycles;
+    /** Total number of cycles spent blocked. */
+    Stats::Scalar fetchBlockedCycles;
+    /** Total number of cycles spent in any other state. */
+    Stats::Scalar fetchMiscStallCycles;
+    /** Stat for total number of fetched cache lines. */
+    Stats::Scalar fetchedCacheLines;
+    /** Total number of outstanding icache accesses that were dropped
+     * due to a squash.
+     */
+    Stats::Scalar fetchIcacheSquashes;
+    /** Distribution of number of instructions fetched each cycle. */
+    Stats::Distribution fetchNisnDist;
+    /** Rate of how often fetch was idle. */
+    Stats::Formula idleRate;
+    /** Number of instruction fetched per cycle. */
+    Stats::Formula fetchRate;
+    /** Number of useful instructions fetched per cycle. */
+    Stats::Formula usefulFetchRate;
+};
+
+#endif //__CPU_EDGE_FETCH_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/fetch.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/fetch.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/fetch_impl.hh" + +template class SimpleEdgeFetch; diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/fetch_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/fetch_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,1601 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include +#include + +#include "arch/isa_traits.hh" +#include "arch/utility.hh" +#include "base/types.hh" +#include "config/the_isa.hh" +#include "config/use_checker.hh" +#include "cpu/checker/cpu.hh" +#include "cpu/exetrace.hh" +#include "cpu/edge/fetch.hh" +#include "mem/packet.hh" +#include "mem/request.hh" +#include "params/SimpleEdgeCPU.hh" +#include "sim/byteswap.hh" +#include "sim/core.hh" + +#if FULL_SYSTEM +#include "arch/tlb.hh" +#include "arch/vtophys.hh" +#include "sim/system.hh" +#endif // FULL_SYSTEM + +using namespace std; + +template +void +SimpleEdgeFetch::IcachePort::setPeer(Port *port) +{ + Port::setPeer(port); + + fetch->setIcache(); +} + +template +Tick +SimpleEdgeFetch::IcachePort::recvAtomic(PacketPtr pkt) +{ + panic("SimpleEdgeFetch doesn't expect recvAtomic callback!"); + return curTick; +} + +template +void +SimpleEdgeFetch::IcachePort::recvFunctional(PacketPtr pkt) +{ + DPRINTF(EdgeFetch, "SimpleEdgeFetch doesn't update its state from a " + "functional call."); +} + +template +void +SimpleEdgeFetch::IcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } + return; + } + + panic("SimpleEdgeFetch doesn't expect recvStatusChange callback!"); +} + +template +bool +SimpleEdgeFetch::IcachePort::recvTiming(PacketPtr 
pkt) +{ + DPRINTF(EdgeFetch, "Received timing\n"); + if (pkt->isResponse()) { + fetch->processCacheCompletion(pkt); + } + //else Snooped a coherence request, just return + return true; +} + +template +void +SimpleEdgeFetch::IcachePort::recvRetry() +{ + fetch->recvRetry(); +} + +template +SimpleEdgeFetch::SimpleEdgeFetch(CPU *_cpu, SimpleEdgeCPUParams *params) + : cpu(_cpu), + branchPred(params), + predecoder(NULL), + mapToFetchDelay(params->mapToFetchDelay), + executeToFetchDelay(params->executeToFetchDelay), + commitToFetchDelay(params->commitToFetchDelay), + fetchWidth(params->fetchWidth), + cacheBlocked(false), + retryPkt(NULL), + retryTid(InvalidThreadID), + numThreads(params->numThreads), + numFetchingThreads(params->smtNumFetchingThreads), + interruptPending(false), + drainPending(false), + switchedOut(false) +{ + if (numThreads > Impl::MaxThreads) + fatal("numThreads (%d) is larger than compiled limit (%d),\n" + "\tincrease MaxThreads in src/cpu/o3/impl.hh\n", + numThreads, static_cast(Impl::MaxThreads)); + + // Set fetch stage's status to inactive. + _status = Inactive; + + std::string policy = params->smtFetchPolicy; + + // Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + // Figure out fetch policy + if (policy == "singlethread") { + fetchPolicy = SingleThread; + if (numThreads > 1) + panic("Invalid Fetch Policy for a SMT workload."); + } else if (policy == "roundrobin") { + fetchPolicy = RoundRobin; + DPRINTF(EdgeFetch, "Fetch policy set to Round Robin\n"); + } else if (policy == "branch") { + fetchPolicy = Branch; + DPRINTF(EdgeFetch, "Fetch policy set to Branch Count\n"); + } else if (policy == "iqcount") { + fetchPolicy = IQ; + DPRINTF(EdgeFetch, "Fetch policy set to IQ count\n"); + } else if (policy == "lsqcount") { + fetchPolicy = LSQ; + DPRINTF(EdgeFetch, "Fetch policy set to LSQ count\n"); + } else { + fatal("Invalid Fetch Policy. 
Options Are: {SingleThread," + " RoundRobin,LSQcount,IQcount}\n"); + } + + // Get the size of an instruction. + instSize = sizeof(TheISA::MachInst); + + // Name is finally available, so create the port. + icachePort = new IcachePort(this); + + icachePort->snoopRangeSent = false; + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setIcachePort(icachePort); + } +#endif +} + +template +std::string +SimpleEdgeFetch::name() const +{ + return cpu->name() + ".fetch"; +} + +template +void +SimpleEdgeFetch::regStats() +{ + icacheStallCycles + .name(name() + ".icacheStallCycles") + .desc("Number of cycles fetch is stalled on an Icache miss") + .prereq(icacheStallCycles); + + fetchedInsts + .name(name() + ".Insts") + .desc("Number of instructions fetch has processed") + .prereq(fetchedInsts); + + fetchedBlocks + .name(name() + ".blocks") + .desc("Number of inst blocks fetch has processed") + .prereq(fetchedBlocks); + + fetchedNopBlocks + .name(name() + ".NOPblocks") + .desc("Number of NOP inst blocks fetch has processed") + .prereq(fetchedNopBlocks); + + fetchedChunks + .name(name() + ".chunks") + .desc("Number of inst chunks fetch has processed") + .prereq(fetchedChunks); + + fetchedNopInsts + .name(name() + ".Nops") + .desc("Number of NOPs that fetch encountered") + .prereq(fetchedNopInsts); + + fetchedMovInsts + .name(name() + ".Movs") + .desc("Number of mov3/4 that fetch has encountered") + .prereq(fetchedMovInsts); + + fetchCycles + .name(name() + ".Cycles") + .desc("Number of cycles fetch has run and was not squashing or" + " blocked") + .prereq(fetchCycles); + + fetchSquashCycles + .name(name() + ".SquashCycles") + .desc("Number of cycles fetch has spent squashing") + .prereq(fetchSquashCycles); + + fetchSquashBlocks + .name(name() + ".SquashBlocks") + .desc("Number of squashed inst blocks.") + .prereq(fetchSquashBlocks); + + fetchMisAlignedTimes + .name(name() + ".MisalignTimes") + .desc("Number of block-misalign times") + .prereq(fetchMisAlignedTimes); + + 
fetchIdleCycles
+        .name(name() + ".IdleCycles")
+        .desc("Number of cycles fetch was idle")
+        .prereq(fetchIdleCycles);
+
+    fetchBlockedCycles
+        .name(name() + ".BlockedCycles")
+        .desc("Number of cycles fetch has spent blocked")
+        .prereq(fetchBlockedCycles);
+
+    fetchedCacheLines
+        .name(name() + ".CacheLines")
+        .desc("Number of cache lines fetched")
+        .prereq(fetchedCacheLines);
+
+    fetchMiscStallCycles
+        .name(name() + ".MiscStallCycles")
+        .desc("Number of cycles fetch has spent waiting on interrupts, or "
+              "bad addresses, or out of MSHRs")
+        .prereq(fetchMiscStallCycles);
+
+    fetchIcacheSquashes
+        .name(name() + ".IcacheSquashes")
+        .desc("Number of outstanding Icache misses that were squashed")
+        .prereq(fetchIcacheSquashes);
+
+    // One bucket per possible per-cycle instruction count, 0..fetchWidth.
+    fetchNisnDist
+        .init(/* base value */ 0,
+              /* last value */ fetchWidth,
+              /* bucket size */ 1)
+        .name(name() + ".rateDist")
+        .desc("Number of instructions fetched each cycle (Total)")
+        .flags(Stats::pdf);
+
+    // The remaining three are formulas over the counters and the CPU's
+    // cycle count.
+    idleRate
+        .name(name() + ".idleRate")
+        .desc("Percent of cycles fetch was idle")
+        .prereq(idleRate);
+    idleRate = fetchIdleCycles * 100 / cpu->numCycles;
+
+    usefulFetchRate
+        .name(name() + ".usefulFetchRate")
+        .desc("Number of instructions fetched without NOPs per cycle")
+        .flags(Stats::total);
+    usefulFetchRate = (fetchedInsts - fetchedNopInsts) / cpu->numCycles;
+
+    fetchRate
+        .name(name() + ".rate")
+        .desc("Number of inst fetches per cycle")
+        .flags(Stats::total);
+    fetchRate = fetchedInsts / cpu->numCycles;
+
+}
+
+/**
+ * Store the backwards time buffer and create the wires that read what
+ * the map, execute and commit stages wrote, each at its own
+ * stage-to-fetch delay.
+ *
+ * NOTE(review): the parameter list after `template` and the argument
+ * of TimeBuffer are missing in this copy of the patch -- confirm
+ * against the original.
+ */
+template
+void
+SimpleEdgeFetch::setTimeBuffer(TimeBuffer *time_buffer)
+{
+    timeBuffer = time_buffer;
+
+    // Create wires to get information from proper places in time buffer.
+ fromMap = timeBuffer->getWire(-mapToFetchDelay); + + fromExecute = timeBuffer->getWire(-executeToFetchDelay); + fromCommit = timeBuffer->getWire(-commitToFetchDelay); +} + +template +void +SimpleEdgeFetch::setActiveThreads(std::list *at_ptr) +{ + activeThreads = at_ptr; +} + +template +void +SimpleEdgeFetch::setFetchQueue(TimeBuffer *fq_ptr) +{ + fetch2mapQueue = fq_ptr; + + // Create wire to write information to proper place in fetch queue. + toMap = fetch2mapQueue->getWire(0); +} + +template +void +SimpleEdgeFetch::initStage() +{ + // Setup PC and nextPC with initial state. + for (ThreadID tid = 0; tid < numThreads; tid++) { + PC[tid] = cpu->readPC(tid); + nextPC[tid] = cpu->readNextPC(tid); + } + + for (ThreadID tid = 0; tid < numThreads; tid++) { + + fetchStatus[tid] = Running; + edgeFetchStatus[tid] = Head; + + priorityList.push_back(tid); + + memReq[tid] = NULL; + + stalls[tid].map = false; + stalls[tid].execute = false; + stalls[tid].commit = false; + } + + // Initial chunk related variables + chunkSize = TheISA::ChunkSize; + chunkOffset = TheISA::ChunkOffset; + maxBlockSize = TheISA::MaxBlockSize; + + instID = 0; + chunkID = 0; + blockID = 1; // Default to 1 + + // Initialize current block ptr. Add it into cpu list. + curBlockPtr = new Block(blockID, cpu); // Initial block ID as 1 + curBlockPtr->setBlockListIt(cpu->addInstBlock(curBlockPtr)); + + headerInfo.reserve(TheISA::HeaderInfoSize); // Reserve space for header info + headerInfo.resize(TheISA::HeaderInfoSize); + clearHeaderInfo(); + + // Schedule fetch to get the correct PC from the CPU + // scheduleFetchStartupEvent(1); + + // Fetch needs to start Running instructions at the very beginning, + // so it must start up in active state. + switchToActive(); +} + +template +void +SimpleEdgeFetch::setIcache() +{ + // Size of cache block. + cacheBlkSize = icachePort->peerBlockSize(); + + // Create mask to get rid of offset bits. 
+ cacheBlkMask = (cacheBlkSize - 1); + + for (ThreadID tid = 0; tid < numThreads; tid++) { + // Create space to store a cache line. + cacheData[tid] = new uint8_t[cacheBlkSize]; + cacheDataPC[tid] = 0; + cacheDataValid[tid] = false; + } +} + +template +void +SimpleEdgeFetch::processCacheCompletion(PacketPtr pkt) +{ + ThreadID tid = pkt->req->threadId(); + + DPRINTF(EdgeFetch, "[tid:%u] Waking up due to cache completion.\n",tid); + + assert(!pkt->wasNacked()); + + // Only change the status if it's still waiting on the icache access + // to return. + if (fetchStatus[tid] != IcacheWaitResponse || + pkt->req != memReq[tid] || + isSwitchedOut()) { + ++fetchIcacheSquashes; + delete pkt->req; + delete pkt; + return; + } + + memcpy(cacheData[tid], pkt->getPtr(), cacheBlkSize); + cacheDataValid[tid] = true; + + if (!drainPending) { + // Wake up the CPU (if it went to sleep and was waiting on + // this completion event). + cpu->wakeCPU(); + + DPRINTF(Activity, "[tid:%u] Activating fetch due to cache completion\n", + tid); + + switchToActive(); + } + + // Only switch to IcacheAccessComplete if we're not stalled as well. + if (checkStall(tid)) { + fetchStatus[tid] = Blocked; + } else { + fetchStatus[tid] = IcacheAccessComplete; + } + + // Reset the mem req to NULL. + delete pkt->req; + delete pkt; + memReq[tid] = NULL; +} + +template +bool +SimpleEdgeFetch::drain() +{ + // Fetch is ready to drain at any time. + cpu->signalDrained(); + drainPending = true; + return true; +} + +template +void +SimpleEdgeFetch::resume() +{ + drainPending = false; +} + +template +void +SimpleEdgeFetch::switchOut() +{ + switchedOut = true; + // Branch predictor needs to have its state cleared. + // Fix me! 
+ branchPred.switchOut(); +} + +template +void +SimpleEdgeFetch::takeOverFrom() +{ + // Reset all state + for (ThreadID i = 0; i < Impl::MaxThreads; ++i) { + stalls[i].map = 0; + stalls[i].execute = 0; + stalls[i].commit = 0; + PC[i] = cpu->readPC(i); + nextPC[i] = cpu->readNextPC(i); + fetchStatus[i] = Running; + } + numInst = 0; + wroteToTimeBuffer = false; + _status = Inactive; + switchedOut = false; + interruptPending = false; + branchPred.takeOverFrom(); +} + +template +void +SimpleEdgeFetch::wakeFromQuiesce() +{ + DPRINTF(EdgeFetch, "Waking up from quiesce\n"); + // Hopefully this is safe + // @todo: Allow other threads to wake from quiesce. + fetchStatus[0] = Running; +} + +template +inline void +SimpleEdgeFetch::switchToActive() +{ + if (_status == Inactive) { + DPRINTF(Activity, "Activating stage.\n"); + + cpu->activateStage(CPU::FetchIdx); + + _status = Active; + } +} + +template +inline void +SimpleEdgeFetch::switchToInactive() +{ + if (_status == Active) { + DPRINTF(Activity, "Deactivating stage.\n"); + + cpu->deactivateStage(CPU::FetchIdx); + + _status = Inactive; + } +} + +template +bool +SimpleEdgeFetch::lookupAndUpdateNextPC(DynInstPtr &inst, Addr &next_PC, + Addr &next_NPC) +{ + + if (!inst->isControl()) { + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + return false; + } + + // Do branch prediction check here. + // A bit of a misnomer...next_PC is actually the current PC until + // this function updates it. 
+ //bool predict_taken; + + //ThreadID tid = inst->threadNumber; + //Addr pred_PC = next_PC; + +#if ISA_HAS_DELAY_SLOT + next_PC = next_NPC; + next_NPC += instSize; +#else + next_PC += instSize; + next_NPC = next_PC + instSize; +#endif + + DPRINTF(EdgeFetch, "[tid:%i]: [sn:%i] Branch predicted to go to %#x and then %#x.\n", + inst->threadNumber, inst->seqNum, next_PC, next_NPC); + + //if (predict_taken) { + //++predictedBranches; + //} + + return false; +} + +template +bool +SimpleEdgeFetch::fetchCacheLine(Addr fetch_PC, Fault &ret_fault, ThreadID tid) +{ + Fault fault = NoFault; + + //AlphaDep + if (cacheBlocked) { + DPRINTF(EdgeFetch, "[tid:%i] Can't fetch cache line, cache blocked\n", + tid); + return false; + } else if (isSwitchedOut()) { + DPRINTF(EdgeFetch, "[tid:%i] Can't fetch cache line, switched out\n", + tid); + return false; + } else if (interruptPending && !(fetch_PC & 0x3)) { + // Hold off fetch from getting new instructions when: + // Cache is blocked, or + // while an interrupt is pending and we're not in PAL mode, or + // fetch is switched out. + DPRINTF(EdgeFetch, "[tid:%i] Can't fetch cache line, interrupt pending\n", + tid); + return false; + } + + // Align the fetch PC so it's at the start of a cache block. + Addr block_PC = icacheBlockAlignPC(fetch_PC); + + // If we've already got the block, no need to try to fetch it again. + if (cacheDataValid[tid] && block_PC == cacheDataPC[tid]) { + return true; + } + + // Setup the memReq to do a read of the first instruction's address. + // Set the appropriate read size and flags as well. + // Build request here. + RequestPtr mem_req = + new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH, + fetch_PC, cpu->thread[tid]->contextId(), tid); + + memReq[tid] = mem_req; + + // Translate the instruction request. 
+ fault = cpu->itb->translateAtomic(mem_req, cpu->thread[tid]->getTC(), + BaseTLB::Execute); + + // In the case of faults, the fetch stage may need to stall and wait + // for the ITB miss to be handled. + + // If translation was successful, attempt to read the first + // instruction. + if (fault == NoFault) { +#if 0 + if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || + memReq[tid]->isUncacheable()) { + DPRINTF(Fetch, "Fetch: Bad address %#x (hopefully on a " + "misspeculating path)!", + memReq[tid]->paddr); + ret_fault = TheISA::genMachineCheckFault(); + return false; + } +#endif + + // Build packet here. + PacketPtr data_pkt = new Packet(mem_req, + MemCmd::ReadReq, Packet::Broadcast); + data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + + cacheDataPC[tid] = block_PC; + cacheDataValid[tid] = false; + + DPRINTF(EdgeFetch, "Fetch: Doing instruction read.\n"); + + fetchedCacheLines++; + + // Now do the timing access to see whether or not the instruction + // exists within the cache. + if (!icachePort->sendTiming(data_pkt)) { + assert(retryPkt == NULL); + assert(retryTid == InvalidThreadID); + DPRINTF(EdgeFetch, "[tid:%i] Out of MSHRs!\n", tid); + fetchStatus[tid] = IcacheWaitRetry; + retryPkt = data_pkt; + retryTid = tid; + cacheBlocked = true; + return false; + } + + DPRINTF(EdgeFetch, "[tid:%i]: Doing cache access.\n", tid); + + lastIcacheStall[tid] = curTick; + + DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " + "response.\n", tid); + + fetchStatus[tid] = IcacheWaitResponse; + } else { + delete mem_req; + memReq[tid] = NULL; + } + + ret_fault = fault; + return true; +} + +template +inline void +SimpleEdgeFetch::doSquash(const Addr &new_PC, + const Addr &new_NPC, ThreadID tid) +{ + DPRINTF(EdgeFetch, "[tid:%i]: Squashing, setting PC to: %#x, NPC to: %#x.\n", + tid, new_PC, new_NPC); + + PC[tid] = new_PC; + nextPC[tid] = new_NPC; + + // Update the current block ptr. 
+ edgeFetchStatus[tid] = Head; + clearHeaderInfo(); + chunkID = 0; + instID = 0; + blockID++; + + // Update stats + fetchSquashBlocks ++; + + // Clear the icache miss if it's outstanding. + if (fetchStatus[tid] == IcacheWaitResponse) { + DPRINTF(EdgeFetch, "[tid:%i]: Squashing outstanding Icache miss.\n", + tid); + memReq[tid] = NULL; + } + + // Get rid of the retrying packet if it was from this thread. + if (retryTid == tid) { + assert(cacheBlocked); + if (retryPkt) { + delete retryPkt->req; + delete retryPkt; + } + retryPkt = NULL; + retryTid = InvalidThreadID; + } + + fetchStatus[tid] = Squashing; + + ++fetchSquashCycles; + } + +template +void +SimpleEdgeFetch::squashFromMap(const Addr &new_PC, const Addr &new_NPC, + const BlockID &seq_num, ThreadID tid) +{ + DPRINTF(EdgeFetch, "[tid:%i]: Squashing from map.\n",tid); + + doSquash(new_PC, new_NPC, tid); + + // Tell the CPU to remove any instructions that are in flight between + // fetch and map. + cpu->removeBlocksUntil(seq_num, tid); +} + +template +bool +SimpleEdgeFetch::checkStall(ThreadID tid) const +{ + bool ret_val = false; + + if (cpu->contextSwitch) { + DPRINTF(EdgeFetch,"[tid:%i]: Stalling for a context switch.\n",tid); + ret_val = true; + } else if (stalls[tid].map) { + DPRINTF(EdgeFetch,"[tid:%i]: Stall from Map stage detected.\n",tid); + ret_val = true; + } else if (stalls[tid].execute) { + DPRINTF(EdgeFetch,"[tid:%i]: Stall from Execute stage detected.\n",tid); + ret_val = true; + } else if (stalls[tid].commit) { + DPRINTF(EdgeFetch,"[tid:%i]: Stall from Commit stage detected.\n",tid); + ret_val = true; + } + + return ret_val; +} + +template +typename SimpleEdgeFetch::FetchStatus +SimpleEdgeFetch::updateFetchStatus() +{ + //Check Running + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (fetchStatus[tid] == Running || + fetchStatus[tid] == Squashing || + fetchStatus[tid] == 
IcacheAccessComplete) { + + if (_status == Inactive) { + DPRINTF(Activity, "[tid:%i]: Activating stage.\n",tid); + + if (fetchStatus[tid] == IcacheAccessComplete) { + DPRINTF(Activity, "[tid:%i]: Activating fetch due to cache" + "completion\n",tid); + } + + cpu->activateStage(CPU::FetchIdx); + } + + return Active; + } + } + + // Stage is switching from active to inactive, notify CPU of it. + if (_status == Active) { + DPRINTF(Activity, "Deactivating stage.\n"); + + cpu->deactivateStage(CPU::FetchIdx); + } + + return Inactive; +} + +template +void +SimpleEdgeFetch::squash(const Addr &new_PC, const Addr &new_NPC, + const BlockID &seq_num, ThreadID tid) +{ + DPRINTF(EdgeFetch, "[tid:%u]: Squash from commit.\n",tid); + + doSquash(new_PC, new_NPC, tid); + + cpu->removeBlocksNotInROB(tid); +} + +template +void +SimpleEdgeFetch::tick() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + bool status_change = false; + + wroteToTimeBuffer = false; + + while (threads != end) { + ThreadID tid = *threads++; + // Check the signals for each thread to determine the proper status + // for each thread. + bool updated_status = checkSignalsAndUpdate(tid); + status_change = status_change || updated_status; + } + + DPRINTF(EdgeFetch, "Running stage.\n"); + + // Reset the number of the instruction we're Running. + numInst = 0; + +#if FULL_SYSTEM + if (fromCommit->commitInfo[0].interruptPending) { + interruptPending = true; + } + + if (fromCommit->commitInfo[0].clearInterrupt) { + interruptPending = false; + } +#endif + + for (threadFetched = 0; threadFetched < numFetchingThreads; + threadFetched++) { + // Fetch each of the actively Running threads. + fetch(status_change); + } + + // Record number of instructions fetched this cycle for distribution. + fetchNisnDist.sample(numInst); + + if (status_change) { + // Change the fetch stage status if there was a status change. 
+        _status = updateFetchStatus();
+    }
+
+    // If there was activity this cycle, inform the CPU of it.
+    if (wroteToTimeBuffer || cpu->contextSwitch) {
+        DPRINTF(Activity, "Activity this cycle.\n");
+
+        cpu->activityThisCycle();
+    }
+}
+
+/**
+ * Samples the backward signals from map, execute and commit for one thread
+ * and updates that thread's stall flags and fetch status.
+ *
+ * Signals are examined in this order: commit squash, committed block without
+ * squash (branch predictor update only, no early return), ROB still
+ * squashing, map squash, Idle hold, stall, and finally release from
+ * Blocked/Squashing back to Running.
+ *
+ * @param tid Thread whose signals are examined.
+ * @return true when one of the status-affecting cases above was taken,
+ *         false when none of them applied this cycle.
+ */
+template <class Impl>
+bool
+SimpleEdgeFetch<Impl>::checkSignalsAndUpdate(ThreadID tid)
+{
+    // Update the per thread stall statuses.
+    if (fromMap->mapBlock[tid]) {
+        stalls[tid].map = true;
+    }
+
+    if (fromMap->mapUnblock[tid]) {
+        assert(stalls[tid].map);
+        assert(!fromMap->mapBlock[tid]);
+        stalls[tid].map = false;
+    }
+
+    if (fromExecute->executeBlock[tid]) {
+        stalls[tid].execute = true;
+    }
+
+    if (fromExecute->executeUnblock[tid]) {
+        assert(stalls[tid].execute);
+        assert(!fromExecute->executeBlock[tid]);
+        stalls[tid].execute = false;
+    }
+
+    if (fromCommit->commitBlock[tid]) {
+        stalls[tid].commit = true;
+    }
+
+    if (fromCommit->commitUnblock[tid]) {
+        assert(stalls[tid].commit);
+        assert(!fromCommit->commitBlock[tid]);
+        stalls[tid].commit = false;
+    }
+
+    // Check squash signals from commit.
+    if (fromCommit->commitInfo[tid].squash) {
+
+        DPRINTF(EdgeFetch, "[tid:%u]: Squashing instructions due to squash "
+                "from commit.\n",tid);
+        // In any case, squash.
+        squash(fromCommit->commitInfo[tid].nextPC,
+               fromCommit->commitInfo[tid].nextNPC,
+               fromCommit->commitInfo[tid].doneBlockID,
+               tid);
+
+        // Create a new block, add it into list.
+        curBlockPtr = new Block(blockID, cpu);
+        curBlockPtr->setBlockListIt(cpu->addInstBlock(curBlockPtr));
+
+        // Also check if there's a mispredict that happened.
+        if (fromCommit->commitInfo[tid].branchMispredict) {
+            DPRINTF(EdgeFetch,"Inst block[Bid:%lli] commit, update BP. NPC = %#x, Exit =%i,\n",
+                    fromCommit->commitInfo[tid].doneBlockID,
+                    fromCommit->commitInfo[tid].nextPC,
+                    fromCommit->commitInfo[tid].exitID);
+            branchPred.squash(fromCommit->commitInfo[tid].doneBlockID, 0,
+                              fromCommit->commitInfo[tid].mispredPC,
+                              fromCommit->commitInfo[tid].nextPC,
+                              fromCommit->commitInfo[tid].exitType,
+                              fromCommit->commitInfo[tid].exitID,
+                              tid);
+        } else {
+            branchPred.squash(fromCommit->commitInfo[tid].doneBlockID,
+                              tid);
+        }
+
+        return true;
+    } else if (fromCommit->commitInfo[tid].doneBlockID
+               /*&& !fromCommit->commitInfo[tid].needSyscall*/) {
+        // If there's a committed block last cycle with no squash signal,
+        // it means we should wake fetch stage and fetch from the correct
+        // address.
+
+        assert(fromCommit->commitInfo[tid].doneBlockID < blockID );
+#if 0
+        PC[tid] = fromCommit->commitInfo[tid].nextPC;
+        nextPC[tid] = fromCommit->commitInfo[tid].nextNPC;
+        fetchStatus[tid] = Running;
+#endif
+        if(!fromCommit->commitInfo[tid].branchMispredict) {
+            // Arguments follow the order of the format specifiers:
+            // thread id first, then the committed block id.
+            DPRINTF(EdgeFetch, "[Tid:%i] Inst block[Bid:%lli] commit, current fetch PC is "
+                    "%#x\n", tid,
+                    fromCommit->commitInfo[tid].doneBlockID, PC[tid]);
+
+            branchPred.update(fromCommit->commitInfo[tid].doneBlockID, tid,
+                              fromCommit->commitInfo[tid].blockPC, 0,
+                              fromCommit->commitInfo[tid].nextPC,
+                              fromCommit->commitInfo[tid].exitType,
+                              fromCommit->commitInfo[tid].exitID);
+        }
+    }
+
+    // Check ROB squash signals from commit.
+    if (fromCommit->commitInfo[tid].robSquashing) {
+        DPRINTF(EdgeFetch, "[tid:%u]: ROB is still squashing.\n", tid);
+
+        // Continue to squash.
+        fetchStatus[tid] = Squashing;
+
+        return true;
+    }
+
+    // Check squash signals from map.
+    if (fromMap->mapInfo[tid].squash) {
+        DPRINTF(EdgeFetch, "[tid:%u]: Squashing instructions due to squash "
+                "from map.\n",tid);
+#if 0
+        // Update the branch predictor.
+        if (fromMap->decodeInfo[tid].branchMispredict) {
+            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
+                              fromDecode->decodeInfo[tid].nextPC,
+                              fromDecode->decodeInfo[tid].branchTaken,
+                              tid);
+        } else {
+            branchPred.squash(fromDecode->decodeInfo[tid].doneSeqNum,
+                              tid);
+        }
+#endif
+
+        if (fetchStatus[tid] != Squashing) {
+
+            DPRINTF(EdgeFetch, "Squashing from map with PC = %#x.\n",
+                    fromMap->mapInfo[tid].nextPC );
+            // Squash unless we're already squashing
+            squashFromMap(fromMap->mapInfo[tid].nextPC,
+                          fromMap->mapInfo[tid].nextPC + instSize,
+                          fromMap->mapInfo[tid].doneBlockID,
+                          tid);
+
+            // Create a new block, add it into list.
+            curBlockPtr = new Block(blockID, cpu);
+            curBlockPtr->setBlockListIt(cpu->addInstBlock(curBlockPtr));
+
+            return true;
+        }
+    }
+
+    //
+    // This is for Atomic-Edge only ...
+    // An Idle thread stays Idle until one of the squash signals above
+    // arrives.
+    //
+    if ( fetchStatus[tid] == Idle ) {
+        DPRINTF(EdgeFetch, "Still Idle ... \n");
+
+        return true;
+
+    }
+
+    // An outstanding I-cache access keeps its own status even when stalled.
+    if (checkStall(tid) &&
+        fetchStatus[tid] != IcacheWaitResponse &&
+        fetchStatus[tid] != IcacheWaitRetry) {
+        DPRINTF(EdgeFetch, "[tid:%i]: Setting to blocked\n",tid);
+
+        fetchStatus[tid] = Blocked;
+
+        return true;
+    }
+
+    if (fetchStatus[tid] == Blocked ||
+        fetchStatus[tid] == Squashing) {
+        // Switch status to running if fetch isn't being told to block or
+        // squash this cycle.
+        DPRINTF(EdgeFetch, "[tid:%i]: Done squashing, switching to running.\n",
+                tid);
+
+        fetchStatus[tid] = Running;
+
+        return true;
+    }
+
+    // If we've reached this point, we have not gotten any signals that
+    // cause fetch to change its status.  Fetch remains the same as before.
+ return false; +} + +template +void +SimpleEdgeFetch::fetch(bool &status_change) +{ + ////////////////////////////////////////// + // Start actual fetch + ////////////////////////////////////////// + ThreadID tid = getFetchingThread(fetchPolicy); + + if (tid == InvalidThreadID || drainPending) { + DPRINTF(EdgeFetch,"There are no more threads available to fetch from.\n"); + + // Breaks looping condition in tick() + threadFetched = numFetchingThreads; + return; + } + + DPRINTF(EdgeFetch, "Attempting to fetch from [tid:%i]\n", tid); + + // The current PC. + Addr fetch_PC = PC[tid]; + Addr fetch_NPC = nextPC[tid]; + + // Fault code for memory access. + Fault fault = NoFault; + + // If returning from the delay of a cache miss, then update the status + // to running, otherwise do the cache access. Possibly move this up + // to tick() function. + if (fetchStatus[tid] == IcacheAccessComplete) { + + DPRINTF(EdgeFetch, "[tid:%i]: Icache miss is complete.\n", + tid); + + fetchStatus[tid] = Running; + status_change = true; + + } else if (fetchStatus[tid] == Running) { + + DPRINTF(EdgeFetch, "[tid:%i]: Attempting to translate and read " + "instruction, starting at PC %08p.\n", + tid, fetch_PC); + + bool fetch_success = fetchCacheLine(fetch_PC, fault, tid); + if (!fetch_success) { + if (cacheBlocked) { + ++icacheStallCycles; + } else { + ++fetchMiscStallCycles; + } + return; + } + } else { + if (fetchStatus[tid] == Idle) { + ++fetchIdleCycles; + DPRINTF(EdgeFetch, "[tid:%i]: EdgeFetch is idle!\n", tid); + } else if (fetchStatus[tid] == Blocked) { + ++fetchBlockedCycles; + DPRINTF(EdgeFetch, "[tid:%i]: EdgeFetch is blocked!\n", tid); + } else if (fetchStatus[tid] == Squashing) { + ++fetchSquashCycles; + DPRINTF(EdgeFetch, "[tid:%i]: EdgeFetch is squashing!\n", tid); + } else if (fetchStatus[tid] == IcacheWaitResponse) { + ++icacheStallCycles; + DPRINTF(EdgeFetch, "[tid:%i]: EdgeFetch is waiting cache response!\n", tid); + } + + // Status is Idle, Squashing, Blocked, or 
IcacheWaitResponse, so + // fetch should do nothing. + return; + } + + ++fetchCycles; + + // If we had a stall due to an icache miss, then return. + if (fetchStatus[tid] == IcacheWaitResponse) { + ++icacheStallCycles; + status_change = true; + return; + } + + Addr next_PC = fetch_PC; + Addr next_NPC = fetch_NPC; + + InstSeqNum inst_seq; + MachInst inst; + ExtMachInst ext_inst; + + // @todo: Fix this hack. + unsigned offset = (fetch_PC & cacheBlkMask) & ~3; + unsigned chunk_offset = (fetch_PC & chunkOffset ) & ~3; + + EDGEStaticInstPtr staticInst = NULL; + + if (fault == NoFault) { + + DPRINTF(EdgeFetch, "[tid:%i]: Adding instructions to queue to " + "map.\n",tid); + + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + //bool predicted_branch = false; + + // + // Currently maybe I should use fetchWidth as a chunk identifier + // + while (offset < cacheBlkSize && + numInst < fetchWidth && + chunk_offset < chunkSize ) { + + // Make sure this is a valid index. + assert(offset <= cacheBlkSize - instSize); + + // Get the instruction from the array of the cache line. + inst = TheISA::gtoh(*reinterpret_cast + (&cacheData[tid][offset])); + + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(fetch_PC, fetch_PC, inst); + + ext_inst = predecoder.getExtMachInst(); + + DynInstPtr instruction; + + // Get a sequence number. + inst_seq = cpu->getAndIncrementInstSeq(); + + if (edgeFetchStatus[tid] == Head ) { + + constructHeader ( ext_inst, chunk_offset, tid ); + + // Replace the head nibble to 0 to avoid false decoding from + // some resemble insts to nop. + replaceBits(ext_inst, 31, 28, 0); + + staticInst = EDGEStaticInstPtr(ext_inst, fetch_PC, TheISA::Head); + + // Create a new DynInst from the instruction fetched. 
+ instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, + next_PC, next_NPC, + inst_seq, cpu, TheISA::Head); + + } else if (edgeFetchStatus[tid] == Normal ) { + + staticInst = EDGEStaticInstPtr(ext_inst, fetch_PC, TheISA::Normal); + + // Create a new DynInst from the instruction fetched. + instruction = new DynInst(staticInst, + fetch_PC, fetch_NPC, + next_PC, next_NPC, + inst_seq, cpu, TheISA::Normal); + + } else { // Unrecogonized status + + panic("Unknown edgeFetchStatus"); + + } + + instruction->setEDGEInstStatus(instID, chunkID, blockID); + + instruction->setTid(tid); + + instruction->setASID(tid); + + instruction->setThreadState(cpu->thread[tid]); + + instruction->setBlockPtr(curBlockPtr); + + if(instruction->isNop()){ + fetchedNopInsts ++; + } + + if (instruction->isMove()){ + fetchedMovInsts ++; + } + +#if TRACING_ON + instruction->traceData = + cpu->getEdgeTracer()->getEdgeInstRecord(curTick, cpu->tcBase(tid), + instruction->staticInst, instruction->readPC()); +#else + instruction->traceData = NULL; +#endif + + // No branch operations inside blocks, update PC directly. + // next_PC is actually current fetch PC until this updating. + next_PC = next_NPC; + next_NPC = next_NPC + instSize; + + // Add instruction to the current block's list of instructions. + //instruction->setInstListIt(cpu->addInst(instruction)); + instruction->setInstListIt(curBlockPtr->addInst(instruction)); + + // Increment stat of fetched instructions. + ++fetchedInsts; + + if (instruction->isQuiesce()) { + DPRINTF(EdgeFetch, "Quiesce instruction encountered, halting fetch!", + curTick); + fetchStatus[tid] = QuiescePending; + ++numInst; + status_change = true; + break; + } + + // Move to the next instruction. 
+ fetch_PC = next_PC; + fetch_NPC = next_NPC; + + ++numInst; + offset += instSize; + chunk_offset += instSize; + instID ++; + + }// End of the fetch loop + + DPRINTF(EdgeFetch,"chunk_offset = %d, chunkSize = %d, instNum = %d.\n", + chunk_offset, chunkSize,curBlockPtr->getNumInst()); + + if ( chunk_offset >= chunkSize ) { // A inst chunk has been fetched. + + if ( edgeFetchStatus[tid] == Head ) { // Head chunk, pack it. + + DPRINTF(EdgeFetch, "EDGE status change from Head to Normal\n"); + + edgeFetchStatus[tid] = Normal; + + curBlockPtr->setStartPC( (fetch_PC - instSize) & TheISA::ChunkMask ); + curBlockPtr->setTid(tid); + + if ( !curBlockPtr->depackHeader(headerInfo)) { + DPRINTF(EdgeFetch, "Not header of a inst block, go to idle status until commit stage tell" + " us to squash.\n"); + + fetchStatus[tid] = Idle; + status_change = true; + curBlockPtr->removeAllInsts(); + + fetchMisAlignedTimes ++; + } + //curBlockPtr->setBlockListIt(cpu->addInstBlock(curBlockPtr)); + clearHeaderInfo(); + chunkID++; + + fetchedChunks ++; + + } else if ( edgeFetchStatus[tid] == Normal ) { // Body chunk, check for block end. + + chunkID ++; + + fetchedChunks ++; + + DPRINTF(EdgeFetch,"chunkID = %d, chunkNum =%d.\n", + chunkID - 1, curBlockPtr->getChunkNum()); + + if ( chunkID > (curBlockPtr->getChunkNum()) ) { + + DPRINTF(EdgeFetch, "EDGE status change from Normal to Head. " + "chunkID = %d, chunkNum = %d\n", chunkID - 1, curBlockPtr->getChunkNum() ); + + // Branch predictor takes place here ... 
+ Addr pred_NPC = curBlockPtr ->getStartPC(); + BlockID oldest_blockID = cpu->readHeadInstBlockID(tid); + ExitID pred_exitID = branchPred.predict(oldest_blockID, + curBlockPtr, + pred_NPC, + 0, + tid); + curBlockPtr->setPredInfo(pred_exitID,pred_NPC); + + // Update next_PC and next_NPC + next_PC = pred_NPC; + next_NPC = next_PC + instSize; + + DPRINTF(EdgeFetch, "Predicted exit[%i], predicted target @%#x.\n", + pred_exitID, pred_NPC ); + + edgeFetchStatus[tid] = Head; + + chunkID = 0; + instID = 0; + blockID++; + + fetchedBlocks ++; + + toMap->instBlocks[0] = curBlockPtr; + toMap->size++; + + curBlockPtr = new Block(blockID, cpu); + curBlockPtr->setBlockListIt(cpu->addInstBlock(curBlockPtr)); +#if 0 + if ( fromCommit->commitInfo[tid].doneBlockID == 0) { + + DPRINTF(EdgeFetch, "No inst block has been committed." + "Waiting for inst commit. \nSetting fetch status to Idle\n"); + + fetchStatus[tid] = Idle; + status_change = true; + } +#endif + } + } + } + + if (chunk_offset >= chunkSize ) { + DPRINTF(EdgeFetch, "[tid:%i]: Done Running, reached inst chunk end \n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(EdgeFetch, "[tid:%i]: Done Running, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (offset >= cacheBlkSize) { + DPRINTF(EdgeFetch, "[tid:%i]: Done Running, reached the end of cache " + "block.\n", tid); + } + } + + if (numInst > 0) { + wroteToTimeBuffer = true; + } + + // Now that Running is completed, update the PC to signify what the next + // cycle will be. 
+    if (fault == NoFault ) {
+        if ( fetchStatus[tid] != Idle ) {
+            PC[tid] = next_PC;
+            nextPC[tid] = next_NPC;
+            DPRINTF(EdgeFetch, "[tid:%i]: Setting PC to %08p.\n", tid, next_PC);
+        }
+
+    } else {
+
+        // A fetch fault ends the current block: it is emptied, marked as a
+        // NOP block carrying the fault, handed to map, and fetch goes Idle.
+        DPRINTF(EdgeFetch, "[tid:%i]: Fault detected to %08p.\n", tid, next_PC);
+        DPRINTF(EdgeFetch, "Inst block[Bid:%lli] send to map with fault directly.\n",
+                curBlockPtr->getBlockID());
+
+        edgeFetchStatus[tid] = Head;
+        clearHeaderInfo();
+
+        chunkID = 0;
+        instID = 0;
+        blockID ++;
+
+        //Clear insts in block and make it a nop inst block
+        curBlockPtr->removeAllInsts();
+        curBlockPtr->setNop();
+
+        fetchedNopBlocks ++;
+
+        //Set fault to block so it can be handled when commit
+        curBlockPtr->setFault(fault);
+
+        toMap->instBlocks[0] = curBlockPtr;
+        toMap->size++;
+
+        curBlockPtr = new Block(blockID, cpu);
+        curBlockPtr->setBlockListIt(cpu->addInstBlock(curBlockPtr));
+
+        //fetchStatus[tid] = TrapPending;
+        fetchStatus[tid] = Idle;
+        status_change = true;
+
+    }
+}
+
+/**
+ * Retry handler: re-sends the pending retry packet, if any, to the I-cache
+ * port.  On success the owning thread moves to IcacheWaitResponse and the
+ * blocked flag is cleared; with no pending packet only the flag is cleared.
+ */
+template <class Impl>
+void
+SimpleEdgeFetch<Impl>::recvRetry()
+{
+    if (retryPkt != NULL) {
+        assert(cacheBlocked);
+        assert(retryTid != InvalidThreadID);
+        assert(fetchStatus[retryTid] == IcacheWaitRetry);
+
+        if (icachePort->sendTiming(retryPkt)) {
+            fetchStatus[retryTid] = IcacheWaitResponse;
+            retryPkt = NULL;
+            retryTid = InvalidThreadID;
+            cacheBlocked = false;
+        }
+    } else {
+        assert(retryTid == InvalidThreadID);
+        // Access has been squashed since it was sent out.  Just clear
+        // the cache being blocked.
+        cacheBlocked = false;
+    }
+}
+
+///////////////////////////////////////
+//                                   //
+//  SMT FETCH POLICY MAINTAINED HERE //
+//                                   //
+///////////////////////////////////////
+
+/**
+ * Picks the thread to fetch from this cycle.
+ *
+ * With more than one thread the choice is delegated to the policy named by
+ * fetch_priority.  With a single thread, the first active thread is returned
+ * only if it is Running or IcacheAccessComplete.
+ *
+ * @param fetch_priority Fetch policy to apply when numThreads > 1.
+ * @return The selected thread, or InvalidThreadID when none qualifies.
+ */
+template <class Impl>
+ThreadID
+SimpleEdgeFetch<Impl>::getFetchingThread(FetchPriority &fetch_priority)
+{
+    if (numThreads > 1) {
+        switch (fetch_priority) {
+
+          case SingleThread:
+            return 0;
+
+          case RoundRobin:
+            return roundRobin();
+
+          case IQ:
+            return iqCount();
+
+          case LSQ:
+            return lsqCount();
+
+          case Branch:
+            return branchCount();
+
+          default:
+            return InvalidThreadID;
+        }
+    } else {
+        list<ThreadID>::iterator thread = activeThreads->begin();
+        if (thread == activeThreads->end()) {
+            return InvalidThreadID;
+        }
+
+        ThreadID tid = *thread;
+
+        if (fetchStatus[tid] == Running ||
+            fetchStatus[tid] == IcacheAccessComplete
+            /*fetchStatus[tid] == Idle*/) {
+            return tid;
+        } else {
+            return InvalidThreadID;
+        }
+    }
+}
+
+
+/**
+ * Round-robin policy: returns the first thread in priorityList that is
+ * Running, IcacheAccessComplete or Idle, and moves it to the back of the
+ * list.
+ *
+ * @return The selected thread, or InvalidThreadID when none qualifies.
+ */
+template <class Impl>
+ThreadID
+SimpleEdgeFetch<Impl>::roundRobin()
+{
+    list<ThreadID>::iterator pri_iter = priorityList.begin();
+    list<ThreadID>::iterator end = priorityList.end();
+
+    ThreadID high_pri;
+
+    while (pri_iter != end) {
+        high_pri = *pri_iter;
+
+        // Thread ids index per-thread arrays, so numThreads itself is
+        // already out of range.
+        assert(high_pri < numThreads);
+
+        if (fetchStatus[high_pri] == Running ||
+            fetchStatus[high_pri] == IcacheAccessComplete ||
+            fetchStatus[high_pri] == Idle) {
+
+            priorityList.erase(pri_iter);
+            priorityList.push_back(high_pri);
+
+            return high_pri;
+        }
+
+        pri_iter++;
+    }
+
+    return InvalidThreadID;
+}
+
+/**
+ * IQ-count policy: among the active threads that are Running,
+ * IcacheAccessComplete or Idle, returns the one reporting the smallest
+ * executeInfo[tid].iqCount (first such thread on a tie).
+ *
+ * The earlier version pushed the counts into a priority queue and then used
+ * the popped count as a thread id.
+ * NOTE(review): assumes the policy favours the least-loaded thread -- confirm.
+ *
+ * @return The selected thread, or InvalidThreadID when none qualifies.
+ */
+template <class Impl>
+ThreadID
+SimpleEdgeFetch<Impl>::iqCount()
+{
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
+
+    ThreadID best_tid = InvalidThreadID;
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        // Skip threads whose status does not allow them to be picked.
+        if (fetchStatus[tid] != Running &&
+            fetchStatus[tid] != IcacheAccessComplete &&
+            fetchStatus[tid] != Idle) {
+            continue;
+        }
+
+        if (best_tid == InvalidThreadID ||
+            fromExecute->executeInfo[tid].iqCount <
+            fromExecute->executeInfo[best_tid].iqCount) {
+            best_tid = tid;
+        }
+    }
+
+    return best_tid;
+}
+
+/**
+ * LSQ-count policy: among the active threads that are Running,
+ * IcacheAccessComplete or Idle, returns the one reporting the smallest
+ * executeInfo[tid].ldstqCount (first such thread on a tie).
+ *
+ * The earlier version pushed the counts into a priority queue and then used
+ * the popped count as a thread id.
+ * NOTE(review): assumes the policy favours the least-loaded thread -- confirm.
+ *
+ * @return The selected thread, or InvalidThreadID when none qualifies.
+ */
+template <class Impl>
+ThreadID
+SimpleEdgeFetch<Impl>::lsqCount()
+{
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
+
+    ThreadID best_tid = InvalidThreadID;
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        // Skip threads whose status does not allow them to be picked.
+        if (fetchStatus[tid] != Running &&
+            fetchStatus[tid] != IcacheAccessComplete &&
+            fetchStatus[tid] != Idle) {
+            continue;
+        }
+
+        if (best_tid == InvalidThreadID ||
+            fromExecute->executeInfo[tid].ldstqCount <
+            fromExecute->executeInfo[best_tid].ldstqCount) {
+            best_tid = tid;
+        }
+    }
+
+    return best_tid;
+}
+
+/**
+ * Branch-count policy.  Not implemented: always panics.
+ */
+template <class Impl>
+ThreadID
+SimpleEdgeFetch<Impl>::branchCount()
+{
+#if 0
+    list<ThreadID>::iterator thread = activeThreads->begin();
+    assert(thread != activeThreads->end());
+    ThreadID tid = *thread;
+#endif
+
+    panic("Branch Count Fetch policy unimplemented\n");
+    return InvalidThreadID;
+}
+
+#if 0
+template <class Impl>
+void
+SimpleEdgeFetch<Impl>::depackHeaderInfo()
+{
+    uint32_t mark = bits(headerInfo[3], 31, 24 );
+    uint32_t type = bits(headerInfo[3], 23, 16);
+    uint32_t xflags = bits( headerInfo[3], 15, 8 );
+    uint32_t smask = headerInfo[1];
+    chunkNum = ( type == 0 ) ? 4 : type;
+    // Currently do nothing but dprint it
+    DPRINTF (EdgeFetchTest, "Header MARK : 0x%x\nHeader TYPE : 0x%x\nHeader XFLAGS : 0x%x\n"
+            "Header SMASK : 0x%x\n", mark, type, xflags, smask);
+    if ( mark != 0xff )
+        panic ("Wrong header!\n");
+}
+#endif
+
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/fu_pool.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/fu_pool.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_O3_FU_POOL_HH__
+#define __CPU_O3_FU_POOL_HH__
+
+#include <bitset>
+#include <list>
+#include <string>
+#include <vector>
+
+#include "base/sched_list.hh"
+#include "cpu/op_class.hh"
+#include "params/FUPool.hh"
+#include "sim/sim_object.hh"
+
+class FUDesc;
+class FuncUnit;
+
+/**
+ * Pool of FU's, specific to the new CPU model. The old FU pool had lists of
+ * free units and busy units, and whenever a FU was needed it would iterate
+ * through the free units to find a FU that provided the capability. This pool
+ * has lists of units specific to each of the capabilities, and whenever a FU
+ * is needed, it iterates through that list to find a free unit. The previous
+ * FU pool would have to be ticked each cycle to update which units became
+ * free. This FU pool lets the IEW stage handle freeing units, which frees
+ * them as their scheduled execution events complete. This limits units in this
+ * model to either have identical issue and op latencies, or 1 cycle issue
+ * latencies.
+ */
+class FUPool : public SimObject
+{
+  private:
+    /** Maximum op execution latencies, per op class. */
+    unsigned maxOpLatencies[Num_OpClasses];
+    /** Maximum issue latencies, per op class. */
+    unsigned maxIssueLatencies[Num_OpClasses];
+
+    /** Bitvector listing capabilities of this FU pool, one bit per op class. */
+    std::bitset<Num_OpClasses> capabilityList;
+
+    /** Bitvector listing which FUs are busy, indexed by FU index. */
+    std::vector<bool> unitBusy;
+
+    /** List of FU indices to be freed at the end of this cycle. */
+    std::vector<int> unitsToBeFreed;
+
+    /**
+     * Class that implements a circular queue to hold FU indices. The hope is
+     * that FUs that have been just used will be moved to the end of the queue
+     * by iterating through it, thus leaving free units at the head of the
+     * queue.
+     */
+    class FUIdxQueue {
+      public:
+        /** Constructs a circular queue of FU indices. */
+        FUIdxQueue()
+            : idx(0), size(0)
+        { }
+
+        /** Adds a FU to the queue. */
+        inline void addFU(int fu_idx);
+
+        /** Returns the index of the FU at the head of the queue, and changes
+         *  the index to the next element (wrapping to 0 at the end).
+         */
+        inline int getFU();
+
+      private:
+        /** Circular queue index. */
+        int idx;
+
+        /** Size of the queue. */
+        int size;
+
+        /** Queue of FU indices. */
+        std::vector<int> funcUnitsIdx;
+    };
+
+    /** Per op class queues of FUs that provide that capability. */
+    FUIdxQueue fuPerCapList[Num_OpClasses];
+
+    /** Number of FUs. */
+    int numFU;
+
+    /** Functional units; owned by the pool and deleted in the destructor. */
+    std::vector<FuncUnit *> funcUnits;
+
+    typedef std::vector<FuncUnit *>::iterator fuListIterator;
+
+  public:
+    typedef FUPoolParams Params;
+    /** Constructs a FU pool. */
+    FUPool(const Params *p);
+    ~FUPool();
+
+    /** Annotates units that provide memory operations. Included only because
+     *  old FU pool provided this function.
+     */
+    void annotateMemoryUnits(unsigned hit_latency);
+
+    /**
+     * Gets a FU providing the requested capability. Will mark the unit as busy,
+     * but leaves the freeing of the unit up to the IEW stage.
+     * @param capability The capability requested.
+     * @return Returns -2 if the FU pool does not have the capability, -1 if
+     * there is no free FU, and the FU's index otherwise.
+     */
+    int getUnit(OpClass capability);
+
+    /** Queues a busy FU to be freed by the next processFreeUnits() call. */
+    void freeUnitNextCycle(int fu_idx);
+
+    /** Frees all FUs on the list. */
+    void processFreeUnits();
+
+    /** Returns the total number of FUs. */
+    int size() { return numFU; }
+
+    /** Debugging function used to dump FU information. */
+    void dump();
+
+    /** Returns the operation execution latency of the given capability. */
+    unsigned getOpLatency(OpClass capability) {
+        return maxOpLatencies[capability];
+    }
+
+    /** Returns the issue latency of the given capability. */
+    unsigned getIssueLatency(OpClass capability) {
+        return maxIssueLatencies[capability];
+    }
+
+    /** Switches out functional unit pool (currently a no-op). */
+    void switchOut();
+
+    /** Takes over from another CPU's thread: frees every FU, drops pending frees. */
+    void takeOverFrom();
+};
+
+#endif // __CPU_O3_FU_POOL_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/fu_pool.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/fu_pool.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,282 @@
+/*
+ * Copyright (c) 2006 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Kevin Lim
+ */
+
+#include <sstream>
+
+#include "cpu/edge/fu_pool.hh"
+#include "cpu/func_unit.hh"
+
+using namespace std;
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  A pool of function units
+//
+
+inline void
+FUPool::FUIdxQueue::addFU(int fu_idx)
+{
+    funcUnitsIdx.push_back(fu_idx);
+    ++size;
+}
+
+inline int
+FUPool::FUIdxQueue::getFU()
+{
+    int retval = funcUnitsIdx[idx++];
+
+    if (idx == size)
+        idx = 0;
+
+    return retval;
+}
+
+FUPool::~FUPool()
+{
+    fuListIterator i = funcUnits.begin();
+    fuListIterator end = funcUnits.end();
+    for (; i != end; ++i)
+        delete *i;
+}
+
+
+// Constructor: builds the FuncUnits described by p->FUList, indexed by op class
+FUPool::FUPool(const Params *p)
+    : SimObject(p)
+{
+    numFU = 0;
+
+    funcUnits.clear();
+
+    for (int i = 0; i < Num_OpClasses; ++i) {
+        maxOpLatencies[i] = 0;
+        maxIssueLatencies[i] = 0;
+    }
+
+    //
+    //  Iterate through the list of FUDescData structures
+    //
+    const vector<FUDesc *> &paramList = p->FUList;
+    for (FUDDiterator i = paramList.begin(); i != paramList.end(); ++i) {
+
+        //
+        //  Don't bother with this if we're not going to create any FU's
+        //
+        if ((*i)->number) {
+            //
+            //  Create the FuncUnit object from this structure
+            //   - add the capabilities listed in the FU's operation
+            //     description
+            //
+            //  We create the first unit, then duplicate it as needed
+            //
+            FuncUnit *fu = new FuncUnit;
+
+            OPDDiterator j = (*i)->opDescList.begin();
+            OPDDiterator end = (*i)->opDescList.end();
+            for (; j != end; ++j) {
+                // indicate that this pool has this capability
+                capabilityList.set((*j)->opClass);
+
+                // Add each of the FU's that will have this capability to the
+                // appropriate queue.
+                for (int k = 0; k < (*i)->number; ++k)
+                    fuPerCapList[(*j)->opClass].addFU(numFU + k);
+
+                // indicate that this FU has the capability
+                fu->addCapability((*j)->opClass, (*j)->opLat, (*j)->issueLat);
+
+                if ((*j)->opLat > maxOpLatencies[(*j)->opClass])
+                    maxOpLatencies[(*j)->opClass] = (*j)->opLat;
+
+                if ((*j)->issueLat > maxIssueLatencies[(*j)->opClass])
+                    maxIssueLatencies[(*j)->opClass] = (*j)->issueLat;
+            }
+
+            numFU++;
+
+            //  Add the appropriate number of copies of this FU to the list
+            ostringstream s;
+
+            s << (*i)->name() << "(0)";
+            fu->name = s.str();
+            funcUnits.push_back(fu);
+
+            for (int c = 1; c < (*i)->number; ++c) {
+                ostringstream s;
+                numFU++;
+                FuncUnit *fu2 = new FuncUnit(*fu);
+
+                s << (*i)->name() << "(" << c << ")";
+                fu2->name = s.str();
+                funcUnits.push_back(fu2);
+            }
+        }
+    }
+
+    unitBusy.resize(numFU);
+
+    for (int i = 0; i < numFU; i++) {
+        unitBusy[i] = false;
+    }
+}
+
+void
+FUPool::annotateMemoryUnits(unsigned hit_latency)
+{
+    maxOpLatencies[MemReadOp] = hit_latency;
+
+    fuListIterator i = funcUnits.begin();
+    fuListIterator iend = funcUnits.end();
+    for (; i != iend; ++i) {
+        if ((*i)->provides(MemReadOp))
+            (*i)->opLatency(MemReadOp) = hit_latency;
+
+        if ((*i)->provides(MemWriteOp))
+            (*i)->opLatency(MemWriteOp) = hit_latency;
+    }
+}
+
+int
+FUPool::getUnit(OpClass capability)
+{
+    //  If this pool doesn't have the specified capability,
+    //  return this information to the caller
+    if (!capabilityList[capability])
+        return -2;
+
+    int fu_idx = fuPerCapList[capability].getFU();
+    int start_idx = fu_idx;
+
+    // Iterate through the circular queue if needed, stopping if we've reached
+    // the first element again.
+    while (unitBusy[fu_idx]) {
+        fu_idx = fuPerCapList[capability].getFU();
+        if (fu_idx == start_idx) {
+            // No FU available
+            return -1;
+        }
+    }
+
+    assert(fu_idx < numFU);
+
+    unitBusy[fu_idx] = true;
+
+    return fu_idx;
+}
+
+void
+FUPool::freeUnitNextCycle(int fu_idx)
+{
+    assert(unitBusy[fu_idx]);
+    unitsToBeFreed.push_back(fu_idx);
+}
+
+void
+FUPool::processFreeUnits()
+{
+    while (!unitsToBeFreed.empty()) {
+        int fu_idx = unitsToBeFreed.back();
+        unitsToBeFreed.pop_back();
+
+        assert(unitBusy[fu_idx]);
+
+        unitBusy[fu_idx] = false;
+    }
+}
+
+void
+FUPool::dump()
+{
+    cout << "Function Unit Pool (" << name() << ")\n";
+    cout << "======================================\n";
+    cout << "Free List:\n";
+
+    for (int i = 0; i < numFU; ++i) {
+        if (unitBusy[i]) {
+            continue;
+        }
+
+        cout << " [" << i << "] : ";
+
+        cout << funcUnits[i]->name << " ";
+
+        cout << "\n";
+    }
+
+    cout << "======================================\n";
+    cout << "Busy List:\n";
+    for (int i = 0; i < numFU; ++i) {
+        if (!unitBusy[i]) {
+            continue;
+        }
+
+        cout << " [" << i << "] : ";
+
+        cout << funcUnits[i]->name << " ";
+
+        cout << "\n";
+    }
+}
+
+void
+FUPool::switchOut()
+{
+}
+
+void
+FUPool::takeOverFrom()
+{
+    for (int i = 0; i < numFU; i++) {
+        unitBusy[i] = false;
+    }
+    unitsToBeFreed.clear();
+}
+
+//
+
+////////////////////////////////////////////////////////////////////////////
+//
+//  The SimObjects we use to get the FU information into the simulator
+//
+////////////////////////////////////////////////////////////////////////////
+
+//
+//    FUPool - Contains a list of FUDesc objects to make available
+//
+
+//
+//  The FuPool object
+//
+FUPool *
+FUPoolParams::create()
+{
+    return new FUPool(this);
+}
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/global_regfile.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/global_regfile.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,209 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_REGFILE_HH__ +#define __CPU_EDGE_REGFILE_HH__ + +#include + +#include "arch/isa_traits.hh" +#include "arch/types.hh" +#include "base/trace.hh" +#include "config/full_system.hh" +#include "config/the_isa.hh" +#include "cpu/edge/comm.hh" + +#if FULL_SYSTEM +#include "arch/kernel_stats.hh" +#endif + +/** + * Simple physical register file class. 
+ * Right now this is specific to Alpha until we decide if/how to make things
+ * generic enough to support other ISAs.
+ */
+template <class Impl>
+class EdgePhysRegFile
+{
+  protected:
+    typedef TheISA::IntReg IntReg;
+    typedef TheISA::FloatReg FloatReg;
+    typedef TheISA::FloatRegBits FloatRegBits;
+
+    typedef union {
+        FloatReg d;
+        FloatRegBits q;
+    } PhysFloatReg;
+
+    // Note that most of the definitions of the IntReg, FloatReg, etc. exist
+    // within the Impl/ISA class and not within this PhysRegFile class.
+
+    // Will make these registers public for now, but they probably should
+    // be private eventually with some accessor functions.
+  public:
+    typedef typename Impl::CPU CPU;
+
+    /**
+     * Constructs a physical register file with the specified amount of
+     * integer and floating point registers. Both arrays are zero-filled.
+     */
+    EdgePhysRegFile(CPU *_cpu, unsigned _numPhysicalIntRegs,
+                    unsigned _numPhysicalFloatRegs);
+
+    //Everything below should be pretty well identical to the normal
+    //register file that exists within AlphaISA class.
+    //The duplication is unfortunate but it's better than having
+    //different ways to access certain registers.
+
+    /** Reads an integer register. */
+    uint64_t readIntReg(PhysRegIndex reg_idx)
+    {
+        assert(reg_idx < numPhysicalIntRegs);
+
+        DPRINTF(EdgeReg, "RegFile: Access to int register %i, has data "
+                "%#x\n", int(reg_idx), intRegFile[reg_idx]);
+        return intRegFile[reg_idx];
+    }
+
+    /** Reads a floating point register (double precision). */
+    FloatReg readFloatReg(PhysRegIndex reg_idx)
+    {
+        // Float indices follow the int registers; rebase into floatRegFile.
+        reg_idx = reg_idx - numPhysicalIntRegs;
+
+        assert(reg_idx < numPhysicalFloatRegs);
+
+        FloatReg floatReg = floatRegFile[reg_idx].d;
+
+        DPRINTF(EdgeReg, "RegFile: Access to float register %i, has "
+                "data %#x\n", int(reg_idx), floatRegFile[reg_idx].q);
+
+        return floatReg;
+    }
+    /** Reads a floating point register as its raw bit pattern. */
+    FloatRegBits readFloatRegBits(PhysRegIndex reg_idx)
+    {
+        // Float indices follow the int registers; rebase into floatRegFile.
+        reg_idx = reg_idx - numPhysicalIntRegs;
+
+        assert(reg_idx < numPhysicalFloatRegs);
+
+        FloatRegBits floatRegBits = floatRegFile[reg_idx].q;
+
+        DPRINTF(EdgeReg, "RegFile: Access to float register %i as int, "
+                "has data %#x\n", int(reg_idx), (uint64_t)floatRegBits);
+
+        return floatRegBits;
+    }
+
+    /** Sets an integer register to the given value; ZeroReg is never written. */
+    void setIntReg(PhysRegIndex reg_idx, uint64_t val)
+    {
+        assert(reg_idx < numPhysicalIntRegs);
+
+        DPRINTF(EdgeReg, "RegFile: Setting int register %i to %#x\n",
+                int(reg_idx), val);
+
+        if (reg_idx != TheISA::ZeroReg)
+            intRegFile[reg_idx] = val;
+    }
+
+    /** Sets a double precision floating point register to the given value. */
+    void setFloatReg(PhysRegIndex reg_idx, FloatReg val)
+    {
+        // Float indices follow the int registers; rebase into floatRegFile.
+        reg_idx = reg_idx - numPhysicalIntRegs;
+
+        assert(reg_idx < numPhysicalFloatRegs);
+
+        DPRINTF(EdgeReg, "RegFile: Setting float register %i to %#x\n",
+                int(reg_idx), (uint64_t)val);
+
+#if THE_ISA == ALPHA_ISA
+        if (reg_idx != TheISA::ZeroReg)
+#endif
+            floatRegFile[reg_idx].d = val;
+    }
+    /** Sets a floating point register from a raw bit pattern. */
+    void setFloatRegBits(PhysRegIndex reg_idx, FloatRegBits val)
+    {
+        // Float indices follow the int registers; rebase into floatRegFile.
+        reg_idx = reg_idx - numPhysicalIntRegs;
+
+        assert(reg_idx < numPhysicalFloatRegs);
+
+        DPRINTF(EdgeReg, "RegFile: Setting float register %i to %#x\n",
+                int(reg_idx), (uint64_t)val);
+
+        floatRegFile[reg_idx].q = val;
+    }
+
+  public:
+    /** (signed) integer register file. */
+    IntReg *intRegFile;
+
+    /** Floating point register file. */
+    PhysFloatReg *floatRegFile;
+
+#if FULL_SYSTEM
+  private:
+    int intrflag;                   // interrupt flag
+#endif
+
+  private:
+    /** CPU pointer. */
+    CPU *cpu;
+
+  public:
+    /** Number of physical integer registers. */
+    unsigned numPhysicalIntRegs;
+    /** Number of physical floating point registers. */
+};
+
+template <class Impl>
+EdgePhysRegFile<Impl>::EdgePhysRegFile(CPU *_cpu, unsigned _numPhysicalIntRegs,
+                                       unsigned _numPhysicalFloatRegs)
+    : cpu(_cpu), numPhysicalIntRegs(_numPhysicalIntRegs),
+      numPhysicalFloatRegs(_numPhysicalFloatRegs)
+{
+    intRegFile = new IntReg[numPhysicalIntRegs];
+    floatRegFile = new PhysFloatReg[numPhysicalFloatRegs];
+
+    memset(intRegFile, 0, sizeof(IntReg) * numPhysicalIntRegs);
+    memset(floatRegFile, 0, sizeof(PhysFloatReg) * numPhysicalFloatRegs);
+}
+
+#endif
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/impl.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/impl.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec. 2009
+ *
+ */
+
+#ifndef __CPU_EDGE_IMPL_HH__
+#define __CPU_EDGE_IMPL_HH__
+
+#include "arch/isa_traits.hh"
+#include "config/the_isa.hh"
+#include "cpu/edge/cpu_policy.hh"
+#include "cpu/edge/dyn_inst.hh"
+#include "cpu/edge/block.hh"
+
+
+// Forward declarations.
+//template <class Impl>
+//class SimpleEdgeBlock;
+
+//template <class Impl>
+//class SimpleEdgeDynInst;
+
+template <class Impl>
+class EdgeCPU;
+/** Traits bundle binding the EDGE CPU's concrete types and size limits. */
+struct EdgeCPUImpl
+{
+    typedef TheISA::MachInst MachInst;
+    typedef SimpleEdgeCPUPolicy<EdgeCPUImpl> CPUPol;
+    typedef SimpleEdgeDynInst<EdgeCPUImpl> DynInst;
+    typedef SimpleEdgeBlock<EdgeCPUImpl> EdgeBlock;
+    typedef RefCountingPtr<DynInst> DynInstPtr;
+    typedef RefCountingPtr<EdgeBlock> EdgeBlockPtr;
+    typedef EdgeCPU<EdgeCPUImpl> CPU;
+    typedef CPU CPUType;
+
+    enum {
+      MaxThreads = 4,
+      MaxFetchWidth = 1, // This is the max number of inst blocks that can be fetched.
+      MaxCompleteWidth = 8, // This is the max number of inst blocks that can be completed each cycle.
+      MaxIssueWidth = 16, // This is the max number of insts that can be issued each cycle.
+      MaxFrameNum = 8,
+      MaxSlotNum = 8,
+      MaxByteNum = 8,
+      MaxInFlightBlockNum = 8
+    };
+};
+
+#endif // __CPU_EDGE_IMPL_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/inst_queue.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/inst_queue.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,553 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#ifndef __CPU_EDGE_INST_QUEUE_HH__ +#define __CPU_EDGE_INST_QUEUE_HH__ + +#include +#include +#include +#include +#include + +#include "base/statistics.hh" +#include "base/timebuf.hh" +#include "base/types.hh" +#include "cpu/inst_seq.hh" +#include "cpu/op_class.hh" +#include "sim/eventq.hh" +#include "cpu/edge/cpu.hh" +#include "cpu/edge/dep_graph.hh" + +class SimpleEdgeCPUParams; +class FUPool; +//class MemInterface; + +/** + * This is a IQ working in data flow execution style. + * @todo: Add comments here to make clear of how this IQ works. + * @todo: Make IQ able to handle multiple FU pools. + */ +template +class InstructionQueue +{ + public: + //Typedefs from the Impl. + typedef typename Impl::CPU CPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + typedef typename Impl::CPUPol::Execute Execute; + typedef typename Impl::CPUPol::MemDepUnit MemDepUnit; + typedef typename Impl::CPUPol::Issue2Execute Issue2Execute; + typedef typename Impl::CPUPol::TimeStruct TimeStruct; + + // Typedef of iterator through the list of instructions. + typedef typename std::list::iterator ListIt; + typedef typename std::deque::iterator QueueIt; + + // Typedef of ISA specific types. + typedef TheISA::BlockID BlockID; + typedef TheISA::InstID InstID; + typedef TheISA::ConsumerID ConsumerID; + typedef TheISA::ConsumerType ConsumerType; + typedef TheISA::ConsumerSubType ConsumerSubType; + + typedef TheISA::OpSize OpSize; + + friend class Impl::CPU; + + /** FU completion event class. */ + class FUCompletion : public Event { + private: + /** Executing instruction. */ + DynInstPtr inst; + + /** Index of the FU used for executing. */ + int fuIdx; + + /** Pointer back to the instruction queue. */ + InstructionQueue *iqPtr; + + /** Should the FU be added to the list to be freed upon + * completing this event. + */ + bool freeFU; + + public: + /** Construct a FU completion event. 
*/ + FUCompletion(DynInstPtr &_inst, int fu_idx, + InstructionQueue *iq_ptr); + + virtual void process(); + virtual const char *description() const; + void setFreeFU() { freeFU = true; } + }; + + /** Constructs an IQ. */ + InstructionQueue(CPU *cpu_ptr, Execute* execute, SimpleEdgeCPUParams *params); + + /** Destructs the IQ. */ + ~InstructionQueue(); + + /** Returns the name of the IQ. */ + std::string name() const; + + /** Registers statistics. */ + void regStats(); + + /** Resets all instruction queue state. */ + void resetState(); + + /** Sets active threads list. */ + void setActiveThreads(std::list *at_ptr); + + /** Sets the timer buffer between issue and execute. */ + void setIssueToExecuteQueue(TimeBuffer *i2eQueue); + + /** Sets the global time buffer. */ + void setTimeBuffer(TimeBuffer *tb_ptr); + + /** Switches out the instruction queue. */ + void switchOut(); + + /** Takes over execution from another CPU's thread. */ + void takeOverFrom(); + + /** Returns if the IQ is switched out. */ + bool isSwitchedOut() { return switchedOut; } + + /** Number of entries needed for given amount of threads. */ + int entryAmount(ThreadID num_threads); + + /** Resets max entries for all threads. */ + void resetEntries(); + + /** Returns total number of free entries. */ + unsigned numFreeEntries(); + + /** Returns number of free entries for a thread. */ + unsigned numFreeEntries(ThreadID tid); + + /** Returns whether or not the IQ is full. */ + bool isFull(); + + /** Returns whether or not the IQ is full for a specific thread. */ + bool isFull(ThreadID tid); + + /** Returns if there are any ready instructions in the IQ. */ + bool hasReadyInsts(); + + /** Returns the LSID of the last fetched store. */ + TheISA::LsID getLastFetchedStore(){ return lastFetchedStore;} + + /** Inserts a new instruction into the IQ. */ + void insert(DynInstPtr &new_inst); + + /** Builds mem inst dependent. 
*/ + void buildMemDependent( DynInstPtr &mem_inst); + + /** Inserts a new, non-speculative instruction into the IQ. */ + void insertNonSpec(DynInstPtr &new_inst); + + /** Inserts a memory or write barrier into the IQ to make sure + * loads and stores are ordered properly. + */ + void insertBarrier(DynInstPtr &barr_inst); + + /** Returns the oldest scheduled instruction, and removes it from + * the list of instructions waiting to execute. + */ + DynInstPtr getInstToExecute(); + + /** + * Records the instruction as the producer of a register without + * adding it to the rest of the IQ. + * @todo: Delete this method due to its unability of supporting data flow + * execution. + */ + void recordProducer(DynInstPtr &inst) + { addToProducers(inst); } + + /** Process FU completion event. */ + void processFUCompletion(DynInstPtr &inst, int fu_idx); + + /** + * Schedules ready instructions, adding the ready ones (oldest first) to + * the queue to execute. + */ + void scheduleReadyInsts(); + + /** Schedules a single specific non-speculative instruction. */ + void scheduleNonSpec(const InstSeqNum &inst); + + /** + * Commits all instructions up to and including the given sequence number, + * for a specific thread and a specific block. + */ + void commit(const TheISA::BlockID &commit_bid, ThreadID tid = 0); + + /** + * Write back register write instructions in one specific inst block for a specific thread. + */ + void writeBack(const TheISA::BlockID &inst_block, ThreadID tid = 0 ); + + /** + * Mark instructions belongs to a specific inst block as block-completed. + */ + void complete( BlockPtr & inst_block ); + + /** Complete reg-write inst in reg dep graph. */ + void completeInRegDepGraph(DynInstPtr &inst); + + /** Wakes all dependents of a completed instruction. + * This is handled in an data flow execution style. + */ + int wakeDependents(DynInstPtr &completed_inst); + + /** Adds a ready memory instruction to the ready list. 
*/ + void addReadyMemInst(DynInstPtr &ready_inst); + + /** + * Reschedules a memory instruction. It will be ready to issue once + * replayMemInst() is called. + */ + void rescheduleMemInst(DynInstPtr &resched_inst); + + /** Replays a memory instruction. It must be rescheduled first. + * @todo: Figure out if this method should be reserved or not. + */ + void replayMemInst(DynInstPtr &replay_inst); + + /** Moves an instruction to the ready queue if it is ready. */ + void addIfReady(DynInstPtr &inst); + + /** Completes a memory operation. */ + void completeMemInst(DynInstPtr &completed_inst); + + /** Indicates an ordering violation between a store and a load. */ + void violation(DynInstPtr &store, DynInstPtr &faulting_load); + + /** + * Squashes instructions for a thread. Squashing information is obtained + * from the time buffer. + */ + void squash(ThreadID tid); + + /** Returns the number of used entries for a thread. */ + unsigned getCount(ThreadID tid) { return count[tid]; }; + + /** Debug function to print all instructions. */ + void printInsts(); + + private: + /** Does the actual squashing. */ + void doSquash(ThreadID tid); + + ///////////////////////// + // Various pointers + ///////////////////////// + + /** Pointer to the CPU. */ + CPU *cpu; + + /** + * Cache interface. + * @todo: Figure out if we can delete this. + */ + //MemInterface *dcacheInterface; + + /**Pointer to Execute stage.*/ + Execute *executeStage; + + /** The memory dependence unit, which tracks/predicts memory dependences + * between instructions. + */ + MemDepUnit memDepUnit[Impl::MaxThreads]; + + /** The queue to the execute stage. Issued instructions will be written + * into it. + */ + TimeBuffer *issueToExecuteQueue; + + /** The backwards time buffer. */ + TimeBuffer *timeBuffer; + + /** Wire to read information from timebuffer. */ + typename TimeBuffer::wire fromCommit; + + /** Function unit pool. 
*/ + FUPool *fuPool; + + ////////////////////////////////////// + // Instruction queues, ready queues, and ordering + ////////////////////////////////////// + + /** List of all the instructions in the IQ (some of which may be issued). */ + std::deque instQueue[Impl::MaxThreads][Impl::MaxFrameNum]; + + /** Put write inst iterators here for write-back. */ + std::list writeList[Impl::MaxThreads]; + + /** List of instructions that are ready to be executed. */ + std::list instsToExecute; + + /** + * Struct for comparing entries to be added to the priority queue. + * This gives reverse ordering to the instructions in terms of + * sequence numbers: the instructions with smaller sequence + * numbers (and hence are older) will be at the top of the + * priority queue. + */ + struct pqCompare { + bool operator() (const DynInstPtr &lhs, const DynInstPtr &rhs) const + { + return lhs->seqNum > rhs->seqNum; + } + }; + + typedef std::priority_queue, pqCompare> + ReadyInstQueue; + + /** List of ready instructions, per op class. They are separated by op + * class to allow for easy mapping to FUs. + */ + ReadyInstQueue readyInsts[Num_OpClasses]; + + /** List of non-speculative instructions that will be scheduled + * once the IQ gets a signal from commit. While it's redundant to + * have the key be a part of the value (the sequence number is stored + * inside of DynInst), when these instructions are woken up only + * the sequence number will be available. Thus it is most efficient to be + * able to search by the sequence number alone. + * @todo: Figure out if we really need this. + */ + std::map nonSpecInsts; + + typedef typename std::map::iterator NonSpecMapIt; + + /** Entry for the list age ordering by op class. */ + struct ListOrderEntry { + OpClass queueType; + InstSeqNum oldestInst; + }; + + /** List that contains the age order of the oldest instruction of each + * ready queue. Used to select the oldest instruction available + * among op classes. 
+ * @todo: Might be better to just move these entries around instead + * of creating new ones every time the position changes due to an + * instruction issuing. Not sure std::list supports this. + */ + std::list listOrder; + + typedef typename std::list::iterator ListOrderIt; + + /** Tracks if each ready queue is on the age order list. */ + bool queueOnList[Num_OpClasses]; + + /** Iterators of each ready queue. Points to their spot in the age order + * list. + */ + ListOrderIt readyIt[Num_OpClasses]; + + /** Add an op class to the age order list. */ + void addToOrderList(OpClass op_class); + + /** + * Called when the oldest instruction has been removed from a ready queue; + * this places that ready queue into the proper spot in the age order list. + */ + void moveToYoungerInst(ListOrderIt age_order_it); + + /** Register dependence graph to maintain register forwarding between + * inst blocks. + */ + DependencyGraph regDepGraph; + + ////////////////////////////////////// + // Various parameters + ////////////////////////////////////// + + /** IQ Resource Sharing Policy */ + enum IQPolicy { + Dynamic, + Partitioned, + Threshold + }; + + /** IQ sharing policy for SMT. */ + IQPolicy iqPolicy; + + /** Number of Total Threads*/ + ThreadID numThreads; + + /** Pointer to list of active threads. */ + std::list *activeThreads; + + /** Per Thread IQ count */ + unsigned count[Impl::MaxThreads]; + + /** Max IQ Entries Per Thread */ + unsigned maxEntries[Impl::MaxThreads]; + + /** Number of free IQ entries left. */ + unsigned freeEntries; + + /** The number of entries in the instruction queue. */ + unsigned numEntries; + + /** The total number of instructions that can be issued in one cycle. */ + unsigned totalWidth; + + /** The number of physical registers in the CPU. */ + unsigned numPhysRegs; + + /** The number of physical integer registers in the CPU. */ + unsigned numPhysIntRegs; + + /** The number of floating point registers in the CPU. 
*/ + unsigned numPhysFloatRegs; + + /** Delay between commit stage and the IQ. + * @todo: Make there be a distinction between the delays within execution. + */ + unsigned commitToExecuteDelay; + + /** Is the IQ switched out. */ + bool switchedOut; + + /** Last fetch store will be marked using the LSID. */ + TheISA::LsID lastFetchedStore; + + /** The sequence number of the squashed instruction. */ + InstSeqNum squashedSeqNum[Impl::MaxThreads]; + + /** A cache of the recently woken registers. It is 1 if the register + * has been woken up recently, and 0 if the register has been added + * to the dependency graph and has not yet received its value. + * In EDGE model, register will be modified by special register read/write + * insts. This will happen when inst blocks start executing (for read) + * or end executing( for write). Scoreboard will mark the register as + * ready or not after inst blocks committed. + */ + std::vector regScoreboard; + + /** Adds an instruction to the dependency graph, as a consumer. + * @todo: Is this neccesary? + */ + bool addToDependents(DynInstPtr &new_inst); + + /** Adds an instruction to the dependency graph, as a producer. + * @todo: Is this neccesary? + */ + void addToProducers(DynInstPtr &new_inst); + + /** Moves an instruction to the ready queue if it is ready. */ + //void addIfReady(DynInstPtr &inst); + + /** Debugging function to count how many entries are in the IQ. It does + * a linear walk through the instructions, so do not call this function + * during normal execution. + */ + int countInsts(); + + /** Debugging function to dump all the list sizes, as well as print + * out the list of nonspeculative instructions. Should not be used + * in any other capacity, but it has no harmful sideaffects. + */ + void dumpLists(); + + /** Debugging function to dump out all instructions that are in the + * IQ. + */ + void dumpInsts(); + + + // @todo: Tidy up these statistics. + + /** Stat for number of instructions added. 
*/ + Stats::Scalar iqInstsAdded; + + Stats::Scalar iqInstsIssued; + /** Stat for number of integer instructions issued. */ + Stats::Scalar iqIntInstsIssued; + /** Stat for number of floating point instructions issued. */ + Stats::Scalar iqFloatInstsIssued; + /** Stat for number of branch instructions issued. */ + Stats::Scalar iqBranchInstsIssued; + /** Stat for number of memory instructions issued. */ + Stats::Scalar iqMemInstsIssued; + /** Stat for number of miscellaneous instructions issued. */ + Stats::Scalar iqMiscInstsIssued; + /** Stat for number of squashed instructions that were ready to issue. */ + Stats::Scalar iqDummyInstsIssued; + /** Stat for number of squashed instructions examined when squashing. */ + Stats::Scalar iqInstsSquashed; + /** Stat for number of reg-reads dependent upon unexecuted reg-writes. */ + Stats::Scalar iqRegReadDep; + /** Stat for number of reg-writes writed back. */ + Stats::Scalar iqRegWriteWrited; + /** Stat for number of operands propagated. */ + Stats::Scalar iqOperandsPropagated; + /** Stat for number of nullified token propagated. */ + Stats::Scalar iqNullTokenPropagated; + /** Stat for number of exeception token propagated. */ + Stats::Scalar iqExceptTokenPropagated; + /** Stat for number of predication propagated. */ + Stats::Scalar iqPredicationPropagated; + + // Also include number of instructions rescheduled and replayed. + + /** Distribution of the number of instructions issued. */ + Stats::Distribution numIssuedDist; + + /** Number of times an instruction could not be issued because a + * FU was busy. + */ + Stats::Vector statFuBusy; + + /** Stat for total number issued for each instruction type. */ + Stats::Vector2d statIssuedInstType; + + /** Number of instructions issued per cycle. */ + Stats::Formula issueRate; + + /** Number of times the FU was busy. */ + Stats::Vector fuBusy; + /** Number of times the FU was busy per instruction issued. 
*/ + Stats::Formula fuBusyRate; +}; + +#endif //__CPU_EDGE_INST_QUEUE_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/inst_queue.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/inst_queue.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/inst_queue_impl.hh" + +// Force instantiation of InstructionQueue. +template class InstructionQueue; diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/inst_queue_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/inst_queue_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,1577 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include +#include + +#include "cpu/edge/fu_pool.hh" +#include "cpu/edge/inst_queue.hh" +#include "enums/OpClass.hh" +#include "params/SimpleEdgeCPU.hh" +#include "sim/core.hh" +#include "base/bitfield.hh" + +using namespace std; + +template +InstructionQueue::FUCompletion::FUCompletion(DynInstPtr &_inst, + int fu_idx, InstructionQueue *iq_ptr) + : Event(Stat_Event_Pri), inst(_inst), fuIdx(fu_idx), iqPtr(iq_ptr), + freeFU(false) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +InstructionQueue::FUCompletion::process() +{ + iqPtr->processFUCompletion(inst, freeFU ? fuIdx : -1); + inst = NULL; +} + + +template +const char * +InstructionQueue::FUCompletion::description() const +{ + return "Functional unit completion"; +} + +template +InstructionQueue::InstructionQueue(CPU *cpu_ptr, Execute* execute, SimpleEdgeCPUParams *params) + : cpu(cpu_ptr), + executeStage(execute), + fuPool(params->fuPool), + numEntries(params->numIQEntries * ( Impl::MaxFrameNum )), + totalWidth(params->issueWidth), + numPhysIntRegs(params->numPhysIntRegs), + numPhysFloatRegs(params->numPhysFloatRegs), + commitToExecuteDelay(params->commitToExecuteDelay) +{ + + assert(fuPool); + + switchedOut = false; + + numThreads = params->numThreads; + + // Set the number of physical registers as the number of int + float + numPhysRegs = numPhysIntRegs + numPhysFloatRegs; + + // Set register dependence graph + regDepGraph.resize(numPhysRegs); + regDepGraph.setInstQueuePtr(this); + + //Initialize Mem Dependence Units + for (ThreadID tid = 0; tid < numThreads; tid++) { + memDepUnit[tid].init(params, tid); + memDepUnit[tid].setIQ(this); + } + + resetState(); + + std::string policy = params->smtIQPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out resource sharing policy + if (policy == "dynamic") { + iqPolicy = Dynamic; + 
+ //Set Max Entries to Total ROB Capacity + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = numEntries; + } + + } else if (policy == "partitioned") { + iqPolicy = Partitioned; + + //@todo:make work if part_amt doesnt divide evenly. + int part_amt = numEntries / numThreads; + + //Divide ROB up evenly + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = part_amt; + } + + DPRINTF(EdgeIQ, "EdgeIQ sharing policy set to Partitioned:" + "%i entries per thread.\n",part_amt); + } else if (policy == "threshold") { + iqPolicy = Threshold; + + double threshold = (double)params->smtIQThreshold / 100; + + int thresholdIQ = (int)((double)threshold * numEntries); + + //Divide up by threshold amount + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = thresholdIQ; + } + + DPRINTF(EdgeIQ, "EdgeIQ sharing policy set to Threshold:" + "%i entries per thread.\n",thresholdIQ); + } else { + assert(0 && "Invalid EdgeIQ Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } + + #if 0 + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = (int)((Impl::FrameNum)/numThreads)*(Impl::SlotNum); + } + #endif + + //lastFetchedStore = 0; + +} + +template +InstructionQueue::~InstructionQueue() +{ + regDepGraph.reset(); +#ifdef DEBUG + cprintf("Nodes traversed: %i, removed: %i\n", + regDepGraph.nodesTraversed, regDepGraph.nodesRemoved); +#endif +} + +template +std::string +InstructionQueue::name() const +{ + return cpu->name() + ".iq"; +} + +template +void +InstructionQueue::regStats() +{ + using namespace Stats; + iqInstsAdded + .name(name() + ".iqInstsAdded") + .desc("Number of instructions added to the IQ ") + .prereq(iqInstsAdded); + + iqInstsIssued + .name(name() + ".iqInstsIssued") + .desc("Number of instructions issued") + .prereq(iqInstsIssued); + + iqIntInstsIssued + .name(name() + ".iqIntInstsIssued") + .desc("Number of integer instructions issued") + .prereq(iqIntInstsIssued); + + 
iqFloatInstsIssued + .name(name() + ".iqFloatInstsIssued") + .desc("Number of float instructions issued") + .prereq(iqFloatInstsIssued); + + iqBranchInstsIssued + .name(name() + ".iqBranchInstsIssued") + .desc("Number of branch instructions issued") + .prereq(iqBranchInstsIssued); + + iqMemInstsIssued + .name(name() + ".iqMemInstsIssued") + .desc("Number of memory instructions issued") + .prereq(iqMemInstsIssued); + + iqMiscInstsIssued + .name(name() + ".iqMiscInstsIssued") + .desc("Number of miscellaneous instructions issued") + .prereq(iqMiscInstsIssued); + + iqDummyInstsIssued + .name(name() + ".iqDummyInstsIssued") + .desc("Number of squashed or block competed instructions issued") + .prereq(iqDummyInstsIssued); + + iqInstsSquashed + .name(name() + ".iqInstsSquashed") + .desc("Number of squashed instructions.") + .prereq(iqInstsSquashed); + + iqRegReadDep + .name(name() + ".iqRegReadDep") + .desc("Number of reg-reads dependent upon unexecuted reg-writes.") + .prereq(iqRegReadDep); + + iqRegWriteWrited + .name(name() + ".iqRegWriteWrited") + .desc("Number of reg-writes really writed back.") + .prereq(iqRegWriteWrited); + + iqOperandsPropagated + .name(name() + ".iqOperandsPropagated") + .desc("Number of operands propagated.") + .prereq(iqOperandsPropagated); + + iqNullTokenPropagated + .name(name() + ".iqNullTokenPropagated") + .desc("Number of null token propagated.") + .prereq(iqNullTokenPropagated); + + iqExceptTokenPropagated + .name(name() + ".iqExceptTokenPropagated") + .desc("Number of except token propagated.") + .prereq(iqExceptTokenPropagated); + + iqPredicationPropagated + .name(name() + ".iqPredicationPropagated") + .desc("Number of predication propagated.") + .prereq(iqPredicationPropagated); + + numIssuedDist + .init(0,totalWidth,1) + .name(name() + ".ISSUE:issued_per_cycle") + .desc("Number of insts issued each cycle") + .flags(pdf) + ; + + statIssuedInstType + .init(numThreads,Enums::Num_OpClass) + .name(name() + ".ISSUE:FU_type") + .desc("Type 
of FU issued") + .flags(total | pdf | dist) + ; + statIssuedInstType.ysubnames(Enums::OpClassStrings); + + issueRate + .name(name() + ".ISSUE:rate") + .desc("Inst issue rate") + .flags(total) + ; + issueRate = iqInstsIssued / cpu->numCycles; + + statFuBusy + .init(Num_OpClasses) + .name(name() + ".ISSUE:fu_full") + .desc("attempts to use FU when none available") + .flags(pdf | dist) + ; + for (int i=0; i < Num_OpClasses; ++i) { + statFuBusy.subname(i, Enums::OpClassStrings[i]); + } + + fuBusy + .init(numThreads) + .name(name() + ".ISSUE:fu_busy_cnt") + .desc("FU busy when requested") + .flags(total) + ; + + fuBusyRate + .name(name() + ".ISSUE:fu_busy_rate") + .desc("FU busy rate (busy events/executed inst)") + .flags(total) + ; + fuBusyRate = fuBusy / iqInstsIssued; + + for (ThreadID tid = 0; tid < numThreads; tid++) { + // Tell mem dependence unit to reg stats as well. + memDepUnit[tid].regStats(); + } +} + +template +void +InstructionQueue::resetState() +{ + //Initialize thread IQ counts + for (ThreadID tid = 0; tid +void +InstructionQueue::setActiveThreads(list *at_ptr) +{ + activeThreads = at_ptr; +} + +template +void +InstructionQueue::setIssueToExecuteQueue(TimeBuffer *i2e_ptr) +{ + issueToExecuteQueue = i2e_ptr; +} + +template +void +InstructionQueue::setTimeBuffer(TimeBuffer *tb_ptr) +{ + timeBuffer = tb_ptr; + + fromCommit = timeBuffer->getWire(-commitToExecuteDelay); +} + +template +void +InstructionQueue::switchOut() +{ + for ( int i = 0; i < Impl::MaxFrameNum; i ++ ) { + if (!instQueue[0][i].empty() || (numEntries != freeEntries) || + !readyInsts[0].empty() || !nonSpecInsts.empty() || !listOrder.empty()) { + dumpInsts(); + assert(0); + } + } + resetState(); + regDepGraph.reset(); + instsToExecute.clear(); + switchedOut = true; + for (ThreadID tid = 0; tid < numThreads; ++tid) { + memDepUnit[tid].switchOut(); + } +} + +template +void +InstructionQueue::takeOverFrom() +{ + switchedOut = false; +} + +template +int +InstructionQueue::entryAmount(ThreadID 
num_threads)
+{
+    if (iqPolicy == Partitioned) {
+        return numEntries / num_threads;
+    } else {
+        return 0;
+    }
+}
+
+
+/**
+ * Recomputes maxEntries[] for every thread on the active-thread list.
+ * Under the Partitioned policy numEntries is split evenly between the
+ * active threads; under the Threshold policy a thread gets the whole
+ * queue only when it is the single active thread. Nothing is changed
+ * for a Dynamic queue with at most one thread.
+ */
+template
+void
+InstructionQueue::resetEntries()
+{
+    // Dynamic sharing with a single thread never needs rebalancing.
+    if (iqPolicy == Dynamic && numThreads <= 1) {
+        return;
+    }
+
+    const int num_active = activeThreads->size();
+    const list::iterator last = activeThreads->end();
+
+    for (list::iterator it = activeThreads->begin(); it != last; ++it) {
+        const ThreadID tid = *it;
+
+        if (iqPolicy == Partitioned) {
+            maxEntries[tid] = numEntries / num_active;
+        } else if (iqPolicy == Threshold && num_active == 1) {
+            maxEntries[tid] = numEntries;
+        }
+    }
+}
+
+/** Returns the number of free entries in the whole queue. */
+template
+unsigned
+InstructionQueue::numFreeEntries()
+{
+    return freeEntries;
+}
+
+/**
+ * Returns how many more entries thread tid may take, i.e. its limit
+ * minus its current count. Both operands are unsigned, so the result
+ * wraps if count[tid] ever exceeds maxEntries[tid].
+ */
+template
+unsigned
+InstructionQueue::numFreeEntries(ThreadID tid)
+{
+    const unsigned remaining = maxEntries[tid] - count[tid];
+    return remaining;
+}
+
+// Might want to do something more complex if it knows how many instructions
+// will be issued this cycle.
+/** Returns true when no free entry is left in the whole queue. */
+template
+bool
+InstructionQueue::isFull()
+{
+    return freeEntries == 0;
+}
+
+/** Returns true when thread tid has used up its share of the queue. */
+template
+bool
+InstructionQueue::isFull(ThreadID tid)
+{
+    return numFreeEntries(tid) == 0;
+}
+
+/**
+ * Returns true if the age-order list is non-empty or any per-op-class
+ * ready queue still holds an instruction.
+ */
+template
+bool
+InstructionQueue::hasReadyInsts()
+{
+    bool any_ready = !listOrder.empty();
+
+    // Stop scanning as soon as one non-empty ready queue is found.
+    for (int op = 0; !any_ready && op < Num_OpClasses; ++op) {
+        any_ready = !readyInsts[op].empty();
+    }
+
+    return any_ready;
+}
+
+template
+void
+InstructionQueue::insert(DynInstPtr &new_inst)
+{
+    // Make sure the instruction is valid
+    assert(new_inst);
+
+    ThreadID tid = new_inst->threadNumber;
+    int frame_id = new_inst->getFrameID();
+
+    DPRINTF(EdgeIQTest, "Adding instruction [Bid:%lli][Iid:%lli] @PC %#x to IQ[tid:%i][Frame:%i]."
+ " Inst has %i operands\n", + new_inst->getBlockID(), new_inst->getInstID(), + new_inst->readPC(),tid, frame_id, + new_inst->getNumOperands()); + + assert(freeEntries != 0); + + if ( instQueue[tid][frame_id].size() != 0 ) { + assert(instQueue[tid][frame_id].front()->getBlockID() == + new_inst->getBlockID()); + DPRINTF(EdgeIQTest,"%lli -- %lli, size %i\n", + instQueue[tid][frame_id].front()->getBlockID(), + new_inst->getBlockID(), + instQueue[tid][frame_id].size()); + } + + instQueue[tid][frame_id].push_back(new_inst); + + // Set the queue iterator for this inst + new_inst->setInstQueueIt(--instQueue[tid][frame_id].end()); + + // For reg-write insts, add it to writeList for write-back when + // this block commits. + if ( new_inst->isGRegWR() && new_inst->isGRegWriteValid() ) { + + DPRINTF(EdgeIQ, "Reg Write inst encountered, add it to write list\n"); + + writeList[tid].push_back(new_inst->getInstQueueIt()); + + // Add reg-write inst to register dep graph as producer. + addToProducers(new_inst); + } + + if ( new_inst->isGRegWR() && new_inst->isGRegReadValid() ) { + + DPRINTF(EdgeIQ, "Reg Read inst encountered, mark it as ready.\n" ); + + // Set reg-read inst to reg dep graph as dependents. + addToDependents(new_inst); + + } else if ( new_inst->getNumOperands() == 0 && + (!new_inst->isNop()) && + (new_inst->staticInst->getPredication() == TheISA::Disable || + new_inst->staticInst->getPredication() == TheISA::Reserved)) { + + DPRINTF(EdgeIQ, "Inst need no operand, mark it as ready.\n"); + + new_inst->setCanIssue(); + + } + + --freeEntries; + + new_inst->setInIQ(); + + if (new_inst->isMemRef()) { + + DPRINTF(EdgeIQ, "Mem ref inst encounted, dependent will be built later.\n" ); + + } else { + // Check to see if this inst can issue. 
+        addIfReady(new_inst);
+
+    }
+
+    ++iqInstsAdded;
+
+    count[new_inst->threadNumber]++;
+
+    // Sanity check: the free-entry counter must agree with a full recount.
+    assert(freeEntries == (numEntries - countInsts()));
+
+}
+
+/**
+ * Hands a memory-reference instruction to the memory dependence unit of
+ * its thread. The instruction must be a memory reference (asserted).
+ * The lastFetchedStore bookkeeping around the insert is compiled out.
+ */
+template
+void
+InstructionQueue::buildMemDependent(DynInstPtr &mem_inst)
+{
+    assert(mem_inst->isMemRef() );
+#if 0
+    if(mem_inst->staticInst->getLSID()==0){
+        lastFetchedStore = 0;
+    }
+#endif
+    memDepUnit[mem_inst->threadNumber].insert(mem_inst);
+#if 0
+    if(mem_inst ->isStore()) {
+        lastFetchedStore = mem_inst ->staticInst->getLSID();
+    }
+#endif
+}
+
+/**
+ * Placeholder for inserting a non-speculative instruction: it always
+ * panics. Everything after the panic() call is disabled with #if 0 and
+ * is kept only as a reference implementation.
+ */
+template
+void
+InstructionQueue::insertNonSpec(DynInstPtr &new_inst)
+{
+    // @todo:There's no non speculative insts right now ...
+    panic("Unimplemented func: insertNonSpec(). \n");
+#if 0
+    // @todo: Clean up this code; can do it by setting inst as unable
+    // to issue, then calling normal insert on the inst.
+
+    assert(new_inst);
+
+    nonSpecInsts[new_inst->seqNum] = new_inst;
+
+    DPRINTF(EdgeIQ, "Adding non-speculative instruction [sn:%lli] PC %#x "
+            "to the IQ.\n",
+            new_inst->seqNum, new_inst->readPC());
+
+    assert(freeEntries != 0);
+
+    instList[new_inst->threadNumber].push_back(new_inst);
+
+    --freeEntries;
+
+    new_inst->setInIQ();
+
+    // Have this instruction set itself as the producer of its destination
+    // register(s).
+    addToProducers(new_inst);
+#if 0
+    // If it's a memory instruction, add it to the memory dependency
+    // unit.
+    if (new_inst->isMemRef()) {
+        memDepUnit[new_inst->threadNumber].insertNonSpec(new_inst);
+    }
+#endif
+    ++iqNonSpecInstsAdded;
+
+    count[new_inst->threadNumber]++;
+
+    assert(freeEntries == (numEntries - countInsts()));
+#endif
+}
+
+/**
+ * Placeholder for inserting a memory barrier: it panics unconditionally.
+ */
+template
+void
+InstructionQueue::insertBarrier(DynInstPtr &barr_inst)
+{
+    // @todo:Don't know how to implemented barrier right now.
+    panic("Unimplemented func: insertBarrier. 
\n");
+#if 0
+    memDepUnit[barr_inst->threadNumber].insertBarrier(barr_inst);
+    insertNonSpec(barr_inst);
+#endif
+}
+
+/**
+ * Removes and returns the instruction at the front of instsToExecute.
+ * The list must not be empty (asserted).
+ */
+template
+typename Impl::DynInstPtr
+InstructionQueue::getInstToExecute()
+{
+    assert(!instsToExecute.empty());
+
+    DynInstPtr next_inst = instsToExecute.front();
+    instsToExecute.pop_front();
+
+    return next_inst;
+}
+
+/**
+ * Puts the ready queue of op_class on the age-order list, keyed by the
+ * sequence number of the instruction at the top of that ready queue.
+ * The entry goes in front of the first existing entry whose oldest
+ * instruction has a larger sequence number. The ready queue must not be
+ * empty (asserted). Records the new position in readyIt[] and sets
+ * queueOnList[].
+ */
+template
+void
+InstructionQueue::addToOrderList(OpClass op_class)
+{
+    assert(!readyInsts[op_class].empty());
+
+    ListOrderEntry new_entry;
+    new_entry.queueType = op_class;
+    new_entry.oldestInst = readyInsts[op_class].top()->seqNum;
+
+    // Walk past every entry that is not younger than the new one.
+    ListOrderIt pos = listOrder.begin();
+    const ListOrderIt last = listOrder.end();
+    while (pos != last && (*pos).oldestInst <= new_entry.oldestInst) {
+        ++pos;
+    }
+
+    readyIt[op_class] = listOrder.insert(pos, new_entry);
+    queueOnList[op_class] = true;
+}
+
+/**
+ * Re-inserts a ready queue into the age-order list after its oldest
+ * instruction was popped. A fresh entry keyed by the new top of the
+ * ready queue is inserted somewhere after list_order_it, in front of
+ * the first later entry that is not older than it, and readyIt[] is
+ * updated. The stale entry at list_order_it is NOT erased here; in
+ * scheduleReadyInsts() the caller erases it afterwards.
+ */
+template
+void
+InstructionQueue::moveToYoungerInst(ListOrderIt list_order_it)
+{
+    const OpClass op_class = (*list_order_it).queueType;
+
+    ListOrderEntry updated;
+    updated.queueType = op_class;
+    updated.oldestInst = readyInsts[op_class].top()->seqNum;
+
+    // Start at the element after the stale entry and skip everything
+    // that is still older than the queue's new oldest instruction.
+    ListOrderIt pos = list_order_it;
+    do {
+        ++pos;
+    } while (pos != listOrder.end() &&
+             (*pos).oldestInst < updated.oldestInst);
+
+    readyIt[op_class] = listOrder.insert(pos, updated);
+}
+
+template
+void
+InstructionQueue::processFUCompletion(DynInstPtr &inst, int fu_idx)
+{
+    DPRINTF(EdgeIQ, "Processing FU completion [sn:%lli]\n", inst->seqNum);
+    // The CPU could have been sleeping until this op completed (*extremely*
+    // long latency op). Wake it if it was. This may be overkill.
+ + // If block of some insts have completed, we should just return. + if (isSwitchedOut()||inst->isBlockCompleted()) { + + DPRINTF(EdgeIQ, "FU completion not processed, " + "IQ is switched out or block has completed [sn:%lli]\n", + inst->seqNum); + + // If block of this inst completed and there's a fu-unit used, free it + if ( inst->isBlockCompleted() && fu_idx > -1 ) { + fuPool->freeUnitNextCycle(fu_idx); + } + + return; + } + + executeStage->wakeCPU(); + + if (fu_idx > -1) + fuPool->freeUnitNextCycle(fu_idx); + + // @todo: Ensure that these FU Completions happen at the beginning + // of a cycle, otherwise they could add too many instructions to + // the queue. + issueToExecuteQueue->access(-1)->size++; + instsToExecute.push_back(inst); +} + +// @todo: Figure out a better way to remove the squashed items from the +// lists. Checking the top item of each list to see if it's squashed +// wastes time and forces jumps. +template +void +InstructionQueue::scheduleReadyInsts() +{ + DPRINTF(EdgeIQ, "Attempting to schedule ready instructions from " + "the IQ.\n"); + + Issue2Execute *i2e_info = issueToExecuteQueue->access(0); + + // Have iterator to head of the list + // While I haven't exceeded bandwidth or reached the end of the list, + // Try to get a FU that can do what this op needs. + // If successful, change the oldestInst to the new top of the list, put + // the queue in the proper place in the list. + // Increment the iterator. + // This will avoid trying to schedule a certain op class if there are no + // FUs that handle it. 
+ ListOrderIt order_it = listOrder.begin(); + ListOrderIt order_end_it = listOrder.end(); + int total_issued = 0; + + while (total_issued < totalWidth && + executeStage->canIssue() && + order_it != order_end_it) { + OpClass op_class = (*order_it).queueType; + + assert(!readyInsts[op_class].empty()); + + DynInstPtr issuing_inst = readyInsts[op_class].top(); + + assert(issuing_inst->seqNum == (*order_it).oldestInst); + + if (issuing_inst->isSquashed() || issuing_inst->isBlockCompleted() ) { + + DPRINTF(EdgeIQ, "Inst[Bid:%lli][Iid:%lli] squashed or block completed, can't issue!\n", + issuing_inst->getBlockID(), issuing_inst->getInstID()); + + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + listOrder.erase(order_it++); + + ++iqDummyInstsIssued; + + continue; + } + + int idx = -2; + int op_latency = 1; + ThreadID tid = issuing_inst->threadNumber; + + if (op_class != No_OpClass) { + idx = fuPool->getUnit(op_class); + + if (idx > -1) { + op_latency = fuPool->getOpLatency(op_class); + } + } + + // If we have an instruction that doesn't require a FU, or a + // valid FU, then schedule for execution. + if (idx == -2 || idx != -1) { + if (op_latency == 1) { + i2e_info->size++; + instsToExecute.push_back(issuing_inst); + + // Add the FU onto the list of FU's to be freed next + // cycle if we used one. + if (idx >= 0) + fuPool->freeUnitNextCycle(idx); + } else { + int issue_latency = fuPool->getIssueLatency(op_class); + // Generate completion event for the FU + FUCompletion *execution = new FUCompletion(issuing_inst, + idx, this); + + cpu->schedule(execution, curTick + cpu->ticks(op_latency - 1)); + + // @todo: Enforce that issue_latency == 1 or op_latency + if (issue_latency > 1) { + // If FU isn't pipelined, then it must be freed + // upon the execution completing. 
+ execution->setFreeFU(); + } else { + // Add the FU onto the list of FU's to be freed next cycle. + fuPool->freeUnitNextCycle(idx); + } + } + + DPRINTF(EdgeIQ, "Thread %i: Issuing instruction PC %#x " + "[sn:%lli]\n", + tid, issuing_inst->readPC(), + issuing_inst->seqNum); + + readyInsts[op_class].pop(); + + if (!readyInsts[op_class].empty()) { + moveToYoungerInst(order_it); + } else { + readyIt[op_class] = listOrder.end(); + queueOnList[op_class] = false; + } + + issuing_inst->setIssued(); + ++total_issued; + + if (!issuing_inst->isMemRef()) { + DPRINTF(EdgeIQ, "Issuing none mem insts.\n"); + // In Edge architecture, entry of inst queue for one inst block will be freed + // once the inst block has been committed. + #if 0 + // Memory instructions can not be freed from the IQ until they + // complete. + ++freeEntries; + count[tid]--; + issuing_inst->clearInIQ(); + #endif + + } else { + DPRINTF(EdgeIQ, "Issuing mem insts.\n"); + memDepUnit[tid].issue(issuing_inst); + } + + listOrder.erase(order_it++); + statIssuedInstType[tid][op_class]++; + + // Only reg-write need to mark the wboutstanding variable. + if ( issuing_inst->isWriteRegIssuing() ) { + DPRINTF(EdgeIQ, "Issuing reg-write inst.\n"); + issuing_inst->setWriteRegIssued(); + executeStage->incrWb(issuing_inst->seqNum); + } else if ( issuing_inst->isReadRegIssuing() ) { + DPRINTF(EdgeIQ, "Issuing reg-read inst.\n"); + issuing_inst->setReadRegIssued(); + } + + } else { + statFuBusy[op_class]++; + fuBusy[tid]++; + ++order_it; + } + } + + numIssuedDist.sample(total_issued); + iqInstsIssued+= total_issued; + + // If we issued any instructions, tell the CPU we had activity. 
+    if (total_issued) {
+        cpu->activityThisCycle();
+    } else {
+        DPRINTF(EdgeIQ, "Not able to schedule any instructions.\n");
+    }
+}
+
+/**
+ * Marks the non-speculative instruction with sequence number inst as
+ * being at commit and able to issue, passes it to addIfReady() unless it
+ * is a memory reference, and removes it from nonSpecInsts. The sequence
+ * number must be present in nonSpecInsts (asserted).
+ * NOTE(review): insertNonSpec() currently panics, so nothing visible
+ * here populates nonSpecInsts -- confirm whether this path is reachable.
+ */
+template
+void
+InstructionQueue::scheduleNonSpec(const InstSeqNum &inst)
+{
+    DPRINTF(EdgeIQ, "Marking nonspeculative instruction [sn:%lli] as ready "
+            "to execute.\n", inst);
+
+    NonSpecMapIt inst_it = nonSpecInsts.find(inst);
+
+    assert(inst_it != nonSpecInsts.end());
+
+    //ThreadID tid = (*inst_it).second->threadNumber;
+
+    (*inst_it).second->setAtCommit();
+
+    (*inst_it).second->setCanIssue();
+
+    if (!(*inst_it).second->isMemRef()) {
+        addIfReady((*inst_it).second);
+    } else {
+        #if 0
+        memDepUnit[tid].nonSpecInstReady((*inst_it).second);
+        #endif
+    }
+
+    // Drop the map's reference before erasing the entry.
+    (*inst_it).second = NULL;
+
+    nonSpecInsts.erase(inst_it);
+}
+
+/**
+ * Releases the IQ entries of every frame of thread tid whose front
+ * instruction belongs to a block with ID <= commit_bid.
+ * For each such frame the number of instructions it holds is returned to
+ * freeEntries and removed from count[tid], then the frame is cleared.
+ *
+ * @param commit_bid Youngest block ID that has committed.
+ * @param tid Thread whose frames are examined.
+ */
+template
+void
+InstructionQueue::commit(const TheISA::BlockID &commit_bid, ThreadID tid)
+{
+    DPRINTF(EdgeIQ, "[tid:%i]: Committing inst block older than [Bid:%i]\n",
+            tid,commit_bid);
+
+    for ( int i = 0; i < Impl::MaxFrameNum; i ++ ) {
+        if ( !instQueue[tid][i].empty() &&
+             instQueue[tid][i].front()->getBlockID() <= commit_bid ) {
+
+            DPRINTF(EdgeIQ, "Inst in frame[%i] with Bid[%lli] committed.\n",
+                    i, instQueue[tid][i].front()->getBlockID());
+
+            // Account for this frame only. The previous code added a
+            // running total across frames to the counters on every
+            // iteration, so committing two or more frames in one call
+            // released the earlier frames' entries more than once.
+            const int frame_insts = instQueue[tid][i].size();
+
+            freeEntries += frame_insts;
+            count[tid] -= frame_insts;
+            instQueue[tid][i].clear();
+        }
+    }
+
+    // Sanity check: the free-entry counter must agree with a full recount.
+    assert(freeEntries == (numEntries - countInsts()));
+}
+
+template
+void
+InstructionQueue::complete(BlockPtr & inst_block)
+{
+
+    int frame_id = inst_block->getFrameID();
+    ThreadID tid = inst_block->getTid();
+
+    // We need to mark all the insts in inst queue as block completed
+    // to avoid scheduling inst when block completion condition has been
+    // satisfied. Here we should only mark inst having exactly the
+    // wanted block id to be completed.
+    DPRINTF(EdgeIQ, "[tid:%i]: Completing inst block [Bid:%i][Frame:%i]."
+            " Mark insts of this block as block completed. 
\n",
+            tid,
+            inst_block->getBlockID(),
+            frame_id);
+
+    QueueIt iq_it = instQueue[tid][frame_id].begin();
+
+    while (iq_it != instQueue[tid][frame_id].end()) {
+
+        assert((*iq_it)->getBlockID() == inst_block->getBlockID());
+
+        (*iq_it)->setBlockCompleted();
+        ++iq_it;
+
+    }
+
+}
+
+/**
+ * Marks a valid register-write instruction as complete in the register
+ * dependence graph, keyed by its (single) mapped destination register.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::completeInRegDepGraph(DynInstPtr &inst)
+{
+    assert(inst->isGRegWR() && inst->isGRegWriteValid());
+    regDepGraph.complete(inst->getMappedDestReg(0), inst);
+}
+
+/**
+ * Writes back every executed register-write instruction on the write list
+ * whose block id is smaller than or equal to inst_block.
+ *
+ * Non-nullified writes are executed (which performs the register write) and
+ * committed in the register dependence graph; nullified writes are only
+ * logged. In both cases the entry is removed from the write list and the
+ * execute stage's write-back count is decremented.
+ *
+ * @param inst_block Youngest block id whose writes should be written back.
+ * @param tid Thread whose write list is processed.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::writeBack(const TheISA::BlockID &inst_block, ThreadID tid)
+{
+    // All the writes having a smaller or equal block id to inst_block
+    // should be written back.
+    DPRINTF(EdgeIQ, "[tid:%i]: Write back inst block older than [Bid:%i]\n",
+            tid, inst_block);
+
+    // NOTE(review): the list's element type was lost in extraction; QueueIt
+    // is inferred from the dereference below -- confirm against the header.
+    typename std::list<QueueIt>::iterator list_it = writeList[tid].begin();
+
+    while (list_it != writeList[tid].end()) {
+
+        QueueIt it = *list_it;
+        assert((*it)->isGRegWR() && (*it)->isGRegWriteValid());
+
+        if ((*it)->getBlockID() <= inst_block &&
+            (*it)->isWriteRegExecuted()) {
+
+            if (!(*it)->isNullified()) {
+
+                // The WriteRegExecuted flag was already set when this
+                // write inst was processed in the execute stage. The
+                // ExecutingWriteReg flag is set here only so that
+                // execute() can distinguish a write from a read.
+                (*it)->setExecutingWriteReg();
+                // This will write the register.
+                (*it)->execute();
+
+                // Only insts that haven't been nullified commit in the reg
+                // dep graph here. A nullified inst should have been
+                // committed when it was nullified.
+                regDepGraph.commit((*it)->getMappedDestReg(0), (*it));
+
+                iqRegWriteWrited++;
+            } else {
+                DPRINTF(EdgeIQ, "Nullified reg-write inst[Bid:%lli][Iid:%lli]\n",
+                        (*it)->getBlockID(), (*it)->getInstID());
+            }
+
+            //@todo: Executed flag will be set in execute stage.
+            //(*it)->setExecuted();
+            executeStage->decrWb((*it)->seqNum);
+
+            // erase() returns the next valid iterator, so do not advance.
+            list_it = writeList[tid].erase(list_it);
+
+            continue;
+        }
+        ++list_it;
+    }
+}
+
+/**
+ * Delivers the result of a completed instruction to each of its consumers
+ * in the same frame and tries to make every consumer ready to issue.
+ *
+ * For each consumer slot the result is forwarded as a register-write
+ * operand, a predicate or an ordinary operand (0 or 1). Nullification and
+ * exception dataflow tokens are propagated to the consumer as well. Memory
+ * references additionally wake dependents in the memory dependence unit.
+ * Any pending exec trace record is dumped and freed.
+ *
+ * @param completed_inst The producer that has finished executing.
+ * @return The number of consumers a token was delivered to.
+ */
+template <class Impl>
+int
+InstructionQueue<Impl>::wakeDependents(DynInstPtr &completed_inst)
+{
+    int dependents = 0;
+
+    DPRINTF(EdgeIQ, "Waking dependents of completed instruction[Bid:%lli][Iid:%lli].\n",
+            completed_inst->getBlockID(), completed_inst->getInstID());
+
+    // Dump exec trace
+    if (completed_inst->traceData) {
+
+        completed_inst->traceData->dump();
+        delete completed_inst->traceData;
+        completed_inst->traceData = NULL;
+
+    }
+
+    assert(!completed_inst->isSquashed());
+
+    // Reg-write insts will never reach here because they
+    // have no consumers.
+    // The write class may have one consumer because of read, so
+    // we have to return it here.
+    // Stores have no consumer, so wakeDependents will not
+    // wake any consumer even if we don't check it here.
+    //if ( completed_inst->isExecutingWriteReg() ) {
+
+        //DPRINTF(EdgeIQ, "Executing reg-write, no consumers will be waken up.\n");
+        //return dependents;
+
+    //}
+
+    ThreadID tid = completed_inst->threadNumber;
+    int frame_id = completed_inst->getFrameID();
+
+    // Tell the memory dependence unit to wake any dependents on this
+    // instruction if it is a memory instruction.  Also complete the memory
+    // instruction at this point since we know it executed without issues.
+    // @todo: Might want to rename "completeMemInst" to something
+    // indicating that it won't need to be replayed, and call this
+    // earlier.  Might not be a big deal.
+
+    // No matter whether the mem insts have been nullified or not,
+    // they should always wake up dependents in memDepUnit.
+    if (completed_inst->isMemRef()) {
+        memDepUnit[tid].wakeDependents(completed_inst);
+        completeMemInst(completed_inst);
+    }
+
+    DynInstPtr consumer;
+    OpSize result = 0;
+
+    for (int consumer_idx = 0;
+         consumer_idx < completed_inst->getNumConsumers();
+         consumer_idx++)
+    {
+
+        uint8_t type =
+            completed_inst->getConsumerType(consumer_idx);
+
+        DPRINTF(EdgeIQ, "Waking any dependents on consumer[idx:%i][type:%i][id:%i]"
+                "in frame[%i].\n",
+                (int) consumer_idx,
+                (int)completed_inst->getConsumerType(consumer_idx),
+                (int)completed_inst->getConsumerID(consumer_idx),
+                frame_id
+                );
+
+        if (type == TheISA::WriteSlotOrNoTarget) {
+
+            uint8_t sub_type =
+                completed_inst->getConsumerSubType(consumer_idx);
+
+            if (sub_type == TheISA::WriteSlot) {
+
+                // Wake up the write inst. Write insts are indexed directly
+                // by consumer id (no ChunkSizeInWords offset).
+                consumer = instQueue[tid][frame_id]
+                    [completed_inst->getConsumerID(consumer_idx)];
+
+                assert(consumer->isGRegWR() &&
+                       consumer->isGRegWriteValid());
+
+                // consumer->setExecutingWriteReg();
+                // Operand of write-reg inst is defaulted to 0
+                consumer->setIntOperand(0,
+                                        completed_inst->getIntResult());
+                // Mark inst as receiving an operand
+                consumer->markOperandReady();
+
+                // If this reg-write inst is ready to issue,
+                // set the status.
+                if (consumer->readyToIssue()) {
+                    consumer->setWriteRegIssuing();
+                }
+
+            } else {
+                // No target, continue.
+                continue;
+            }
+        } else if (type == TheISA::PredSlot) {
+
+            // Wake up insts.
+            consumer = instQueue[tid][frame_id][TheISA::ChunkSizeInWords +
+                completed_inst->getConsumerID(consumer_idx)];
+
+            if (completed_inst->getIntResult() == 0) {
+                consumer->setPredStatus(TheISA::PredFalse);
+            } else {
+                consumer->setPredStatus(TheISA::PredTrue);
+            }
+
+            // Mark inst as receiving a pred
+            consumer->markPredReady();
+
+            iqPredicationPropagated++;
+
+        } else {
+
+            assert(type == TheISA::Operand0 || type == TheISA::Operand1);
+
+            result = completed_inst->getIntResult();
+            consumer = instQueue[tid][frame_id][TheISA::ChunkSizeInWords +
+                completed_inst->getConsumerID(consumer_idx)];
+
+            if (type == TheISA::Operand0) {
+                consumer->setIntOperand(0, result);
+            } else if (type == TheISA::Operand1) {
+                consumer->setIntOperand(1, result);
+            } else {
+                // Only reachable when asserts are compiled out.
+                panic("Unrecogonized operand number\n");
+            }
+
+            // Mark inst as receiving an operand
+            consumer->markOperandReady();
+
+        }
+
+        // Counted for every delivered token, predicates included.
+        iqOperandsPropagated++;
+
+        DPRINTF(EdgeIQ, "Consumer[Bid:%lli][Iid:%lli].\n",
+                consumer->getBlockID(), consumer->getInstID());
+
+        // Producer and consumer should be in the same inst block.
+        assert(completed_inst->getBlockID() == consumer->getBlockID());
+
+        // If the data flow token type of this producer is Nullification,
+        // set the consumer to be nullified, propagating the token.
+        if (completed_inst->getDataflowTokenType()
+            == TheISA::Nullification) {
+
+            // Trailing newline added: without it the next trace message
+            // was appended to this one.
+            DPRINTF(EdgeIQ, "Nullify consumer[Bid:%lli][Iid:%lli] @PC %lli\n",
+                    consumer->getBlockID(),
+                    consumer->getInstID(),
+                    consumer->readPC());
+
+            assert(type != TheISA::PredSlot);
+
+            consumer->setNullified();
+            consumer->setDataflowTokenType(TheISA::Nullification);
+
+            iqNullTokenPropagated++;
+
+        } else if (completed_inst->getDataflowTokenType()
+                   == TheISA::Exception) {
+
+            Fault fault = completed_inst->getFault();
+
+            assert(fault!=NoFault);
+
+            DPRINTF(EdgeIQ, "Propagate fault %s from"
+                    " producer[Bid%lli][Iid%lli] to consumer[Bid%lli][Iid%lli].\n",
+                    fault->name(), completed_inst->getBlockID(),
+                    completed_inst->getInstID(),
+                    consumer->getBlockID(), consumer->getInstID());
+
+            consumer->fault = fault;
+            // Propagate the fault
+            consumer->setDataflowTokenType(TheISA::Exception);
+
+            iqExceptTokenPropagated++;
+        }
+
+        DPRINTF(EdgeIQ, "Inst ID of consumer inst is %lli "
+                "while Consumer ID of complete inst is %lli\n",
+                consumer->getInstID(),
+                completed_inst->getConsumerID(consumer_idx));
+
+        // Check if this inst can be issued.
+        addIfReady(consumer);
+
+        // Previously this counter was never updated, so the function
+        // always reported zero dependents.
+        ++dependents;
+    }
+
+    return dependents;
+
+}
+
+/**
+ * Pushes a memory instruction onto the ready queue of its op class and
+ * re-sorts the op class order list if this changes the oldest ready inst.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::addReadyMemInst(DynInstPtr &ready_inst)
+{
+    OpClass op_class = ready_inst->opClass();
+
+    readyInsts[op_class].push(ready_inst);
+
+    // Will need to reorder the list if either a queue is not on the list,
+    // or it has an older instruction than last time.
+    if (!queueOnList[op_class]) {
+        addToOrderList(op_class);
+    } else if (readyInsts[op_class].top()->seqNum <
+               (*readyIt[op_class]).oldestInst) {
+        listOrder.erase(readyIt[op_class]);
+        addToOrderList(op_class);
+    }
+
+    DPRINTF(EdgeIQ, "Instruction is ready to issue, putting it onto "
+            "the ready list, PC %#x opclass:%i [Bid:%lli][Iid:%lli].\n",
+            ready_inst->readPC(), op_class, ready_inst->getBlockID(), ready_inst->getInstID());
+}
+
+/**
+ * Clears the can-issue flag of a memory instruction and hands it to the
+ * memory dependence unit of its thread for rescheduling.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::rescheduleMemInst(DynInstPtr &resched_inst)
+{
+    DPRINTF(EdgeIQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum);
+    resched_inst->clearCanIssue();
+
+    memDepUnit[resched_inst->threadNumber].reschedule(resched_inst);
+}
+
+/** Forwards a replay request to the memory dependence unit of the thread. */
+template <class Impl>
+void
+InstructionQueue<Impl>::replayMemInst(DynInstPtr &replay_inst)
+{
+
+    memDepUnit[replay_inst->threadNumber].replay(replay_inst);
+
+}
+
+/**
+ * Marks a memory instruction's memory operation as done and notifies the
+ * memory dependence unit. The entry bookkeeping inherited from the
+ * original code is compiled out with "#if 0".
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::completeMemInst(DynInstPtr &completed_inst)
+{
+    ThreadID tid = completed_inst->threadNumber;
+
+    DPRINTF(EdgeIQ, "Completing mem instruction PC:%#x [sn:%lli]\n",
+            completed_inst->readPC(), completed_inst->seqNum);
+
+    #if 0
+    ++freeEntries;
+    #endif
+
+    completed_inst->memOpDone = true;
+
+    memDepUnit[tid].completed(completed_inst);
+
+    #if 0
+    count[tid]--;
+    #endif
+}
+
+/** Reports a store/load ordering violation to the store's mem dep unit. */
+template <class Impl>
+void
+InstructionQueue<Impl>::violation(DynInstPtr &store,
+                                  DynInstPtr &faulting_load)
+{
+
+    memDepUnit[store->threadNumber].violation(store, faulting_load);
+
+}
+
+/**
+ * Starts a squash for a thread: latches the block id reported by commit,
+ * squashes the IQ contents if there are any, and tells the memory
+ * dependence unit to squash as well.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::squash(ThreadID tid)
+{
+    DPRINTF(EdgeIQ, "[tid:%i]: Starting to squash insts in "
+            "the IQ.\n", tid);
+
+    // Read the id of the last done block out of the time buffer. Despite
+    // its name, squashedSeqNum holds a block id in this model.
+    squashedSeqNum[tid] = fromCommit->commitInfo[tid].doneBlockID;
+
+    // Call doSquash if there are insts in the IQ
+    if (count[tid] > 0) {
+        doSquash(tid);
+    }
+
+    // Also tell the memory dependence unit to squash.
+    memDepUnit[tid].squash(squashedSeqNum[tid], tid);
+
+}
+
+/**
+ * Removes everything younger than squashedSeqNum[tid] (a block id) from
+ * the write list and from the per-frame instruction queues.
+ *
+ * The write list is trimmed from its tail and trimming stops at the first
+ * entry that is not younger than the squash point. A frame is squashed as
+ * a whole when its first instruction belongs to a younger block.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::doSquash(ThreadID tid)
+{
+    // Squashing will clean up all the insts younger than
+    // squashedSeqNum[tid]
+
+    DPRINTF(EdgeIQ, "[tid:%i]: Squashing until block id %i!\n",
+            tid, squashedSeqNum[tid]);
+
+    // Squash write list first. The tail is re-read on every iteration so
+    // that no iterator is ever decremented on an empty list or moved
+    // before begin(), both of which the previous loop could do.
+    while (!writeList[tid].empty()) {
+
+        // NOTE(review): element type inferred from the dereference below;
+        // confirm against the header.
+        typename std::list<QueueIt>::iterator list_it = writeList[tid].end();
+        --list_it;
+
+        QueueIt it = *list_it;
+
+        assert((*it)->isGRegWR() && (*it)->isGRegWriteValid());
+
+        if ((*it)->getBlockID() <= squashedSeqNum[tid]) {
+            // First entry that survives the squash: stop trimming.
+            break;
+        }
+
+        DPRINTF(EdgeIQ, "[tid:%i]: Squshing write list [sn:%lli] PC %#x of Block[Bid:%lli]"
+                "squashed. Issued flag = %d.\n",
+                tid, (*it)->seqNum, (*it)->readPC(),
+                (*it)->getBlockID(), (*it)->isIssued());
+
+        if ((*it)->isWriteRegIssued()) {
+            executeStage->decrWb((*it)->seqNum);
+        }
+
+        // Clear dependence in reg dep graph.
+        regDepGraph.remove((*it)->getMappedDestReg(0), squashedSeqNum[tid]);
+
+        writeList[tid].erase(list_it);
+    }
+
+    // Because queue has no iterator, a deque is used for each frame.
+    for (int i = 0; i < Impl::MaxFrameNum; i++) {
+        // Squash any instructions younger than the squashed block id
+        // given.
+        // @todo: Maybe it's not necessary to iterate through frames, just
+        // clearing the frame would be more efficient.
+
+        if (instQueue[tid][i].empty()) {
+            // Empty frame, skip it.
+            continue;
+        }
+
+        if (instQueue[tid][i].front()->getBlockID() > squashedSeqNum[tid]) {
+
+            // Drain the frame from the tail. back()/pop_back() replace the
+            // hand-maintained iterator that was decremented past begin().
+            while (!instQueue[tid][i].empty()) {
+                DynInstPtr squashed_inst = instQueue[tid][i].back();
+                assert(squashed_inst->getBlockID() > squashedSeqNum[tid]);
+
+                // Squash anything that matched the block id.
+                DPRINTF(EdgeIQTest, "[tid:%i]: Instruction [Iid:%lli] PC %#x of Block[Bid:%lli]"
+                        "squashed in frame[%i].\n",
+                        tid, squashed_inst->getInstID(),
+                        squashed_inst->readPC(),
+                        squashed_inst->getBlockID(), i);
+
+                // Mark it as squashed within the IQ.
+                squashed_inst->setSquashedInIQ();
+
+                // @todo: Remove this hack where several statuses are set so the
+                // inst will flow through the rest of the pipeline.
+                squashed_inst->setIssued();
+                squashed_inst->setCanCommit();
+                squashed_inst->clearInIQ();
+
+                // Update Thread IQ Count
+                count[squashed_inst->threadNumber]--;
+                ++freeEntries;
+
+                instQueue[tid][i].pop_back();
+
+                ++iqInstsSquashed;
+            }
+        }
+    }
+}
+
+/**
+ * Registers a register-read instruction as a consumer of its single
+ * source register in the register dependence graph.
+ *
+ * @param new_inst A valid register-read instruction with one source reg.
+ * @return true if the inst was added to the dependence graph and has to
+ *         wait; false if it can issue immediately or its source is not a
+ *         general register.
+ */
+template <class Impl>
+bool
+InstructionQueue<Impl>::addToDependents(DynInstPtr &new_inst)
+{
+    // Only reg read insts can be added into the dependence graph
+    // and only one src reg is allowed.
+    assert(new_inst->isGRegWR() && new_inst->isGRegReadValid());
+    assert(new_inst->numSrcRegs() == 1);
+
+    bool return_val = false;
+
+    // Only add it to the dependency graph if the
+    // source register is not ready.
+    PhysRegIndex src_reg = new_inst->getMappedSrcReg(0);
+    if (src_reg < numPhysRegs) {
+        // Try to add the reg-read inst into the reg dep graph.
+        // insertConsumer() returning true means this inst can issue
+        // immediately, while false means this inst has been added
+        // into the reg dep graph.
+        if (!regDepGraph.insertConsumer(src_reg, new_inst)) {
+            DPRINTF(EdgeIQ, "Instruction PC %#x has src reg %i that "
+                    "is being added to the dependency chain.\n",
+                    new_inst->readPC(), src_reg);
+
+            // Change the return value to indicate that something
+            // was added to the dependency graph.
+            return_val = true;
+
+            iqRegReadDep++;
+        } else {
+            DPRINTF(EdgeIQ, "Register Read Inst[Bid:%lli][Iid:%lli] @PC %#x has src reg %i that "
+                    "became ready before it reached the IQ.\n",
+                    new_inst->getBlockID(),
+                    new_inst->getInstID(),
+                    new_inst->readPC(), src_reg);
+            // Mark this register read inst as can-issue.
+            new_inst->setCanIssue();
+            // Set this reg-read inst in Issuing read status.
+            new_inst->setReadRegIssuing();
+        }
+    } else {
+        DPRINTF(EdgeIQ, "Not a general register, leave it alone.\n");
+    }
+    return return_val;
+}
+
+/**
+ * Registers a register-write instruction as the producer of its single
+ * destination register in the register dependence graph.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::addToProducers(DynInstPtr &new_inst)
+{
+    // A ptr to the producing instruction will be placed
+    // in the head node of the dependency links.
+
+    // Only write register insts are supposed to arrive here
+    // and only one dest reg is allowed.
+    assert(new_inst->isGRegWR() && new_inst->isGRegWriteValid());
+    assert(new_inst->numDestRegs() == 1);
+
+    PhysRegIndex dest_reg = new_inst->getMappedDestReg(0);
+
+    if (dest_reg < numPhysRegs) {
+
+        // Set entry of this dest reg.
+        regDepGraph.insertProducer(dest_reg, new_inst);
+
+    } else {
+        DPRINTF(EdgeIQ, "Not a general register, leave it alone.\n");
+    }
+}
+
+/**
+ * Puts an instruction on the ready list of its op class once it reports
+ * readyToIssue(). Memory references are not queued here; they are handed
+ * to the memory dependence unit instead.
+ */
+template <class Impl>
+void
+InstructionQueue<Impl>::addIfReady(DynInstPtr &inst)
+{
+    // If the instruction now has all of its source registers
+    // available, then add it to the list of ready instructions.
+
+    if (!inst->readyToIssue()) {
+
+        DPRINTF(EdgeIQ,"Instruction %d[Iid:%i] still has %d src.\n",
+                inst->seqNum,inst->getInstID(),
+                inst->getReadyOperands());
+
+        DPRINTF(EdgeIQ,"Instruction is %s.\n",
+                inst->staticInst->disassemble(inst->readPC()));
+
+        // Nothing more to do until another operand arrives.
+        return;
+    }
+
+    DPRINTF(EdgeIQ,"Inst[Bid:%lli][Iid:%lli] is ready to issue\n"
+            "Inst %s.\n", inst->getBlockID(), inst->getInstID(),
+            inst->staticInst->disassemble(inst->readPC()));
+
+    if (inst->isMemRef()) {
+
+        DPRINTF(EdgeIQ, "This is a memory ref inst, checking if "
+                "memory instruction can issue.\n");
+
+        // Message to the mem dependence unit that this instruction has
+        // its registers ready.
+        memDepUnit[inst->threadNumber].opsReady(inst);
+
+        return;
+    }
+
+    OpClass op_class = inst->opClass();
+
+    DPRINTF(EdgeIQ, "Putting instruction onto "
+            "the ready list, PC %#x opclass:%i [Bid:%lli][Iid:%lli].\n",
+            inst->readPC(), op_class,
+            inst->getBlockID(), inst->getInstID());
+
+    readyInsts[op_class].push(inst);
+
+    // Will need to reorder the list if either a queue is not on the list,
+    // or it has an older instruction than last time.
+    if (!queueOnList[op_class]) {
+        addToOrderList(op_class);
+    } else if (readyInsts[op_class].top()->seqNum <
+               (*readyIt[op_class]).oldestInst) {
+        listOrder.erase(readyIt[op_class]);
+        addToOrderList(op_class);
+    }
+
+}
+
+/** Returns the number of occupied IQ entries (numEntries - freeEntries). */
+template <class Impl>
+int
+InstructionQueue<Impl>::countInsts()
+{
+    // @todo: This is really simple ... fix it.
+    return numEntries - freeEntries;
+}
+
+/** Debug helper; not implemented for this model and panics if called. */
+template <class Impl>
+void
+InstructionQueue<Impl>::dumpLists()
+{
+    panic("Unimplemented func: dumpLists().\n");
+}
+
+
+/** Debug helper; not implemented for this model and panics if called. */
+template <class Impl>
+void
+InstructionQueue<Impl>::dumpInsts()
+{
+    panic("Unimplemented func: dumpInsts().\n");
+}
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/insttracer.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/insttracer.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,168 @@
+/*
+ * Copyright (c) 2001-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Steve Reinhardt
+ *          Nathan Binkert
+ */
+
+#ifndef __EDGEINSTRECORD_HH__
+#define __EDGEINSTRECORD_HH__
+
+#include "base/bigint.hh"
+#include "base/trace.hh"
+#include "base/types.hh"
+#include "cpu/inst_seq.hh" // for InstSeqNum
+#include "cpu/edge/static_inst.hh"
+#include "sim/sim_object.hh"
+
+class ThreadContext;
+
+namespace Trace {
+
+/**
+ * Trace record for one executed EDGE instruction.
+ *
+ * The constructor fixes when/where the instruction executed; address,
+ * data and sequence numbers are filled in later through the setters, each
+ * of which also raises the matching validity flag. Subclasses implement
+ * dump() to emit the record.
+ */
+class EdgeInstRecord
+{
+  protected:
+    Tick when;
+
+    // The following fields are initialized by the constructor and
+    // thus guaranteed to be valid.
+    ThreadContext *thread;
+    // need to make this ref-counted so it doesn't go away before we
+    // dump the record
+    EDGEStaticInstPtr staticInst;
+    Addr PC;
+    bool misspeculating;
+
+    // The remaining fields are only valid for particular instruction
+    // types (e.g, addresses for memory ops) or when particular
+    // options are enabled (e.g., tracing full register contents).
+    // Each data field has an associated valid flag to indicate
+    // whether the data field is valid.
+    Addr addr;
+    bool addr_valid;
+
+    union {
+        uint64_t as_int;
+        double as_double;
+    } data;
+    // For the integer statuses the enumerator equals the operand size in
+    // bytes. DataDouble is only a tag (3 is an unused size), not a size.
+    enum {
+        DataInvalid = 0,
+        DataInt8 = 1,
+        DataInt16 = 2,
+        DataInt32 = 4,
+        DataInt64 = 8,
+        DataDouble = 3
+    } data_status;
+
+    InstSeqNum fetch_seq;
+    bool fetch_seq_valid;
+
+    InstSeqNum cp_seq;
+    bool cp_seq_valid;
+
+  public:
+    /**
+     * @param _when Tick at which the instruction is recorded.
+     * @param _thread Thread context the instruction belongs to.
+     * @param _staticInst The static instruction being traced.
+     * @param _pc PC of the instruction.
+     * @param spec Whether the instruction is misspeculating.
+     */
+    EdgeInstRecord(Tick _when, ThreadContext *_thread,
+                   const EDGEStaticInstPtr _staticInst,
+                   Addr _pc, bool spec )
+        : when(_when), thread(_thread),
+          staticInst(_staticInst), PC(_pc),
+          misspeculating(spec),
+          // The optional fields are zeroed as well: their getters do not
+          // check the valid flags, so an unset field must not hand out an
+          // indeterminate value.
+          addr(0), addr_valid(false),
+          data_status(DataInvalid),
+          fetch_seq(0), fetch_seq_valid(false),
+          cp_seq(0), cp_seq_valid(false)
+    {
+        data.as_int = 0;
+    }
+
+    virtual ~EdgeInstRecord() { }
+
+    void setAddr(Addr a) { addr = a; addr_valid = true; }
+
+    // Integer payloads are stored in data.as_int; data_status records the
+    // width. For the Twin types only the 'a' half is recorded.
+    void setData(Twin64_t d) { data.as_int = d.a; data_status = DataInt64; }
+    void setData(Twin32_t d) { data.as_int = d.a; data_status = DataInt32; }
+    void setData(uint64_t d) { data.as_int = d; data_status = DataInt64; }
+    void setData(uint32_t d) { data.as_int = d; data_status = DataInt32; }
+    void setData(uint16_t d) { data.as_int = d; data_status = DataInt16; }
+    void setData(uint8_t d) { data.as_int = d; data_status = DataInt8; }
+
+    // Signed overloads forward to the unsigned overload of the same width.
+    void setData(int64_t d) { setData((uint64_t)d); }
+    void setData(int32_t d) { setData((uint32_t)d); }
+    void setData(int16_t d) { setData((uint16_t)d); }
+    void setData(int8_t d)  { setData((uint8_t)d); }
+
+    void setData(double d) { data.as_double = d; data_status = DataDouble; }
+
+    void setFetchSeq(InstSeqNum seq)
+    { fetch_seq = seq; fetch_seq_valid = true; }
+
+    void setCPSeq(InstSeqNum seq)
+    { cp_seq = seq; cp_seq_valid = true; }
+
+    /** Emits the record; implemented by the concrete tracer's record. */
+    virtual void dump() = 0;
+
+  public:
+    Tick getWhen() { return when; }
+    ThreadContext *getThread() { return thread; }
+    EDGEStaticInstPtr getEDGEStaticInst() { return staticInst; }
+    Addr getPC() { return PC; }
+    bool getMisspeculating() { return misspeculating; }
+
+    // The getters below return the raw field; check the matching
+    // *Valid()/getDataStatus() accessor to know whether it was ever set.
+    Addr getAddr() { return addr; }
+    bool getAddrValid() { return addr_valid; }
+
+    uint64_t getIntData() { return data.as_int; }
+    double getFloatData() { return data.as_double; }
+    int getDataStatus() { return data_status; }
+
+    InstSeqNum getFetchSeq() { return fetch_seq; }
+    bool getFetchSeqValid() { return fetch_seq_valid; }
+
+    InstSeqNum getCpSeq() { return cp_seq; }
+    bool getCpSeqValid() { return cp_seq_valid; }
+};
+
+/**
+ * SimObject interface of an EDGE instruction tracer: a factory for
+ * EdgeInstRecord objects.
+ */
+class EdgeInstTracer : public SimObject
+{
+  public:
+    EdgeInstTracer(const Params *p) : SimObject(p)
+    {}
+
+    virtual ~EdgeInstTracer()
+    {}
+
+    /** Creates the trace record for one instruction at the given tick. */
+    virtual EdgeInstRecord *
+        getEdgeInstRecord(Tick when, ThreadContext *tc,
+                const EDGEStaticInstPtr staticInst, Addr pc
+                ) = 0;
+};
+
+
+
+} // namespace Trace
+
+#endif // __EDGEINSTRECORD_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/isa_specific.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/isa_specific.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "cpu/base.hh" + +#include "cpu/edge/impl.hh" +#include "cpu/edge/dyn_inst.hh" +//#include "cpu/edge/block.hh" + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/lsq.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/lsq.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,405 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009
+ *
+ */
+
+#ifndef __CPU_EDGE_LSQ_HH__
+#define __CPU_EDGE_LSQ_HH__
+
+// NOTE(review): the two system header names were lost in extraction.
+// <list> and <string> are what this header visibly uses (std::list,
+// std::string) -- confirm against the repository.
+#include <list>
+#include <string>
+
+#include "config/full_system.hh"
+#include "cpu/inst_seq.hh"
+#include "cpu/edge/lsq_unit.hh"
+#include "mem/port.hh"
+#include "sim/sim_object.hh"
+
+class SimpleEdgeCPUParams;
+
+/**
+ * Load/store queue of the EDGE CPU model.
+ *
+ * Owns one LSQUnit per hardware thread plus the single D-cache port they
+ * share. Most per-thread calls simply forward to thread[tid].
+ */
+template <class Impl>
+class LSQ {
+  public:
+    typedef typename Impl::CPU CPU;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::EdgeBlockPtr BlockPtr;
+    typedef typename Impl::CPUPol::Execute Execute;
+    typedef typename Impl::CPUPol::LSQUnit LSQUnit;
+    // Declared once only: repeating a typedef inside a class scope is
+    // not permitted by the language.
+    typedef TheISA::BlockID BlockID;
+
+    /** SMT policy. */
+    enum LSQPolicy {
+        Dynamic,
+        Partitioned,
+        Threshold
+    };
+
+    /** Constructs an LSQ with the given parameters. */
+    LSQ(CPU *cpu_ptr, Execute *execute_ptr, SimpleEdgeCPUParams *params);
+
+    /** Returns the name of the LSQ. */
+    std::string name() const;
+
+    /** Registers statistics of each LSQ unit. */
+    void regStats();
+
+    /** Returns the D-cache port shared by all LSQ units. */
+    Port *getDcachePort() { return &dcachePort; }
+
+    /** Sets the pointer to the list of active threads. */
+    void setActiveThreads(std::list<ThreadID> *at_ptr);
+    /** Switches out the LSQ. */
+    void switchOut();
+    /** Takes over execution from another CPU's thread. */
+    void takeOverFrom();
+
+    /** Number of entries needed for the given amount of threads.*/
+    int entryAmount(ThreadID num_threads);
+    void removeEntries(ThreadID tid);
+    /** Reset the max entries for each thread. */
+    void resetEntries();
+    /** Resize the max entries for a thread. */
+    void resizeEntries(unsigned size, ThreadID tid);
+
+    /** Ticks the LSQ. */
+    void tick();
+    /** Ticks a specific LSQ Unit. */
+    void tick(ThreadID tid)
+    { thread[tid].tick(); }
+
+    /** Inserts a load into the LSQ. */
+    void insertLoad(DynInstPtr &load_inst);
+    /** Inserts a store into the LSQ. */
+    void insertStore(DynInstPtr &store_inst);
+
+    /** Executes a load. */
+    Fault executeLoad(DynInstPtr &inst);
+
+    /** Executes a store. */
+    Fault executeStore(DynInstPtr &inst);
+
+    /** Nullifies a store. */
+    void nullifyStore( DynInstPtr &inst);
+
+    /**
+     * Commits loads up to the given instruction block for a specific
+     * thread.
+     */
+    void commitLoads(BlockID &youngest_inst_block, ThreadID tid)
+    { thread[tid].commitLoads(youngest_inst_block); }
+
+
+    /**
+     * Commits stores up to the given instruction block for a specific
+     * thread.
+     */
+    void commitStores(BlockID &youngest_inst_block, ThreadID tid)
+    { thread[tid].commitStores(youngest_inst_block); }
+
+    /**
+     * Attempts to write back stores until all cache ports are used or the
+     * interface becomes blocked.
+     */
+    void writebackStores();
+    /** Same as above, but only for one thread. */
+    void writebackStores(ThreadID tid);
+
+    /** Sets the store mask of a block on the LSQ unit of its thread. */
+    void setStoreMask(BlockPtr inst_block)
+    { thread[inst_block->getTid()].setStoreMask(inst_block); }
+    /**
+     * Squash instructions from a thread until the specified block id.
+     */
+    void squash(const TheISA::BlockID &squashed_num, ThreadID tid)
+    { thread[tid].squash(squashed_num); }
+
+    /** Returns whether or not there was a memory ordering violation. */
+    bool violation();
+    /**
+     * Returns whether or not there was a memory ordering violation for a
+     * specific thread.
+     */
+    bool violation(ThreadID tid)
+    { return thread[tid].violation(); }
+
+    /** Returns if a load is blocked due to the memory system for a specific
+     *  thread.
+     */
+    bool loadBlocked(ThreadID tid)
+    { return thread[tid].loadBlocked(); }
+
+    bool isLoadBlockedHandled(ThreadID tid)
+    { return thread[tid].isLoadBlockedHandled(); }
+
+    void setLoadBlockedHandled(ThreadID tid)
+    { thread[tid].setLoadBlockedHandled(); }
+
+    /** Gets the instruction that caused the memory ordering violation. */
+    DynInstPtr getMemDepViolator(ThreadID tid)
+    { return thread[tid].getMemDepViolator(); }
+
+    /** Returns the head index of the load queue for a specific thread. */
+    int getLoadHead(ThreadID tid)
+    { return thread[tid].getLoadHead(); }
+
+    /** Returns the sequence number of the head of the load queue. */
+    InstSeqNum getLoadHeadSeqNum(ThreadID tid)
+    {
+        return thread[tid].getLoadHeadSeqNum();
+    }
+
+    /** Returns the head index of the store queue. */
+    int getStoreHead(ThreadID tid)
+    { return thread[tid].getStoreHead(); }
+
+    /** Returns the sequence number of the head of the store queue. */
+    InstSeqNum getStoreHeadSeqNum(ThreadID tid)
+    {
+        return thread[tid].getStoreHeadSeqNum();
+    }
+
+    /** Returns the number of instructions in all of the queues. */
+    int getCount();
+    /** Returns the number of instructions in the queues of one thread. */
+    int getCount(ThreadID tid)
+    { return thread[tid].getCount(); }
+
+    /** Returns the total number of loads in the load queue. */
+    int numLoads();
+    /** Returns the total number of loads for a single thread. */
+    int numLoads(ThreadID tid)
+    { return thread[tid].numLoads(); }
+
+    /** Returns the total number of stores in the store queue. */
+    int numStores();
+    /** Returns the total number of stores for a single thread. */
+    int numStores(ThreadID tid)
+    { return thread[tid].numStores(); }
+
+    /** Returns the total number of loads that are ready. */
+    int numLoadsReady();
+    /** Returns the number of loads that are ready for a single thread. */
+    int numLoadsReady(ThreadID tid)
+    { return thread[tid].numLoadsReady(); }
+
+    /** Returns the number of free entries. */
+    unsigned numFreeEntries();
+    /** Returns the number of free entries for a specific thread. */
+    unsigned numFreeEntries(ThreadID tid);
+
+    /** Returns if the LSQ is full (either LQ or SQ is full). */
+    bool isFull();
+    /**
+     * Returns if the LSQ is full for a specific thread (either LQ or SQ is
+     * full).
+     */
+    bool isFull(ThreadID tid);
+
+    /** Returns if any of the LQs are full. */
+    bool lqFull();
+    /** Returns if the LQ of a given thread is full. */
+    bool lqFull(ThreadID tid);
+
+    /** Returns if any of the SQs are full. */
+    bool sqFull();
+    /** Returns if the SQ of a given thread is full. */
+    bool sqFull(ThreadID tid);
+
+    /**
+     * Returns if the LSQ is stalled due to a memory operation that must be
+     * replayed.
+     */
+    bool isStalled();
+    /**
+     * Returns if the LSQ of a specific thread is stalled due to a memory
+     * operation that must be replayed.
+     */
+    bool isStalled(ThreadID tid);
+
+    /** Returns whether or not there are any stores to write back to memory. */
+    bool hasStoresToWB();
+
+    /** Returns whether or not a specific thread has any stores to write back
+     * to memory.
+     */
+    bool hasStoresToWB(ThreadID tid)
+    { return thread[tid].hasStoresToWB(); }
+
+    /** Returns the number of stores a specific thread has to write back. */
+    int numStoresToWB(ThreadID tid)
+    { return thread[tid].numStoresToWB(); }
+
+    /** Returns if the LSQ will write back to memory this cycle. */
+    bool willWB();
+    /** Returns if the LSQ of a specific thread will write back to memory this
+     * cycle.
+     */
+    bool willWB(ThreadID tid)
+    { return thread[tid].willWB(); }
+
+    /** Returns if the cache is currently blocked, i.e. a retry is pending. */
+    bool cacheBlocked()
+    { return retryTid != InvalidThreadID; }
+
+    /** Sets the retry thread id, indicating that one of the LSQUnits
+     * tried to access the cache but the cache was blocked. */
+    void setRetryTid(ThreadID tid)
+    { retryTid = tid; }
+
+    /** Debugging function to print out all instructions. */
+    void dumpInsts();
+    /** Debugging function to print out instructions from a specific thread. */
+    void dumpInsts(ThreadID tid)
+    { thread[tid].dumpInsts(); }
+
+    /** Executes a read operation, using the load specified at the load index. */
+    template <class T>
+    Fault read(RequestPtr req, T &data, int load_idx);
+
+    /** Executes a store operation, using the store specified at the store
+     *   index.
+     */
+    template <class T>
+    Fault write(RequestPtr req, T &data, int store_idx);
+
+    /** The CPU pointer. */
+    CPU *cpu;
+
+    /** The execute stage pointer. */
+    Execute *executeStage;
+
+    /** DcachePort class for this LSQ.  Handles doing the
+     * communication with the cache/memory.
+     */
+    class DcachePort : public Port
+    {
+      protected:
+        /** Pointer to LSQ. */
+        LSQ *lsq;
+
+      public:
+        /** Default constructor. */
+        DcachePort(LSQ *_lsq)
+            : Port(_lsq->name() + "-dport", _lsq->cpu), lsq(_lsq)
+        { }
+
+        // Not set by this constructor; recvStatusChange() reads it.
+        // NOTE(review): presumably the LSQ constructor initializes it --
+        // confirm.
+        bool snoopRangeSent;
+
+        virtual void setPeer(Port *port);
+
+      protected:
+        /** Atomic version of receive.  Panics. */
+        virtual Tick recvAtomic(PacketPtr pkt);
+
+        /** Functional version of receive.  Only logs; nothing is updated. */
+        virtual void recvFunctional(PacketPtr pkt);
+
+        /** Receives status change.  Other than range changing, panics. */
+        virtual void recvStatusChange(Status status);
+
+        /** Returns the address ranges of this device. */
+        virtual void getDeviceAddressRanges(AddrRangeList &resp,
+                                            bool &snoop)
+        { resp.clear(); snoop = true; }
+
+        /** Timing version of receive.  Handles writing back and
+         * completing the load or store that has returned from
+         * memory. */
+        virtual bool recvTiming(PacketPtr pkt);
+
+        /** Handles doing a retry of the previous send. */
+        virtual void recvRetry();
+    };
+
+    /** D-cache port. */
+    DcachePort dcachePort;
+
+#if FULL_SYSTEM
+    /** Tell the CPU to update the Phys and Virt ports. */
+    void updateMemPorts() { cpu->updateMemPorts(); }
+#endif
+
+  protected:
+    /** The LSQ policy for SMT mode. */
+    LSQPolicy lsqPolicy;
+
+    /** The LSQ units for individual threads. */
+    LSQUnit thread[Impl::MaxThreads];
+
+    /** List of Active Threads in System. */
+    std::list<ThreadID> *activeThreads;
+
+    /** Total Size of LQ Entries. */
+    unsigned LQEntries;
+    /** Total Size of SQ Entries. */
+    unsigned SQEntries;
+
+    /** Max LQ Size - Used to Enforce Sharing Policies. */
+    unsigned maxLQEntries;
+
+    /** Max SQ Size - Used to Enforce Sharing Policies. */
+    unsigned maxSQEntries;
+
+    /** Number of Threads. */
+    ThreadID numThreads;
+
+    /** The thread id of the LSQ Unit that is currently waiting for a
+     * retry. */
+    ThreadID retryTid;
+};
+
+/** Forwards a read to the LSQ unit of the thread named in the request. */
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::read(RequestPtr req, T &data, int load_idx)
+{
+    ThreadID tid = req->threadId();
+
+    return thread[tid].read(req, data, load_idx);
+}
+
+/** Forwards a write to the LSQ unit of the thread named in the request. */
+template <class Impl>
+template <class T>
+Fault
+LSQ<Impl>::write(RequestPtr req, T &data, int store_idx)
+{
+    ThreadID tid = req->threadId();
+
+    return thread[tid].write(req, data, store_idx);
+}
+
+#endif // __CPU_EDGE_LSQ_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/lsq.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/lsq.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/lsq_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class LSQ; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/lsq_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/lsq_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,642 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include +#include +#include + +#include "cpu/edge/lsq.hh" +#include "params/SimpleEdgeCPU.hh" + +using namespace std; + +template +void +LSQ::DcachePort::setPeer(Port *port) +{ + Port::setPeer(port); + +#if FULL_SYSTEM + // Update the ThreadContext's memory ports (Functional/Virtual + // Ports) + lsq->updateMemPorts(); +#endif +} + +template +Tick +LSQ::DcachePort::recvAtomic(PacketPtr pkt) +{ + panic("EdgeCPU model does not work with atomic mode!"); + return curTick; +} + +template +void +LSQ::DcachePort::recvFunctional(PacketPtr pkt) +{ + DPRINTF(EdgeLSQ, "LSQ doesn't update things on a recvFunctional."); +} + +template +void +LSQ::DcachePort::recvStatusChange(Status status) +{ + if (status == RangeChange) { + if (!snoopRangeSent) { + snoopRangeSent = true; + sendStatusChange(Port::RangeChange); + } + return; + } + panic("EdgeCPU doesn't expect recvStatusChange callback!"); +} + +template +bool +LSQ::DcachePort::recvTiming(PacketPtr pkt) +{ + DPRINTF(EdgeLSQ,"Received Timing.\n"); + if (pkt->isError()) + DPRINTF(EdgeLSQ, "Got error packet back for address: %#X\n", pkt->getAddr()); + if 
(pkt->isResponse()) { + lsq->thread[pkt->req->threadId()].completeDataAccess(pkt); + } + else { + // must be a snoop + + // @TODO someday may need to process invalidations in LSQ here + // to provide stronger consistency model + } + return true; +} + +template +void +LSQ::DcachePort::recvRetry() +{ + if (lsq->retryTid == -1) + { + //Squashed, so drop it + return; + } + int curr_retry_tid = lsq->retryTid; + // Speculatively clear the retry Tid. This will get set again if + // the LSQUnit was unable to complete its access. + lsq->retryTid = -1; + lsq->thread[curr_retry_tid].recvRetry(); +} + +template +LSQ::LSQ(CPU *cpu_ptr, Execute *execute_ptr, SimpleEdgeCPUParams *params) + : cpu(cpu_ptr), executeStage(execute_ptr), dcachePort(this), + LQEntries(params->LQEntries * ( Impl::MaxFrameNum ) ), + SQEntries(params->SQEntries * ( Impl::MaxFrameNum ) ), + numThreads(params->numThreads), + retryTid(-1) +{ + dcachePort.snoopRangeSent = false; + + //**********************************************/ + //************ Handle SMT Parameters ***********/ + //**********************************************/ + std::string policy = params->smtLSQPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out fetch policy + if (policy == "dynamic") { + lsqPolicy = Dynamic; + + maxLQEntries = LQEntries; + maxSQEntries = SQEntries; + + DPRINTF(EdgeLSQ, "LSQ sharing policy set to Dynamic\n"); + } else if (policy == "partitioned") { + lsqPolicy = Partitioned; + + //@todo:make work if part_amt doesnt divide evenly. 
+ maxLQEntries = LQEntries / numThreads; + maxSQEntries = SQEntries / numThreads; + + DPRINTF(EdgeLSQ, "LSQ sharing policy set to Partitioned: " + "%i entries per LQ | %i entries per SQ", + maxLQEntries,maxSQEntries); + } else if (policy == "threshold") { + lsqPolicy = Threshold; + + assert(params->smtLSQThreshold > LQEntries); + assert(params->smtLSQThreshold > SQEntries); + + //Divide up by threshold amount + //@todo: Should threads check the max and the total + //amount of the LSQ + maxLQEntries = params->smtLSQThreshold; + maxSQEntries = params->smtLSQThreshold; + + DPRINTF(EdgeLSQ, "LSQ sharing policy set to Threshold: " + "%i entries per LQ | %i entries per SQ", + maxLQEntries,maxSQEntries); + } else { + assert(0 && "Invalid LSQ Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } + + //Initialize LSQs + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread[tid].init(cpu, execute_ptr, params, this, + maxLQEntries, maxSQEntries, tid); + thread[tid].setDcachePort(&dcachePort); + } +} + + +template +std::string +LSQ::name() const +{ + return executeStage->name() + ".lsq"; +} + +template +void +LSQ::regStats() +{ + //Initialize LSQs + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread[tid].regStats(); + } +} + +template +void +LSQ::setActiveThreads(list *at_ptr) +{ + activeThreads = at_ptr; + assert(activeThreads != 0); +} + +template +void +LSQ::switchOut() +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread[tid].switchOut(); + } +} + +template +void +LSQ::takeOverFrom() +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + thread[tid].takeOverFrom(); + } +} + +template +int +LSQ::entryAmount(ThreadID num_threads) +{ + if (lsqPolicy == Partitioned) { + return LQEntries / num_threads; + } else { + return 0; + } +} + +template +void +LSQ::resetEntries() +{ + if (lsqPolicy != Dynamic || numThreads > 1) { + int active_threads = activeThreads->size(); + + int maxEntries; + + if (lsqPolicy == Partitioned) { + maxEntries = 
LQEntries / active_threads; + } else if (lsqPolicy == Threshold && active_threads == 1) { + maxEntries = LQEntries; + } else { + maxEntries = LQEntries; + } + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + resizeEntries(maxEntries, tid); + } + } +} + +template +void +LSQ::removeEntries(ThreadID tid) +{ + thread[tid].clearLQ(); + thread[tid].clearSQ(); +} + +template +void +LSQ::resizeEntries(unsigned size, ThreadID tid) +{ + thread[tid].resizeLQ(size); + thread[tid].resizeSQ(size); +} + +template +void +LSQ::tick() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + thread[tid].tick(); + } +} + +template +void +LSQ::insertLoad(DynInstPtr &load_inst) +{ + ThreadID tid = load_inst->threadNumber; + + thread[tid].insertLoad(load_inst); +} + +template +void +LSQ::insertStore(DynInstPtr &store_inst) +{ + ThreadID tid = store_inst->threadNumber; + + thread[tid].insertStore(store_inst); +} + +template +Fault +LSQ::executeLoad(DynInstPtr &inst) +{ + ThreadID tid = inst->threadNumber; + + return thread[tid].executeLoad(inst); +} + +template +Fault +LSQ::executeStore(DynInstPtr &inst) +{ + ThreadID tid = inst->threadNumber; + + return thread[tid].executeStore(inst); +} + +template +void +LSQ::nullifyStore(DynInstPtr &inst) +{ + ThreadID tid = inst->threadNumber; + + thread[tid].nullifyStore(inst); +} + +template +void +LSQ::writebackStores() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (numStoresToWB(tid) > 0) { + DPRINTF(EdgeLSQ,"[tid:%i] Writing back stores. 
%i stores " + "available for Writeback.\n", tid, numStoresToWB(tid)); + } + + thread[tid].writebackStores(); + } +} + +template +bool +LSQ::violation() +{ + /* Answers: Does Anybody Have a Violation?*/ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (thread[tid].violation()) + return true; + } + + return false; +} + +template +int +LSQ::getCount() +{ + unsigned total = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + total += getCount(tid); + } + + return total; +} + +template +int +LSQ::numLoads() +{ + unsigned total = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + total += numLoads(tid); + } + + return total; +} + +template +int +LSQ::numStores() +{ + unsigned total = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + total += thread[tid].numStores(); + } + + return total; +} + +template +int +LSQ::numLoadsReady() +{ + unsigned total = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + total += thread[tid].numLoadsReady(); + } + + return total; +} + +template +unsigned +LSQ::numFreeEntries() +{ + unsigned total = 0; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + total += thread[tid].numFreeEntries(); + } + + return total; +} + +template +unsigned +LSQ::numFreeEntries(ThreadID tid) +{ + //if (lsqPolicy == Dynamic) + //return numFreeEntries(); + //else + return thread[tid].numFreeEntries(); +} + 
+template +bool +LSQ::isFull() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!(thread[tid].lqFull() || thread[tid].sqFull())) + return false; + } + + return true; +} + +template +bool +LSQ::isFull(ThreadID tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if (lsqPolicy == Dynamic) + return isFull(); + else + return thread[tid].lqFull() || thread[tid].sqFull(); +} + +template +bool +LSQ::lqFull() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!thread[tid].lqFull()) + return false; + } + + return true; +} + +template +bool +LSQ::lqFull(ThreadID tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if (lsqPolicy == Dynamic) + return lqFull(); + else + return thread[tid].lqFull(); +} + +template +bool +LSQ::sqFull() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!sqFull(tid)) + return false; + } + + return true; +} + +template +bool +LSQ::sqFull(ThreadID tid) +{ + //@todo: Change to Calculate All Entries for + //Dynamic Policy + if (lsqPolicy == Dynamic) + return sqFull(); + else + return thread[tid].sqFull(); +} + +template +bool +LSQ::isStalled() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (!thread[tid].isStalled()) + return false; + } + + return true; +} + +template +bool +LSQ::isStalled(ThreadID tid) +{ + if (lsqPolicy == Dynamic) + return isStalled(); + else + return thread[tid].isStalled(); +} + +template +bool +LSQ::hasStoresToWB() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads 
!= end) { + ThreadID tid = *threads++; + + if (hasStoresToWB(tid)) + return true; + } + + return false; +} + +template +bool +LSQ::willWB() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (willWB(tid)) + return true; + } + + return false; +} + +template +void +LSQ::dumpInsts() +{ + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + thread[tid].dumpInsts(); + } +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/lsq_unit.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/lsq_unit.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,808 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_LSQ_UNIT_HH__ +#define __CPU_EDGE_LSQ_UNIT_HH__ + +#include +#include +#include +#include + +#include "arch/faults.hh" +#include "arch/locked_mem.hh" +#include "config/full_system.hh" +#include "config/the_isa.hh" +#include "base/fast_alloc.hh" +#include "base/hashmap.hh" +#include "cpu/inst_seq.hh" +#include "mem/packet.hh" +#include "mem/port.hh" + +class SimpleEdgeCPUParams; + +/** + * Class that implements the actual LQ and SQ for each specific + * thread. Both are circular queues; load entries are freed upon + * committing, while store entries are freed once they writeback. The + * LSQUnit tracks if there are memory ordering violations, and also + * detects partial load to store forwarding cases (a store only has + * part of a load's data) that requires the load to wait until the + * store writes back. In the former case it holds onto the instruction + * until the dependence unit looks at it, and in the latter it stalls + * the LSQ until the store writes back. At that point the load is + * replayed. 
+ */
+template <class Impl>
+class LSQUnit {
+  protected:
+    typedef TheISA::IntReg IntReg;
+  public:
+    typedef typename Impl::CPU CPU;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::EdgeBlockPtr BlockPtr;
+    typedef typename Impl::CPUPol::Execute Execute;
+    typedef typename Impl::CPUPol::LSQ LSQ;
+    typedef typename Impl::CPUPol::Issue2Execute Issue2Execute;
+
+    typedef TheISA::BlockID BlockID;
+
+  public:
+    /** Constructs an LSQ unit. init() must be called prior to use. */
+    LSQUnit();
+
+    /** Initializes the LSQ unit with the specified number of entries. */
+    void init(CPU *cpu_ptr, Execute *execute_ptr, SimpleEdgeCPUParams *params,
+            LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries,
+            unsigned id);
+
+    /** Returns the name of the LSQ unit. */
+    std::string name() const;
+
+    /** Registers statistics. */
+    void regStats();
+
+    /** Sets the pointer to the dcache port. */
+    void setDcachePort(Port *dcache_port);
+
+    /** Switches out LSQ unit. */
+    void switchOut();
+
+    /** Takes over from another CPU's thread. */
+    void takeOverFrom();
+
+    /** Returns if the LSQ is switched out. */
+    bool isSwitchedOut() { return switchedOut; }
+
+    /** Ticks the LSQ unit, which in this case only resets the number of
+     * used cache ports.
+     * @todo: Move the number of used ports up to the LSQ level so it can
+     * be shared by all LSQ units.
+     */
+    void tick() { usedPorts = 0; }
+
+    /** Inserts an instruction. */
+    void insert(DynInstPtr &inst);
+    /** Inserts a load instruction. */
+    void insertLoad(DynInstPtr &load_inst);
+    /** Inserts a store instruction. */
+    void insertStore(DynInstPtr &store_inst);
+
+    /** Executes a load instruction. */
+    Fault executeLoad(DynInstPtr &inst);
+
+    /** Executing a load by LQ index is not implemented; always panics. */
+    Fault executeLoad(int lq_idx) { panic("Not implemented"); return NoFault; }
+    /** Executes a store instruction. */
+    Fault executeStore(DynInstPtr &inst);
+
+    /** Nullify a store instruction. */
+    void nullifyStore(DynInstPtr &inst);
+
+    /** Commits the inst block. */
+    void commitInstBlock(BlockID blockID);
+
+    /** Commits the head load. */
+    void commitLoad();
+
+    /** Commits loads selected by the given block ID. */
+    void commitLoads(BlockID blockID);
+
+    /** Commits stores selected by the given block ID. */
+    void commitStores(BlockID blockID);
+
+    /** Writes back stores. */
+    void writebackStores();
+
+    /** Completes the data access that has been returned from the
+     * memory system. */
+    void completeDataAccess(PacketPtr pkt);
+
+    /** Clears all the entries in the LQ. */
+    void clearLQ();
+
+    /** Clears all the entries in the SQ. */
+    void clearSQ();
+
+    /** Resizes the LQ to a given size. */
+    void resizeLQ(unsigned size);
+
+    /** Resizes the SQ to a given size. */
+    void resizeSQ(unsigned size);
+
+    /** Records the store mask of the given instruction block. */
+    void setStoreMask(BlockPtr inst_block)
+    { smask = inst_block ->getStoreMask();}
+
+    /** Squashes instructions selected by the given block ID. */
+    void squash(const TheISA::BlockID &squashed_num);
+
+    /** Returns if there is a memory ordering violation. Value is reset upon
+     * call to getMemDepViolator().
+     */
+    bool violation() { return memDepViolator; }
+
+    /** Returns the memory ordering violator. */
+    DynInstPtr getMemDepViolator();
+
+    /** Returns if a load became blocked due to the memory system. */
+    bool loadBlocked()
+    { return isLoadBlocked; }
+
+    /** Clears the signal that a load became blocked. */
+    void clearLoadBlocked()
+    { isLoadBlocked = false; }
+
+    /** Returns if the blocked load was handled. */
+    bool isLoadBlockedHandled()
+    { return loadBlockedHandled; }
+
+    /** Records the blocked load as being handled. */
+    void setLoadBlockedHandled()
+    { loadBlockedHandled = true; }
+
+    /** Returns the number of free entries (min of free LQ and SQ entries). */
+    unsigned numFreeEntries();
+
+    /** Returns the number of loads ready to execute. */
+    int numLoadsReady();
+
+    /** Returns the number of loads in the LQ. */
+    int numLoads() { return loads; }
+
+    /** Returns the number of stores in the SQ. */
+    int numStores() { return stores; }
+
+    /** Returns if either the LQ or SQ is full. */
+    bool isFull() { return lqFull() || sqFull(); }
+
+    /** Returns if the LQ is full (one entry is kept as a sentinel). */
+    bool lqFull() { return loads >= (LQEntries - 1); }
+
+    /** Returns if the SQ is full (one entry is kept as a sentinel). */
+    bool sqFull() { return stores >= (SQEntries - 1); }
+
+    /** Returns the number of instructions in the LSQ. */
+    unsigned getCount() { return loads + stores; }
+
+    /** Returns if there are any stores to writeback. */
+    bool hasStoresToWB() { return storesToWB; }
+
+    /** Returns the number of stores to writeback. */
+    int numStoresToWB() { return storesToWB; }
+
+    /** Returns if the LSQ unit will writeback on this cycle. */
+    bool willWB() { return storeQueue[storeWBIdx].canWB &&
+                        !storeQueue[storeWBIdx].completed &&
+                        !isStoreBlocked; }
+
+    /** Handles doing the retry. */
+    void recvRetry();
+
+  private:
+    /** Writes back the instruction, sending it to IEW. */
+    void writeback(DynInstPtr &inst, PacketPtr pkt);
+
+    /** Handles completing the send of a store to memory. */
+    void storePostSend(PacketPtr pkt);
+
+    /** Completes the store at the specified index. */
+    void completeStore(int store_idx);
+
+    /** Increments the given store index (circular queue). */
+    inline void incrStIdx(int &store_idx)
+    {
+        if (++store_idx >= SQEntries)
+            store_idx = 0;
+    }
+    /** Decrements the given store index (circular queue). */
+    inline void decrStIdx(int &store_idx)
+    {
+        if (--store_idx < 0)
+            store_idx += SQEntries;
+    }
+    /** Increments the given load index (circular queue). */
+    inline void incrLdIdx(int &load_idx)
+    {
+        if (++load_idx >= LQEntries)
+            load_idx = 0;
+    }
+    /** Decrements the given load index (circular queue). */
+    inline void decrLdIdx(int &load_idx)
+    {
+        if (--load_idx < 0)
+            load_idx += LQEntries;
+    }
+
+  public:
+    /** Debugging function to dump instructions in the LSQ. */
+    void dumpInsts();
+
+  private:
+    /** Pointer to the CPU. */
+    CPU *cpu;
+
+    /** Pointer to the IEW stage. */
+    Execute *executeStage;
+
+    /** Pointer to the LSQ. */
+    LSQ *lsq;
+
+    /** Pointer to the dcache port.  Used only for sending. */
+    Port *dcachePort;
+
+    /** Derived class to hold any sender state the LSQ needs. */
+    class LSQSenderState : public Packet::SenderState, public FastAlloc
+    {
+      public:
+        /** Default constructor.  Every flag and the index get a defined
+         * value so a state object never carries indeterminate fields.
+         */
+        LSQSenderState()
+            : isLoad(false), idx(0), noWB(false)
+        { }
+
+        /** Instruction who initiated the access to memory. */
+        DynInstPtr inst;
+        /** Whether or not it is a load. */
+        bool isLoad;
+        /** The LQ/SQ index of the instruction. */
+        int idx;
+        /** Whether or not the instruction will need to writeback. */
+        bool noWB;
+    };
+
+    /** Writeback event, specifically for when stores forward data to loads. */
+    class WritebackEvent : public Event {
+      public:
+        /** Constructs a writeback event. */
+        WritebackEvent(DynInstPtr &_inst, PacketPtr pkt, LSQUnit *lsq_ptr);
+
+        /** Processes the writeback event. */
+        void process();
+
+        /** Returns the description of this event. */
+        const char *description() const;
+
+      private:
+        /** Instruction whose results are being written back. */
+        DynInstPtr inst;
+
+        /** The packet that would have been sent to memory. */
+        PacketPtr pkt;
+
+        /** The pointer to the LSQ unit that issued the store. */
+        LSQUnit<Impl> *lsqPtr;
+    };
+
+  public:
+    struct SQEntry {
+        /** Constructs an empty store queue entry. */
+        SQEntry()
+            : inst(NULL), req(NULL), size(0),
+              canWB(0), committed(0), completed(0), nullified(0)
+        {
+            std::memset(data, 0, sizeof(data));
+        }
+
+        /** Constructs a store queue entry for a given instruction. */
+        SQEntry(DynInstPtr &_inst)
+            : inst(_inst), req(NULL), size(0),
+              canWB(0), committed(0), completed(0), nullified(0)
+        {
+            std::memset(data, 0, sizeof(data));
+        }
+
+        /** The store instruction. */
+        DynInstPtr inst;
+        /** The request for the store. */
+        RequestPtr req;
+        /** The size of the store. */
+        int size;
+        /** The store data. */
+        char data[sizeof(IntReg)];
+        /** Whether or not the store can writeback. */
+        bool canWB;
+        /** Whether or not the store is committed. */
+        bool committed;
+        /** Whether or not the store is completed. */
+        bool completed;
+        /** Whether or not the store is nullified. */
+        bool nullified;
+    };
+
+  private:
+    /** The LSQUnit thread id. */
+    ThreadID lsqID;
+
+    /** The store queue. */
+    std::vector<SQEntry> storeQueue;
+
+    /** The load queue. */
+    std::vector<DynInstPtr> loadQueue;
+
+    /** The number of LQ entries, plus a sentinel entry (circular queue).
+     * @todo: Consider having var that records the true number of LQ entries.
+     */
+    unsigned LQEntries;
+    /** The number of SQ entries, plus a sentinel entry (circular queue).
+     * @todo: Consider having var that records the true number of SQ entries.
+     */
+    unsigned SQEntries;
+
+    /** The number of load instructions in the LQ. */
+    int loads;
+    /** The number of store instructions in the SQ. */
+    int stores;
+    /** The number of store instructions in the SQ waiting to writeback. */
+    int storesToWB;
+
+    /** The index of the head instruction in the LQ. */
+    int loadHead;
+    /** The index of the tail instruction in the LQ. */
+    int loadTail;
+
+    /** The index of the head instruction in the SQ. */
+    int storeHead;
+    /** The index of the first instruction that may be ready to be
+     * written back, and has not yet been written back.
+     */
+    int storeWBIdx;
+    /** The index of the tail instruction in the SQ. */
+    int storeTail;
+
+    /** Store mask taken from the current instruction block (see
+     * setStoreMask()). */
+    uint32_t smask;
+    /// @todo Consider moving to a more advanced model with write vs read ports
+    /** The number of cache ports available each cycle. */
+    int cachePorts;
+
+    /** The number of used cache ports in this cycle. */
+    int usedPorts;
+
+    /** Is the LSQ switched out. */
+    bool switchedOut;
+
+    //list mshrSeqNums;
+
+    /** Wire to read information from the issue stage time queue. */
+    // NOTE(review): the template argument was lost in this copy of the
+    // patch; Issue2Execute is the only candidate typedef in this class.
+    // Confirm against the repository.
+    typename TimeBuffer<Issue2Execute>::wire fromIssue;
+
+    /** Whether or not the LSQ is stalled. */
+    bool stalled;
+    /** The store that causes the stall due to partial store to load
+     * forwarding.
+     */
+    InstSeqNum stallingStoreIsn;
+    /** The index of the load stalled on the above store. */
+    int stallingLoadIdx;
+
+    /** The packet that needs to be retried. */
+    PacketPtr retryPkt;
+
+    /** Whether or not a store is blocked due to the memory system. */
+    bool isStoreBlocked;
+
+    /** Whether or not a load is blocked due to the memory system. */
+    bool isLoadBlocked;
+
+    /** Has the blocked load been handled. */
+    bool loadBlockedHandled;
+
+    /** The sequence number of the blocked load. */
+    InstSeqNum blockedLoadSeqNum;
+
+    /** The oldest load that caused a memory ordering violation. */
+    DynInstPtr memDepViolator;
+
+    // Will also need how many read/write ports the Dcache has.  Or keep track
+    // of that in stage that is one level up, and only call executeLoad/Store
+    // the appropriate number of times.
+    /** Total number of loads whose data was entirely forwarded from LSQ
+     * stores. */
+    Stats::Scalar lsqAllForwLoads;
+
+    /** Total number of loads whose data was partially forwarded from LSQ
+     * stores. */
+    Stats::Scalar lsqPartialForwLoads;
+
+    /** Total number of squashed loads. */
+    Stats::Scalar lsqSquashedLoads;
+
+    /** Total number of responses from the memory system that are
+     * ignored due to the instruction already being squashed or block completed. */
+    Stats::Scalar lsqIgnoredResponses;
+
+    /** Total number of squashed stores. */
+    Stats::Scalar lsqSquashedStores;
+
+    /** Number of loads that were rescheduled. */
+    Stats::Scalar lsqRescheduledLoads;
+
+    /** Number of times the LSQ is blocked due to the cache. */
+    Stats::Scalar lsqCacheBlocked;
+
+  public:
+    /** Executes the load at the given index. */
+    template <class T>
+    Fault read(Request *req, T &data, int load_idx);
+
+    /** Executes the store at the given index. */
+    template <class T>
+    Fault write(Request *req, T &data, int store_idx);
+
+    /** Returns the index of the head load instruction. */
+    int getLoadHead() { return loadHead; }
+    /** Returns the sequence number of the head load instruction, or 0 if
+     * the head slot is empty. */
+    InstSeqNum getLoadHeadSeqNum()
+    {
+        if (loadQueue[loadHead]) {
+            return loadQueue[loadHead]->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns the index of the head store instruction. */
+    int getStoreHead() { return storeHead; }
+    /** Returns the sequence number of the head store instruction, or 0 if
+     * the head slot is empty. */
+    InstSeqNum getStoreHeadSeqNum()
+    {
+        if (storeQueue[storeHead].inst) {
+            return storeQueue[storeHead].inst->seqNum;
+        } else {
+            return 0;
+        }
+
+    }
+
+    /** Returns whether or not the LSQ unit is stalled. */
+    bool isStalled() { return stalled; }
+};
+
+/** Executes the load at load_idx.
+ *
+ * Every byte of the request is first looked up in the older stores of the
+ * store queue.  If all bytes can be forwarded, the value is copied into
+ * data and a WritebackEvent is scheduled instead of a memory access.
+ * Otherwise a timing read is sent to the dcache port; if the cache is (or
+ * becomes) blocked, the load is recorded as blocked.
+ *
+ * @param req Memory request.  It is deleted here on the rescheduled
+ *            uncacheable path and on the blocked/failed-send paths.
+ * @param data Receives the forwarded value when forwarding fully succeeds.
+ * @param load_idx Index of the load in the load queue.
+ * @return A machine check fault for an uncacheable load that is not yet at
+ *         the head and at commit; NoFault otherwise.
+ */
+template <class Impl>
+template <class T>
+Fault
+LSQUnit<Impl>::read(Request *req, T &data, int load_idx)
+{
+    DynInstPtr load_inst = loadQueue[load_idx];
+
+    assert(load_inst);
+
+    assert(!load_inst->isExecuted());
+
+    // Make sure this isn't an uncacheable access
+    // A bit of a hackish way to get uncached accesses to work only if they're
+    // at the head of the LSQ and are ready to commit (at the head of the ROB
+    // too).
+    if (req->isUncacheable() &&
+        (load_idx != loadHead || !load_inst->isAtCommit())) {
+
+        executeStage->rescheduleMemInst(load_inst);
+        ++lsqRescheduledLoads;
+
+        delete req;
+
+        //Fix me!!!
+        return TheISA::genMachineCheckFault();
+    }
+
+    // Check the SQ for any previous stores that might lead to forwarding
+
+    // This handles the EDGE-like forwarding scheme, which forwards data
+    // both between blocks and within a block.
+
+    int store_idx = storeHead;
+
+    // Traverse the store queue starting from the queue head to find the
+    // first store that cannot forward, then check every possible store
+    // for forwarding in descending order from that store back to the
+    // store queue head.
+    while(store_idx != storeTail){
+
+        DynInstPtr store_candidate = storeQueue[store_idx].inst;
+
+        // A store in the same block with a greater LSID cannot forward;
+        // break out of the loop.
+        if((store_candidate->getBlockID() == load_inst->getBlockID())&&
+           (store_candidate->threadNumber == load_inst->threadNumber )&&
+           (store_candidate->staticInst ->getLSID() >
+            load_inst->staticInst->getLSID())){
+            break;
+        }
+
+        // A store in a younger inst block cannot forward either; break
+        // out of the loop.
+        if((store_candidate->getBlockID() > load_inst->getBlockID())&&
+           (store_candidate->threadNumber == load_inst->threadNumber )) {
+            break;
+        }
+
+        // Any older inst block could possibly hold forwarding stores,
+        // so keep traversing.
+        incrStIdx(store_idx);
+    }
+
+    // Validate the store idx
+    if(store_idx == storeHead) {
+        // No store in the SQ satisfies the forwarding condition; mark
+        // the index as invalid.
+        store_idx = -1;
+    } else {
+        // Some stores in the SQ may satisfy the forwarding condition.
+        // Step back over the first store that cannot forward so the
+        // later traversal runs from this idx to the SQ head.
+        decrStIdx(store_idx);
+    }
+
+    int store_size = 0;
+
+    DPRINTF(EdgeLSQUnit, "Read called by load idx %i @%#x, while looking for "
+            "forwards from store idx %i in SQ.\n",
+            load_idx, req->getPaddr(), store_idx );
+
+    // @todo: There's no LLSC request right now ...
+    if (req->isLLSC()) {
+        // Disable recording the result temporarily.  Writing to misc
+        // regs normally updates the result, but this is not the
+        // desired behavior when handling store conditionals.
+        load_inst->recordResult = false;
+        TheISA::handleLockedRead(load_inst.get(), req);
+        load_inst->recordResult = true;
+    }
+
+    bool all_forward = true;
+    bool partial_forward = false;
+
+    // Check for each byte whether its data can be forwarded from a store
+    for(int offset = 0 ; offset < req ->getSize() ; ++offset){
+
+        int idx = store_idx;
+
+        // Check for forwarding through a valid idx
+        while(idx != -1){
+
+            assert(storeQueue[idx].inst);
+
+            assert((storeQueue[idx].inst->getBlockID()
+                    < load_inst ->getBlockID())||
+                   ((storeQueue[idx].inst ->getBlockID() ==
+                     load_inst ->getBlockID())&&
+                    (storeQueue[idx].inst ->staticInst ->getLSID() <
+                     load_inst ->staticInst ->getLSID())));
+
+            store_size = storeQueue[idx].size;
+
+            // Make sure the store itself is valid.
+            if(store_size == 0) {
+                // A size of 0 means an invalid store
+                if(idx == storeHead){ // Hit the SQ head: traversal ends.
+                    idx = -1;
+                }else{ // Otherwise keep traversing.
+                    decrStIdx(idx);
+                }
+                continue;
+            } else if(storeQueue[idx].inst ->uncacheable()) {
+                // @todo:
+                // Uncacheable stores cannot forward either; handling of
+                // uncacheable stores is not a concern so far.
+                if(idx == storeHead){
+                    idx = -1;
+                }else{
+                    decrStIdx(idx);
+                }
+                continue;
+            }
+
+            assert(storeQueue[idx].inst->effAddrValid);
+
+            // Figure out the forwarding boundaries.
+            bool store_has_lower_limit =
+                (req->getVaddr() + offset) >=
+                storeQueue[idx].inst->effAddr;
+            bool store_has_upper_limit =
+                (req->getVaddr() + offset) <
+                (storeQueue[idx].inst->effAddr + store_size);
+
+            if(store_has_lower_limit&&store_has_upper_limit) {
+                // This byte can be forwarded.
+                // Set the forward flag and data of this byte.  The local
+                // is not called "data" so it cannot be confused with the
+                // output parameter of the same name.
+                int position = req ->getVaddr() +
+                    offset - storeQueue[idx].inst ->effAddr;
+                char fwd_byte = storeQueue[idx].data[position];
+                load_inst ->setForward(offset , fwd_byte);
+                partial_forward = true;
+                break;
+            }
+
+            // This byte cannot be forwarded from this store; try the next.
+            if(idx == storeHead){ // Hit SQ head, stop traversing.
+                idx = -1;
+            }else{
+                decrStIdx(idx);
+            }
+
+        }
+
+        // -1 means this byte could not be forwarded.
+        if(idx == -1){
+            all_forward = false;
+        }
+
+    }
+
+    if ( partial_forward && !all_forward ){
+        lsqPartialForwLoads ++;
+    }
+
+    if(all_forward) {
+        // All load data can be forwarded from stores.
+
+        char forwardedData[Impl::MaxByteNum];
+
+        // Only getSize() bytes are filled below but sizeof(T) bytes are
+        // copied out, so clear the buffer first and make sure neither
+        // size exceeds it.
+        std::memset(forwardedData, 0, sizeof(forwardedData));
+        assert(static_cast<size_t>(req->getSize()) <= sizeof(forwardedData));
+        assert(sizeof(T) <= sizeof(forwardedData));
+
+        for(int i = 0; i < req ->getSize(); ++i){
+            forwardedData[i] = load_inst ->getForwardedData(i);
+        }
+
+        // Copy data to read buffer.
+        memcpy(&data, forwardedData, sizeof(T));
+        assert(!load_inst->memData);
+        load_inst->memData = new uint8_t[64];
+        memcpy(load_inst ->memData, forwardedData, req->getSize());
+
+        DPRINTF(EdgeLSQUnit, "Forwarding from store to load to "
+                "addr %#x, data %#x\n",
+                req->getVaddr(), data);
+
+        PacketPtr data_pkt = new Packet(req, MemCmd::ReadReq,
+                                        Packet::Broadcast);
+        data_pkt->dataStatic(load_inst->memData);
+
+        WritebackEvent *wb = new WritebackEvent(load_inst, data_pkt, this);
+
+        // We'll say this has a 1 cycle load-store forwarding latency
+        // for now.
+        // NOTE(review): the event is scheduled for curTick, i.e. with no
+        // added delay.
+        // @todo: Need to make this a parameter.
+        cpu->schedule(wb, curTick);
+        ++lsqAllForwLoads;
+
+        return NoFault;
+    }
+
+    // Not everything could be forwarded, so access memory
+    DPRINTF(EdgeLSQUnit, "Doing memory access for inst [sn:%lli] PC %#x\n",
+            load_inst->seqNum, load_inst->readPC());
+
+    assert(!load_inst->memData);
+    load_inst->memData = new uint8_t[64];
+
+    ++usedPorts;
+
+    // If the cache is not blocked, do the cache access
+    if (!lsq->cacheBlocked()) {
+        PacketPtr data_pkt =
+            new Packet(req,
+                       (req->isLLSC() ?
+                        MemCmd::LoadLockedReq : MemCmd::ReadReq),
+                       Packet::Broadcast);
+        data_pkt->dataStatic(load_inst->memData);
+
+        LSQSenderState *state = new LSQSenderState;
+        state->isLoad = true;
+        state->idx = load_idx;
+        state->inst = load_inst;
+        data_pkt->senderState = state;
+
+        if (!dcachePort->sendTiming(data_pkt)) {
+            // Delete state and data packet because a load retry
+            // initiates a pipeline restart; it does not retry.
+            delete state;
+            delete data_pkt->req;
+            delete data_pkt;
+
+            // The request was just freed through the packet; clear the
+            // pointer so it is not deleted again below.
+            req = NULL;
+
+            // If the access didn't succeed, tell the LSQ by setting
+            // the retry thread id.
+            lsq->setRetryTid(lsqID);
+        }
+    }
+
+    // If the cache was blocked, or has become blocked due to the access,
+    // handle it.
+    if (lsq->cacheBlocked()) {
+        if (req)
+            delete req;
+
+        ++lsqCacheBlocked;
+
+        //executeStage->decrWb(load_inst->seqNum);
+        // There's an older load that's already going to squash.
+        if (isLoadBlocked && blockedLoadSeqNum < load_inst->seqNum)
+            return NoFault;
+
+        // Record that the load was blocked due to memory.  This
+        // load will squash all instructions after it, be
+        // refetched, and re-executed.
+        isLoadBlocked = true;
+        loadBlockedHandled = false;
+        blockedLoadSeqNum = load_inst->seqNum;
+
+        // No fault occurred, even though the interface is blocked.
+        return NoFault;
+    }
+
+    return NoFault;
+}
+
+template +template +Fault +LSQUnit::write(Request *req, T &data, int store_idx) +{ + assert(storeQueue[store_idx].inst); + + DPRINTF(EdgeLSQUnit, "Doing write to store idx %i, physical addr %#x virtual addr %#x data %#x" + " | storeHead:%i [sn:%i]\n", + store_idx, req->getPaddr(), req->getVaddr(), data, storeHead, + storeQueue[store_idx].inst->seqNum); + + storeQueue[store_idx].req = req; + storeQueue[store_idx].size = sizeof(T); + assert(sizeof(T) <= sizeof(storeQueue[store_idx].data)); + + T gData = htog(data); + memcpy(storeQueue[store_idx].data, &gData, sizeof(T)); + + // This function only writes the data to the store queue, so no fault + // can happen here.
+ return NoFault; +} + +#endif // __CPU_EDGE_LSQ_UNIT_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/lsq_unit.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/lsq_unit.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,41 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/lsq_unit_impl.hh" + +// Force the instantiation of LDSTQ for all the implementations we care about. +template class LSQUnit; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/lsq_unit_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/lsq_unit_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,1191 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "arch/locked_mem.hh" +#include "config/the_isa.hh" +#include "config/use_checker.hh" +#include "cpu/edge/lsq.hh" +#include "cpu/edge/lsq_unit.hh" +#include "base/str.hh" +#include "mem/packet.hh" +#include "mem/request.hh" + +#if USE_CHECKER +#include "cpu/checker/cpu.hh" +#endif + +template +LSQUnit::WritebackEvent::WritebackEvent(DynInstPtr &_inst, PacketPtr _pkt, + LSQUnit *lsq_ptr) + : inst(_inst), pkt(_pkt), lsqPtr(lsq_ptr) +{ + this->setFlags(Event::AutoDelete); +} + +template +void +LSQUnit::WritebackEvent::process() +{ + if (!lsqPtr->isSwitchedOut()) { + lsqPtr->writeback(inst, pkt); + } + + if (pkt->senderState) + delete pkt->senderState; + + delete pkt->req; + delete pkt; +} + +template +const char * +LSQUnit::WritebackEvent::description() const +{ + return "Store writeback"; +} + +template +void +LSQUnit::completeDataAccess(PacketPtr pkt) +{ + LSQSenderState *state = dynamic_cast(pkt->senderState); + DynInstPtr inst = state->inst; + + DPRINTF(EdgeExe, "Writeback event [Bid:%lli][Iid:%lli][LSID:%i]\n", + inst->getBlockID(), inst->getInstID(), inst->staticInst->getLSID()); + DPRINTF(Activity, "Activity: Writeback event [sn:%lli]\n", inst->seqNum); + + assert(!pkt->wasNacked()); + + if (isSwitchedOut() || inst->isSquashed() || + (inst->isLoad() && inst->isBlockCompleted()) ) { + DPRINTF(EdgeExe, "Switch out or squashed or block " + "completed inst encounted\n"); + }else { + if (!state->noWB) { + + if(!inst ->isForward()) { + // No forward case, just writeback load results. + DPRINTF(EdgeLSQUnit, "Load has no forward.\n"); + writeback(inst, pkt); + } else { + // Forwarding case, handle it. + DPRINTF(EdgeLSQUnit, "Load has forward.\n"); + uint8_t p[8]; + pkt ->writeData(&(p[0])); + + // Check which byte has forwarding data. 
+ for(int i = 0 ; i < Impl::MaxByteNum ; ++i){ + if(inst ->isForward(i)){ + p[i] = inst ->getForwardedData(i); + DPRINTF(EdgeLSQUnit, "Load has forward. Forward data[%i]:%x\n", + i, p[i]); + } + } + + // Write back data. + pkt ->setData(&(p[0])); + writeback(inst, pkt); + } + + } + + // Handling store inst. + if (inst->isStore()) { + completeStore(state->idx); + } + + } + + delete state; + delete pkt->req; + delete pkt; +} + +template +LSQUnit::LSQUnit() + : loads(0), stores(0), storesToWB(0), stalled(false), + isStoreBlocked(false), isLoadBlocked(false), + loadBlockedHandled(false) +{ +} + +template +void +LSQUnit::init(CPU *cpu_ptr, Execute *execute_ptr, SimpleEdgeCPUParams *params, + LSQ *lsq_ptr, unsigned maxLQEntries, unsigned maxSQEntries, + unsigned id) +{ + cpu = cpu_ptr; + executeStage = execute_ptr; + + DPRINTF(EdgeLSQUnit, "Creating LSQUnit%i object.\n",id); + + switchedOut = false; + + lsq = lsq_ptr; + + lsqID = id; + + // Add 1 for the sentinel entry (they are circular queues). + LQEntries = maxLQEntries + 1; + SQEntries = maxSQEntries + 1; + + loadQueue.resize(LQEntries); + storeQueue.resize(SQEntries); + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + cachePorts = params->cachePorts; + + retryPkt = NULL; + memDepViolator = NULL; + + blockedLoadSeqNum = 0; +} + +template +std::string +LSQUnit::name() const +{ + if (Impl::MaxThreads == 1) { + return executeStage->name() + ".lsq"; + } else { + return executeStage->name() + ".lsq.thread." 
+ to_string(lsqID); + } +} + +template +void +LSQUnit::regStats() +{ + lsqAllForwLoads + .name(name() + ".allForwLoads") + .desc("Number of loads that had all data forwarded from stores"); + + lsqPartialForwLoads + .name(name() + ".partiallForwLoads") + .desc("Number of loads that had partial data forwarded from stores"); + + lsqSquashedLoads + .name(name() + ".squashedLoads") + .desc("Number of loads squashed"); + + lsqIgnoredResponses + .name(name() + ".ignoredResponses") + .desc("Number of memory responses ignored because the instruction is squashed"); + + lsqSquashedStores + .name(name() + ".squashedStores") + .desc("Number of stores squashed"); + + lsqRescheduledLoads + .name(name() + ".rescheduledLoads") + .desc("Number of loads that were rescheduled"); + + lsqCacheBlocked + .name(name() + ".cacheBlocked") + .desc("Number of times an access to memory failed due to the cache being blocked"); +} + +template +void +LSQUnit::setDcachePort(Port *dcache_port) +{ + dcachePort = dcache_port; + +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->setDcachePort(dcachePort); + } +#endif +} + +template +void +LSQUnit::clearLQ() +{ + loadQueue.clear(); +} + +template +void +LSQUnit::clearSQ() +{ + storeQueue.clear(); +} + +template +void +LSQUnit::switchOut() +{ + switchedOut = true; + for (int i = 0; i < loadQueue.size(); ++i) { + assert(!loadQueue[i]); + loadQueue[i] = NULL; + } + + assert(storesToWB == 0); +} + +template +void +LSQUnit::takeOverFrom() +{ + switchedOut = false; + loads = stores = storesToWB = 0; + + loadHead = loadTail = 0; + + storeHead = storeWBIdx = storeTail = 0; + + usedPorts = 0; + + memDepViolator = NULL; + + blockedLoadSeqNum = 0; + + stalled = false; + isLoadBlocked = false; + loadBlockedHandled = false; +} + +template +void +LSQUnit::resizeLQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + assert(size_plus_sentinel >= LQEntries); + + if (size_plus_sentinel > LQEntries) { + while (size_plus_sentinel > loadQueue.size()) { + 
DynInstPtr dummy; + loadQueue.push_back(dummy); + LQEntries++; + } + } else { + LQEntries = size_plus_sentinel; + } + +} + +template +void +LSQUnit::resizeSQ(unsigned size) +{ + unsigned size_plus_sentinel = size + 1; + if (size_plus_sentinel > SQEntries) { + while (size_plus_sentinel > storeQueue.size()) { + SQEntry dummy; + storeQueue.push_back(dummy); + SQEntries++; + } + } else { + SQEntries = size_plus_sentinel; + } +} + +template +void +LSQUnit::insert(DynInstPtr &inst) +{ + assert(inst->isMemRef()); + + assert(inst->isLoad() || inst->isStore()); + + if (inst->isLoad()) { + insertLoad(inst); + } else { + insertStore(inst); + } + + inst->setInLSQ(); +} + +template +void +LSQUnit::insertLoad(DynInstPtr &load_inst) +{ + assert((loadTail + 1) % LQEntries != loadHead); + assert(loads < LQEntries); + + DPRINTF(EdgeLSQUnit, "Inserting load PC %#x, idx:%i [Bid:%lli][Iid:%lli][LSID:%i]\n", + load_inst->readPC(), loadTail, + load_inst->getBlockID(), + load_inst->getInstID(), + load_inst->staticInst->getLSID()); + + load_inst->lqIdx = loadTail; + + loadQueue[loadTail] = load_inst; + + incrLdIdx(loadTail); + + ++loads; +} + +template +void +LSQUnit::insertStore(DynInstPtr &store_inst) +{ + // Make sure it is not full before inserting an instruction. 
+    assert((storeTail + 1) % SQEntries != storeHead);
+    assert(stores < SQEntries);
+
+    DPRINTF(EdgeLSQUnit, "Inserting store PC %#x, idx:%i [Bid:%lli][Iid:%lli][LSID:%i]\n",
+            store_inst->readPC(), storeTail,
+            store_inst->getBlockID(),
+            store_inst->getInstID(),
+            store_inst->staticInst->getLSID() );
+
+    store_inst->sqIdx = storeTail;
+
+    storeQueue[storeTail] = SQEntry(store_inst);
+
+    incrStIdx(storeTail);
+
+    ++stores;
+}
+
+// Return the currently recorded memory-dependence violator (may be NULL)
+// and clear the record, so each violator is handed out only once.
+template
+typename Impl::DynInstPtr
+LSQUnit::getMemDepViolator()
+{
+    DynInstPtr temp = memDepViolator;
+
+    memDepViolator = NULL;
+
+    return temp;
+}
+
+// Return the number of free entries in whichever of the load queue and
+// store queue has fewer free entries, not counting the sentinel slot.
+template
+unsigned
+LSQUnit::numFreeEntries()
+{
+    unsigned free_lq_entries = LQEntries - loads;
+    unsigned free_sq_entries = SQEntries - stores;
+
+    // Both the LQ and SQ entries have an extra dummy entry to differentiate
+    // empty/full conditions.  Subtract 1 from the free entries.
+    if (free_lq_entries < free_sq_entries) {
+        return free_lq_entries - 1;
+    } else {
+        return free_sq_entries - 1;
+    }
+}
+
+// Count the loads between loadHead and loadTail that report
+// readyToIssue().  Every slot in that range is asserted to hold a
+// valid instruction.
+template
+int
+LSQUnit::numLoadsReady()
+{
+    int load_idx = loadHead;
+    int retval = 0;
+
+    while (load_idx != loadTail) {
+        assert(loadQueue[load_idx]);
+
+        if (loadQueue[load_idx]->readyToIssue()) {
+            ++retval;
+        }
+
+        // Step to the next LQ slot (incrLdIdx wraps around the circular
+        // queue).  Without this the loop never terminates whenever the
+        // load queue is non-empty.
+        incrLdIdx(load_idx);
+    }
+
+    return retval;
+}
+
+template
+Fault
+LSQUnit::executeLoad(DynInstPtr &inst)
+{
+    using namespace TheISA;
+    // Execute a specific load.
+    Fault load_fault = NoFault;
+
+    DPRINTF(EdgeLSQUnit, "Executing load PC %#x, [sn:%lli]\n",
+            inst->readPC(),inst->seqNum);
+
+    assert(!inst->isSquashed() && !inst->isNullified() );
+
+    load_fault = inst->initiateAcc();
+
+    // If the instruction faulted, then it will be handled in execute
+    // phase where the fault could be propagated through the whole
+    // block and finally handled when block committed.
+ if (load_fault != NoFault) { + DPRINTF(EdgeLSQUnit, "Load faulted.\n"); + executeStage->activityThisCycle(); + } else if (!loadBlocked()) { + assert(inst->effAddrValid); + int load_idx = inst->lqIdx; + incrLdIdx(load_idx); + //@todo: The following codes intend to handle the mem access violation + // cases however, edge model don't have that case right now. So leave it + // in useless. + #if 0 + while (load_idx != loadTail) { + // Really only need to check loads that have actually executed + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + if ((loadQueue[load_idx] ->getBlockID() > inst ->getBlockID())|| + ((loadQueue[load_idx] ->getBlockID() == inst ->getBlockID())&& + (loadQueue[load_idx] ->staticInst ->getLSID() > inst ->staticInst ->getLSID()))) { + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + ((loadQueue[load_idx]->effAddr) == + (inst->effAddr))) { + // A load incorrectly passed this load. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DPRINTF(EdgeLSQUnit,"load-load violation occurs between inst[LSID:%d][EA:%#x]" + "and inst[LSID:%d][EA:%#x].\n", + loadQueue[load_idx] ->staticInst ->getLSID(), + loadQueue[load_idx]->effAddr, + inst ->staticInst ->getLSID(), + inst->effAddr); + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; + } + + ++lsqMemOrderViolation; + + //Fix me!! + return new TripsFault(); + } + } + incrLdIdx(load_idx); + } + #endif + } + + return load_fault; +} + +template +Fault +LSQUnit::executeStore(DynInstPtr &store_inst) +{ + using namespace TheISA; + // Make sure that a store exists. 
+ assert(stores != 0); + + int store_idx = store_inst->sqIdx; + + DPRINTF(EdgeLSQUnit, "Executing store PC %#x [sn:%lli]\n", + store_inst->readPC(), store_inst->seqNum); + + assert(!store_inst->isSquashed()); + + // Check the recently completed loads to see if any match this store's + // address. If so, then we have a memory ordering violation. + //int load_idx = store_inst->lqIdx; + + int load_idx = loadHead; + + Fault store_fault = store_inst->initiateAcc(); + + if (storeQueue[store_idx].size == 0) { + DPRINTF(EdgeLSQUnit,"Fault on Store PC %#x, [sn:%lli],Size = 0\n", + store_inst->readPC(),store_inst->seqNum); + + return store_fault; + } + + assert(store_fault == NoFault); + + if (store_inst->isStoreConditional()) { + // Store conditionals need to set themselves as able to + // writeback if we haven't had a fault by here. + storeQueue[store_idx].canWB = true; + + ++storesToWB; + } + + // Check for Load/Store violation + // Firstly, figure out the idx in LQ that could lead to + // a violation. + while(load_idx != loadTail){ + // Find the idx of load inst that can not executed before this + // store inst. In the same block, all the load inst having greater + // LSID than this store can not executed before this store. + if((loadQueue[load_idx]->getBlockID() == store_inst->getBlockID())&& + (loadQueue[load_idx]->staticInst ->getLSID() > + store_inst->staticInst ->getLSID())) { + break; + } + + // All the load inst in the younger block can not executed + // before this store. + if (loadQueue[load_idx]->getBlockID() > store_inst->getBlockID() ) { + break; + } + + incrLdIdx(load_idx); + } + + assert(store_inst->effAddrValid); + +#if 0 + // Check for Load/Store violation with respect to load_idx + // sovled before. 
+ while (load_idx != loadTail){ + + assert((loadQueue[load_idx] ->getBlockID() > store_inst ->getBlockID()) + || ((loadQueue[load_idx] ->getBlockID() == store_inst ->getBlockID()) + &&(loadQueue[load_idx] ->staticInst ->getLSID() > + store_inst ->staticInst ->getLSID()))); + + // Really only need to check loads that have actually executed + // It's safe to check all loads because effAddr is set to + // InvalAddr when the dyn inst is created. + + // @todo: For now this is extra conservative, detecting a + // violation if the addresses match assuming all accesses + // are quad word accesses. + + // @todo: Fix this, magic number being used here + if (loadQueue[load_idx]->effAddrValid && + (loadQueue[load_idx]->effAddr >> 8) == + (store_inst->effAddr >> 8)) { + + DPRINTF(EdgeLSQUnit,"load-store violation occurs between inst[Bid:%lli][LSID:%d][EA:%#x]" + "and inst[Bid:%lli][LSID:%d][EA:%#x].\n", + loadQueue[load_idx]->getBlockID(), + loadQueue[load_idx] ->staticInst ->getLSID(), + loadQueue[load_idx]->effAddr, + store_inst->getBlockID(), + store_inst ->staticInst ->getLSID(), + store_inst->effAddr); + + // A load incorrectly passed this store. Squash and refetch. + // For now return a fault to show that it was unsuccessful. + DynInstPtr violator = loadQueue[load_idx]; + if (!memDepViolator || + (violator->seqNum < memDepViolator->seqNum)) { + memDepViolator = violator; + } else { + break; + } + + ++lsqMemOrderViolation; + + //Fix me!! + return new TripsFault(); + + } + + incrLdIdx(load_idx); + } +#endif + + return store_fault; +} + +template +void +LSQUnit::nullifyStore(DynInstPtr &inst) +{ + int store_idx = inst->sqIdx; + + DPRINTF( EdgeLSQUnit, "Nullify store with PC@%lli and LSID %i in store queue %i.\n", + inst->readPC(), inst->staticInst->getLSID(), store_idx); + + storeQueue[store_idx].nullified = true; +} + +template +void +LSQUnit::commitInstBlock(BlockID blockID) +{ + //@todo: This seems not used right now. 
+ for(int idx=0 ; idxgetBlockID() <= blockID){ + loadQueue[idx] = NULL; + --loads; + } + } + for(int idx=0 ; idxgetBlockID() <= blockID){ + storeQueue[idx].canWB = true; + ++storesToWB; + } + } +} + +template +void +LSQUnit::commitLoad() +{ + assert(loadQueue[loadHead]); + + DPRINTF(EdgeLSQUnit, "Committing head load instruction, PC %#x\n", + loadQueue[loadHead]->readPC()); + + if ( !loadQueue[loadHead]->isExecuted() && + loadQueue[loadHead]->isBlockCompleted() ) { + + DPRINTF(EdgeLSQUnit, "Unexecuted load when commit," + " complete it in memDepUnit.\n"); + + executeStage->instQueue.completeMemInst(loadQueue[loadHead]); + } + + loadQueue[loadHead] = NULL; + + incrLdIdx(loadHead); + + --loads; +} + +template +void +LSQUnit::commitLoads(BlockID blockID) +{ + assert(loads == 0 || loadQueue[loadHead]); + + while (loads != 0 && loadQueue[loadHead]->getBlockID() <= blockID) { + commitLoad(); + } +} + +template +void +LSQUnit::commitStores(BlockID blockID) +{ + assert(stores == 0 || storeQueue[storeHead].inst); + + int store_idx = storeHead; + + while (store_idx != storeTail) { + assert(storeQueue[store_idx].inst); + // Mark any stores that are now committed and have not yet + // been marked as able to write back. + if (!storeQueue[store_idx].canWB) { + if ((storeQueue[store_idx].inst)->getBlockID() <= blockID) { + storeQueue[store_idx].canWB = true; + ++storesToWB; + } + } + incrStIdx(store_idx); + } +} + +template +void +LSQUnit::writebackStores() +{ + DPRINTF(EdgeLSQUnit,"store head=%d,store tail = %d, store wbidx=%d.\n", + storeHead,storeTail,storeWBIdx); + + while (storesToWB > 0 && + storeWBIdx != storeTail && + storeQueue[storeWBIdx].inst && + storeQueue[storeWBIdx].canWB && + usedPorts < cachePorts) { + + DPRINTF(EdgeLSQUnit,"%d stores to write back.\n",storesToWB); + + // Stores have been nullified, can not write back. 
+ if ( storeQueue[storeWBIdx].nullified ) { + DPRINTF(EdgeLSQUnit, "Nullified store!\n"); + completeStore(storeWBIdx); + incrStIdx(storeWBIdx); + continue; + } + + if( !storeQueue[storeWBIdx].inst->isExecuted()&& + storeQueue[storeWBIdx].inst->isBlockCompleted()) { + + DPRINTF(EdgeLSQUnit, "Unexecuted store with block completed encounted." + "Complete it in memDepUnit. \n"); + + // This means the store hasn't executed because of + // predication ( multi stores have the same LSID ). + // We have to complete the unexecuted inst in + // memDepUnit here. + executeStage->instQueue.completeMemInst(storeQueue[storeWBIdx].inst); + completeStore(storeWBIdx); + incrStIdx(storeWBIdx); + continue; + + } + + if (isStoreBlocked || lsq->cacheBlocked()) { + DPRINTF(EdgeLSQUnit, "Unable to write back any more stores, cache" + " is blocked!\n"); + break; + } + + // Store didn't write any data so no need to write it back to + // memory. + if (storeQueue[storeWBIdx].size == 0) { + DPRINTF(EdgeLSQUnit, "Store has no data to write back.\n"); + completeStore(storeWBIdx); + incrStIdx(storeWBIdx); + continue; + } + + ++usedPorts; + + if (storeQueue[storeWBIdx].inst->isDataPrefetch()) { + incrStIdx(storeWBIdx); + + continue; + } + + assert(storeQueue[storeWBIdx].req); + assert(!storeQueue[storeWBIdx].committed); + + DynInstPtr inst = storeQueue[storeWBIdx].inst; + + Request *req = storeQueue[storeWBIdx].req; + storeQueue[storeWBIdx].committed = true; + + assert(!inst->memData); + inst->memData = new uint8_t[64]; + + memcpy(inst->memData, storeQueue[storeWBIdx].data, req->getSize()); + + MemCmd command = + req->isSwap() ? MemCmd::SwapReq : + (req->isLLSC() ? 
MemCmd::StoreCondReq : MemCmd::WriteReq); + PacketPtr data_pkt = new Packet(req, command, + Packet::Broadcast); + data_pkt->dataStatic(inst->memData); + + LSQSenderState *state = new LSQSenderState; + state->isLoad = false; + state->idx = storeWBIdx; + state->inst = inst; + data_pkt->senderState = state; + + DPRINTF(EdgeLSQWriteBack, "D-Cache: Writing back store idx:%i PC:%#x " + " %i bytes to physical Addr:%#x[Vaddr:%#x], data:%#x [sn:%lli]\n", + storeWBIdx, inst->readPC(), req->getSize(), + req->getPaddr(), req->getVaddr(), + TheISA::gtoh((*(uint64_t *)inst->memData)), + inst->seqNum); + + // @todo: Remove this SC hack once the memory system handles it. + if (inst->isStoreConditional()) { + // Disable recording the result temporarily. Writing to + // misc regs normally updates the result, but this is not + // the desired behavior when handling store conditionals. + inst->recordResult = false; + bool success = TheISA::handleLockedWrite(inst.get(), req); + inst->recordResult = true; + + if (!success) { + // Instantly complete this store. + DPRINTF(EdgeLSQUnit, "Store conditional [sn:%lli] failed. " + "Instantly completing it.\n", + inst->seqNum); + WritebackEvent *wb = new WritebackEvent(inst, data_pkt, this); + cpu->schedule(wb, curTick + 1); + completeStore(storeWBIdx); + incrStIdx(storeWBIdx); + continue; + } + } else { + // Non-store conditionals do not need a writeback. + state->noWB = true; + } + + if (!dcachePort->sendTiming(data_pkt)) { + // Need to handle becoming blocked on a store. + DPRINTF(EdgeExe, "D-Cache became blocked when writing [sn:%lli], will" + "retry later\n", + inst->seqNum); + isStoreBlocked = true; + ++lsqCacheBlocked; + assert(retryPkt == NULL); + retryPkt = data_pkt; + lsq->setRetryTid(lsqID); + } else { + DPRINTF(EdgeLSQUnit,"Store send pkt.\n"); + storePostSend(data_pkt); + } + } + + // Not sure this should set it to 0. 
+ usedPorts = 0; + + assert(stores >= 0 && storesToWB >= 0); +} + +/*template +void +LSQUnit::removeMSHR(InstSeqNum seqNum) +{ + list::iterator mshr_it = find(mshrSeqNums.begin(), + mshrSeqNums.end(), + seqNum); + + if (mshr_it != mshrSeqNums.end()) { + mshrSeqNums.erase(mshr_it); + DPRINTF(LSQUnit, "Removing MSHR. count = %i\n",mshrSeqNums.size()); + } +}*/ + +template +void +LSQUnit::squash(const TheISA::BlockID &squashed_num) +{ + DPRINTF(EdgeLSQUnit, "Squashing until [Bid:%lli]!" + "(Loads:%i Stores:%i)\n", squashed_num, loads, stores); + + int load_idx = loadTail; + decrLdIdx(load_idx); + + while (loads != 0 && loadQueue[load_idx]->getBlockID() > squashed_num) { + DPRINTF(EdgeLSQUnit,"Load Instruction PC %#x squashed, " + "[Bid:%lli][Iid:%lli]\n", + loadQueue[load_idx]->readPC(), + loadQueue[load_idx]->getBlockID(), + loadQueue[load_idx]->getInstID()); + + if (isStalled() && load_idx == stallingLoadIdx) { + stalled = false; + stallingStoreIsn = 0; + stallingLoadIdx = 0; + } + + // Clear the smart pointer to make sure it is decremented. + loadQueue[load_idx]->setSquashed(); + loadQueue[load_idx] = NULL; + --loads; + + // Inefficient! + loadTail = load_idx; + + decrLdIdx(load_idx); + ++lsqSquashedLoads; + } + + if (isLoadBlocked) { + if (squashed_num < blockedLoadSeqNum) { + isLoadBlocked = false; + loadBlockedHandled = false; + blockedLoadSeqNum = 0; + } + } + + if (memDepViolator && squashed_num < memDepViolator->seqNum) { + memDepViolator = NULL; + } + + int store_idx = storeTail; + decrStIdx(store_idx); + + while (stores != 0 && + storeQueue[store_idx].inst->getBlockID() > squashed_num) { + // Instructions marked as can WB are already committed. 
+ if (storeQueue[store_idx].canWB) { + break; + } + + DPRINTF(EdgeLSQUnit,"Store Instruction PC %#x squashed, " + "idx:%i [Bid:%lli][Iid:%lli]\n", + storeQueue[store_idx].inst->readPC(), + store_idx, storeQueue[store_idx].inst->getBlockID(), + storeQueue[store_idx].inst->getInstID() ); + + // I don't think this can happen. It should have been cleared + // by the stalling load. + if (isStalled() && + storeQueue[store_idx].inst->seqNum == stallingStoreIsn) { + panic("Is stalled should have been cleared by stalling load!\n"); + stalled = false; + stallingStoreIsn = 0; + } + + // Clear the smart pointer to make sure it is decremented. + storeQueue[store_idx].inst->setSquashed(); + storeQueue[store_idx].inst = NULL; + storeQueue[store_idx].canWB = 0; + + // Must delete request now that it wasn't handed off to + // memory. This is quite ugly. @todo: Figure out the proper + // place to really handle request deletes. + delete storeQueue[store_idx].req; + + storeQueue[store_idx].req = NULL; + --stores; + + // Inefficient! + storeTail = store_idx; + + decrStIdx(store_idx); + ++lsqSquashedStores; + } +} + +template +void +LSQUnit::storePostSend(PacketPtr pkt) +{ + if (isStalled() && + storeQueue[storeWBIdx].inst->seqNum == stallingStoreIsn) { + DPRINTF(EdgeLSQUnit, "Unstalling, stalling store [sn:%lli] " + "load idx:%i\n", + stallingStoreIsn, stallingLoadIdx); + stalled = false; + stallingStoreIsn = 0; + executeStage->replayMemInst(loadQueue[stallingLoadIdx]); + } + + if (!storeQueue[storeWBIdx].inst->isStoreConditional()) { + // The store is basically completed at this time. This + // only works so long as the checker doesn't try to + // verify the value in memory for stores. 
+ storeQueue[storeWBIdx].inst->setCompleted(); +#if USE_CHECKER + if (cpu->checker) { + cpu->checker->verify(storeQueue[storeWBIdx].inst); + } +#endif + } + + incrStIdx(storeWBIdx); +} + +template +void +LSQUnit::writeback(DynInstPtr &inst, PacketPtr pkt) +{ + executeStage->wakeCPU(); + + assert(inst->isMemRef()); + assert(inst->isLoad()); + + // Squashed instructions do not need to complete their access. + if (inst->isSquashed() || inst->isBlockCompleted() ) { + DPRINTF(EdgeLSQUnit, "Squashed or block completed " + "load[Bid:%lli][Iid:%lli][LSID:%i] encounterd.\n", + + inst->getBlockID(), inst->getInstID(), inst->staticInst->getLSID() ); + assert(!inst->isStore()); + ++lsqIgnoredResponses; + return; + } + + if (!inst->isExecuted()) { + inst->setExecuted(); + + assert(inst->isMemRef()); + DPRINTF(EdgeLSQWriteBack,"Load from %i bytes p:%#x[v:%#x] with data %#x\n", + pkt->getSize(), + pkt->req->getPaddr(), + pkt->req->getVaddr(), + TheISA::gtoh(*(uint64_t*)pkt->getPtr())); + + // Complete access to copy data to proper place. + inst->completeAcc(pkt); + } + + // Load write-back, wake up dependents. + executeStage->instQueue.wakeDependents(inst); + + executeStage->activityThisCycle(); +} + +template +void +LSQUnit::completeStore(int store_idx) +{ + assert(storeQueue[store_idx].inst); + storeQueue[store_idx].completed = true; + --storesToWB; + DPRINTF(EdgeLSQUnit,"Store[LSID:%i] is completed.\n", + storeQueue[store_idx].inst->staticInst->getLSID()); + + // A bit conservative because a store completion may not free up entries, + // but hopefully avoids two store completions in one cycle from making + // the CPU tick twice. 
+    cpu->wakeCPU();
+    cpu->activityThisCycle();
+
+#if 0
+    if (store_idx == storeHead) {
+        do {
+
+            incrStIdx(storeHead);
+
+            --stores;
+        } while (storeQueue[storeHead].completed &&
+                 storeHead != storeTail);
+
+        executeStage->updateLSQNextCycle = true;
+    }
+#endif
+
+    DPRINTF(EdgeLSQUnit, "Completing store [sn:%lli], idx:%i, store head "
+            "idx:%i\n",
+            storeQueue[store_idx].inst->seqNum, store_idx, storeHead);
+
+    if (isStalled() &&
+        storeQueue[store_idx].inst->seqNum == stallingStoreIsn) {
+        DPRINTF(EdgeLSQUnit, "Unstalling, stalling store [sn:%lli] "
+                "load idx:%i\n",
+                stallingStoreIsn, stallingLoadIdx);
+        stalled = false;
+        stallingStoreIsn = 0;
+        executeStage->replayMemInst(loadQueue[stallingLoadIdx]);
+    }
+
+    storeQueue[store_idx].inst->setCompleted();
+
+#if USE_CHECKER
+    // Keep our own reference to the completed store: when store_idx is
+    // the queue head, the loop below clears storeQueue[store_idx].inst,
+    // and the checker must not be handed that cleared (NULL) pointer.
+    DynInstPtr store_inst = storeQueue[store_idx].inst;
+#endif
+
+    if (store_idx == storeHead) {
+        // Retire the head entry and every already-completed entry that
+        // follows it, stopping at the first incomplete store or at the
+        // tail of the queue.
+        do {
+            // Delete this reference.
+            storeQueue[storeHead].inst = NULL;
+
+            incrStIdx(storeHead);
+
+            --stores;
+        } while (storeQueue[storeHead].completed &&
+                 storeHead != storeTail);
+
+        executeStage->updateLSQNextCycle = true;
+    }
+    // Delete this reference to store inst
+    //storeQueue[store_idx].inst = NULL;
+
+    // Tell the checker we've completed this instruction.  Some stores
+    // may get reported twice to the checker, but the checker can
+    // handle that case.
+#if USE_CHECKER
+    if (cpu->checker) {
+        cpu->checker->verify(store_inst);
+    }
+#endif
+}
+
+template
+void
+LSQUnit::recvRetry()
+{
+    if (isStoreBlocked) {
+        DPRINTF(EdgeLSQUnit, "Receiving retry: store blocked\n");
+        assert(retryPkt != NULL);
+
+        if (dcachePort->sendTiming(retryPkt)) {
+            storePostSend(retryPkt);
+            retryPkt = NULL;
+            isStoreBlocked = false;
+            lsq->setRetryTid(InvalidThreadID);
+        } else {
+            // Still blocked!
+ ++lsqCacheBlocked; + lsq->setRetryTid(lsqID); + } + } else if (isLoadBlocked) { + DPRINTF(EdgeLSQUnit, "Loads squash themselves and all younger insts, " + "no need to resend packet.\n"); + } else { + DPRINTF(EdgeLSQUnit, "Retry received but LSQ is no longer blocked.\n"); + } +} + +#if 0 +template +inline void +LSQUnit::incrStIdx(int &store_idx) +{ + if (++store_idx >= SQEntries) + store_idx = 0; +} + +template +inline void +LSQUnit::decrStIdx(int &store_idx) +{ + if (--store_idx < 0) + store_idx += SQEntries; +} + +template +inline void +LSQUnit::incrLdIdx(int &load_idx) +{ + if (++load_idx >= LQEntries) + load_idx = 0; +} + +template +inline void +LSQUnit::decrLdIdx(int &load_idx) +{ + if (--load_idx < 0) + load_idx += LQEntries; +} + +#endif + +template +void +LSQUnit::dumpInsts() +{ + cprintf("Load store queue: Dumping instructions.\n"); + cprintf("Load queue size: %i\n", loads); + cprintf("Load queue: "); + + int load_idx = loadHead; + + while (load_idx != loadTail && loadQueue[load_idx]) { + cprintf("%#x ", loadQueue[load_idx]->readPC()); + + incrLdIdx(load_idx); + } + + cprintf("Store queue size: %i\n", stores); + cprintf("Store queue: "); + + int store_idx = storeHead; + + while (store_idx != storeTail && storeQueue[store_idx].inst) { + cprintf("%#x ", storeQueue[store_idx].inst->readPC()); + + incrStIdx(store_idx); + } + + cprintf("\n"); +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/map.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/map.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,306 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec. 2009
+ *
+ */
+
+#ifndef __CPU_EDGE_MAP_HH__
+#define __CPU_EDGE_MAP_HH__
+
+#include <list>
+#include <queue>
+#include <string>
+
+#include "base/statistics.hh"
+#include "base/timebuf.hh"
+
+class SimpleEdgeCPUParams;
+
+/**
+ * Map stage of the SimpleEdge CPU. It takes instruction blocks delivered by
+ * fetch, maps each block, and hands the mapped blocks to execute. Blocks
+ * that cannot be processed in the current cycle are parked in a per-thread
+ * skid buffer.
+ */
+template <class Impl>
+class SimpleEdgeMap
+{
+  private:
+    // Typedefs from the Impl.
+    typedef typename Impl::CPU CPU;
+    typedef typename Impl::DynInstPtr DynInstPtr;
+    typedef typename Impl::EdgeBlockPtr BlockPtr;
+
+    typedef typename Impl::CPUPol CPUPol;
+
+    typedef TheISA::BlockID BlockID;
+
+    // Typedefs from the CPU policy.
+    typedef typename CPUPol::Fetch2Map Fetch2Map;
+    typedef typename CPUPol::Map2Execute Map2Execute;
+    typedef typename CPUPol::TimeStruct TimeStruct;
+
+  public:
+    /** Overall map stage status. Used to determine if the CPU can
+     * deschedule itself due to a lack of activity.
+     */
+    enum MapStatus {
+        Active,
+        Inactive
+    };
+
+    /** Individual thread status. */
+    enum ThreadStatus {
+        Running,
+        Idle,
+        StartSquash,
+        Squashing,
+        Blocked,
+        Unblocking
+    };
+
+  private:
+    /** Map status. */
+    MapStatus _status;
+
+    /** Per-thread status. */
+    ThreadStatus mapStatus[Impl::MaxThreads];
+
+  public:
+    /** SimpleEdgeMap constructor. */
+    SimpleEdgeMap(CPU *_cpu, SimpleEdgeCPUParams *params);
+
+    /** Returns the name of map. */
+    std::string name() const;
+
+    /** Registers statistics. */
+    void regStats();
+
+    /** Sets the main backwards communication time buffer pointer. */
+    void setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr);
+
+    /** Sets pointer to time buffer used to communicate to the next stage. */
+    void setMapQueue(TimeBuffer<Map2Execute> *dq_ptr);
+
+    /** Sets pointer to time buffer coming from fetch. */
+    void setFetchQueue(TimeBuffer<Fetch2Map> *fq_ptr);
+
+    /** Sets pointer to list of active threads. */
+    void setActiveThreads(std::list<ThreadID> *at_ptr);
+
+    /** Drains the map stage. */
+    bool drain();
+
+    /** Resumes execution after a drain. */
+    void resume() { }
+
+    /** Switches out the map stage. */
+    void switchOut() { }
+
+    /** Takes over from another CPU's thread. */
+    void takeOverFrom();
+
+    /** Ticks map, processing all input signals and mapping as many
+     * instruction blocks as possible.
+     */
+    void tick();
+
+    /** Determines what to do based on map's current status.
+     * @param status_change map() sets this variable if there was a status
+     * change (i.e. switching from blocking to unblocking).
+     * @param tid Thread id to map instruction blocks from.
+     */
+    void map(bool &status_change, ThreadID tid);
+
+    /** Processes instruction blocks from fetch (or from the skid buffer
+     * while unblocking) and passes them on to execute. Squashed blocks
+     * are skipped; every other block is mapped before being forwarded.
+     */
+    void mapInsts(ThreadID tid);
+
+  private:
+    /** Inserts a thread's instruction blocks into the skid buffer, to be
+     * mapped once map unblocks.
+     */
+    void skidInsert(ThreadID tid);
+
+    /** Returns if all of the skid buffers are empty. */
+    bool skidsEmpty();
+
+    /** Updates overall map status based on all of the threads' statuses. */
+    void updateStatus();
+
+    /** Separates instruction blocks from fetch into individual per-thread
+     * queues.
+     */
+    void sortInstBlocks();
+
+    /** Reads all stall signals from the backwards communication timebuffer. */
+    void readStallSignals(ThreadID tid);
+
+    /** Checks all input signals and updates map's status appropriately. */
+    bool checkSignalsAndUpdate(ThreadID tid);
+
+    /** Checks all stall signals, and returns if any are true. */
+    bool checkStall(ThreadID tid) const;
+
+    /** Returns if there are any instruction blocks from fetch this cycle. */
+    inline bool fetchInstsValid();
+
+    /** Switches map to blocking, and signals back that map has
+     * become blocked.
+     * @return Returns true if there is a status change.
+     */
+    bool block(ThreadID tid);
+
+    /** Switches map to unblocking if the skid buffer is empty, and
+     * signals back that map has unblocked.
+     * @return Returns true if there is a status change.
+     */
+    bool unblock(ThreadID tid);
+
+    /** Squashes because the given block's branch was predicted
+     * incorrectly. Sends squash information back to fetch.
+     */
+    //void squash(DynInstPtr &inst, ThreadID tid);
+    void squash(BlockPtr &inst_block, ThreadID tid);
+
+  public:
+    /** Squashes due to commit signalling a squash. Changes status to
+     * squashing and clears block/unblock signals as needed.
+     * @return Number of incoming blocks from fetch that were squashed.
+     */
+    unsigned squash(ThreadID tid);
+
+  private:
+    // Interfaces to objects outside of map.
+    /** CPU interface. */
+    CPU *cpu;
+
+    /** Time buffer interface. */
+    TimeBuffer<TimeStruct> *timeBuffer;
+
+    /** Wire to get execute's output from backwards time buffer. */
+    typename TimeBuffer<TimeStruct>::wire fromExecute;
+
+    /** Wire to get commit's information from backwards time buffer. */
+    typename TimeBuffer<TimeStruct>::wire fromCommit;
+
+    /** Wire to write information heading to previous stages. */
+    // Might not be the best name as not only fetch will read it.
+    typename TimeBuffer<TimeStruct>::wire toFetch;
+
+    /** Map instruction queue. */
+    TimeBuffer<Map2Execute> *map2executeQueue;
+
+    /** Wire used to write any information heading to execute. */
+    typename TimeBuffer<Map2Execute>::wire toExecute;
+
+    /** Fetch instruction queue interface. */
+    TimeBuffer<Fetch2Map> *fetch2mapQueue;
+
+    /** Wire to get fetch's output from fetch queue. */
+    typename TimeBuffer<Fetch2Map>::wire fromFetch;
+
+    /** Queue of all instruction blocks coming from fetch this cycle. */
+
+    //std::queue<DynInstPtr> insts[Impl::MaxThreads];
+    std::queue<BlockPtr> instBlocks[Impl::MaxThreads];
+
+    /** Skid buffer between fetch and map. */
+    //std::queue<DynInstPtr> skidBuffer[Impl::MaxThreads];
+    std::queue<BlockPtr> skidBuffer[Impl::MaxThreads];
+
+    /** Variable that tracks if map has written to the time buffer this
+     * cycle. Used to tell CPU if there is activity this cycle.
+     */
+    bool wroteToTimeBuffer;
+
+    /** Source of possible stalls. */
+    struct Stalls {
+        bool execute;
+        bool commit;
+    };
+
+    /** Tracks which stages are telling map to stall. */
+    Stalls stalls[Impl::MaxThreads];
+
+    /** Execute to map delay; used as the read offset into the backwards
+     * time buffer.
+     */
+    unsigned executeToMapDelay;
+
+    /** Commit to map delay; used as the read offset into the backwards
+     * time buffer.
+     */
+    unsigned commitToMapDelay;
+
+    /** Fetch to map delay; used as the read offset into the fetch queue. */
+    unsigned fetchToMapDelay;
+
+    /** The width of map, in instruction blocks per cycle. */
+    unsigned mapWidth;
+
+    /** Index of instruction blocks being sent to execute. */
+    unsigned toExecuteIndex;
+
+    /** Number of active threads. */
+    ThreadID numThreads;
+
+    /** List of active thread ids */
+    std::list<ThreadID> *activeThreads;
+
+    /** Maximum size of the skid buffer. */
+    unsigned skidBufferMax;
+
+#if 0
+    /** Number of branches in flight. */
+    unsigned branchCount[Impl::MaxThreads];
+
+    /** Tells when there is a pending delay slot inst. to send
+     * to rename. If there is, then wait squash after the next
+     * instruction (used for MIPS).
+     */
+    bool squashAfterDelaySlot[Impl::MaxThreads];
+
+    /** Instruction used for squashing branch (used for MIPS)*/
+    DynInstPtr squashInst[Impl::MaxThreads];
+
+    /** SeqNum of Squashing Branch Delay Instruction (used for MIPS)*/
+    Addr bdelayDoneSeqNum[Impl::MaxThreads];
+#endif
+
+    /** Stat for total number of idle cycles. */
+    Stats::Scalar mapIdleCycles;
+    /** Stat for total number of blocked cycles. */
+    Stats::Scalar mapBlockedCycles;
+    /** Stat for total number of normal running cycles. */
+    Stats::Scalar mapRunCycles;
+    /** Stat for total number of unblocking cycles. */
+    Stats::Scalar mapUnblockCycles;
+    /** Stat for total number of squashing cycles. */
+    Stats::Scalar mapSquashCycles;
+    /** Stat for total number of mapped instruction blocks. */
+    Stats::Scalar mapMappedInstBlocks;
+    /** Stat for total number of squashed instruction blocks. */
+    Stats::Scalar mapSquashedInstBlocks;
+};
+
+#endif // __CPU_EDGE_MAP_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/map.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/map.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,39 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/map_impl.hh" + +template class SimpleEdgeMap; diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/map_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/map_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,701 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009
+ *
+ */
+
+#include "config/the_isa.hh"
+#include "cpu/edge/map.hh"
+#include "params/SimpleEdgeCPU.hh"
+
+using namespace std;
+
+// Builds the map stage: copies the delays and widths out of the params
+// and puts every thread into the Idle state with no stalls pending.
+template <class Impl>
+SimpleEdgeMap<Impl>::SimpleEdgeMap(CPU *_cpu, SimpleEdgeCPUParams *params)
+    : cpu(_cpu),
+      executeToMapDelay(params->executeToMapDelay),
+      commitToMapDelay(params->commitToMapDelay),
+      fetchToMapDelay(params->fetchToMapDelay),
+      mapWidth(params->mapWidth),
+      numThreads(params->numThreads)
+{
+    _status = Inactive;
+
+    // Setup status, make sure stall signals are clear.
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        mapStatus[tid] = Idle;
+
+        stalls[tid].execute = false;
+        stalls[tid].commit = false;
+    }
+
+    // @todo: Make into a parameter
+    skidBufferMax = (fetchToMapDelay * params->fetchWidth) + mapWidth;
+}
+
+template <class Impl>
+std::string
+SimpleEdgeMap<Impl>::name() const
+{
+    return cpu->name() + ".map";
+}
+
+// Registers the per-stage cycle and block counters with the stats system.
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::regStats()
+{
+    mapIdleCycles
+        .name(name() + ".MAP:IdleCycles")
+        .desc("Number of cycles map is idle")
+        .prereq(mapIdleCycles);
+
+    mapBlockedCycles
+        .name(name() + ".MAP:BlockedCycles")
+        .desc("Number of cycles map is blocked")
+        .prereq(mapBlockedCycles);
+
+    mapRunCycles
+        .name(name() + ".MAP:RunCycles")
+        .desc("Number of cycles map is running")
+        .prereq(mapRunCycles);
+
+    mapUnblockCycles
+        .name(name() + ".MAP:UnblockCycles")
+        .desc("Number of cycles map is unblocking")
+        .prereq(mapUnblockCycles);
+
+    mapSquashCycles
+        .name(name() + ".MAP:SquashCycles")
+        .desc("Number of cycles map is squashing")
+        .prereq(mapSquashCycles);
+
+    mapMappedInstBlocks
+        .name(name() + ".MAP:MapdInsts")
+        .desc("Number of instructions handled by map")
+        .prereq(mapMappedInstBlocks);
+
+    mapSquashedInstBlocks
+        .name(name() + ".MAP:SquashedInsts")
+        .desc("Number of squashed instructions handled by map")
+        .prereq(mapSquashedInstBlocks);
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::setTimeBuffer(TimeBuffer<TimeStruct> *tb_ptr)
+{
+    timeBuffer = tb_ptr;
+
+    // Setup wire to write information back to fetch.
+    toFetch = timeBuffer->getWire(0);
+
+    // Create wires to get information from proper places in time buffer.
+    fromExecute = timeBuffer->getWire(-executeToMapDelay);
+    fromCommit = timeBuffer->getWire(-commitToMapDelay);
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::setMapQueue(TimeBuffer<Map2Execute> *dq_ptr)
+{
+    map2executeQueue = dq_ptr;
+
+    // Setup wire to write information to proper place in map queue.
+    toExecute = map2executeQueue->getWire(0);
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::setFetchQueue(TimeBuffer<Fetch2Map> *fq_ptr)
+{
+    fetch2mapQueue = fq_ptr;
+
+    // Setup wire to read information from fetch queue.
+    fromFetch = fetch2mapQueue->getWire(-fetchToMapDelay);
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::setActiveThreads(std::list<ThreadID> *at_ptr)
+{
+    activeThreads = at_ptr;
+}
+
+template <class Impl>
+bool
+SimpleEdgeMap<Impl>::drain()
+{
+    // Map is done draining at any time.
+    cpu->signalDrained();
+    return true;
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::takeOverFrom()
+{
+    _status = Inactive;
+
+    // Be sure to reset state and clear out any old instructions.
+    for (ThreadID tid = 0; tid < numThreads; ++tid) {
+        mapStatus[tid] = Idle;
+
+        stalls[tid].execute = false;
+        stalls[tid].commit = false;
+        while (!instBlocks[tid].empty())
+            instBlocks[tid].pop();
+        while (!skidBuffer[tid].empty())
+            skidBuffer[tid].pop();
+    }
+    wroteToTimeBuffer = false;
+}
+
+template <class Impl>
+bool
+SimpleEdgeMap<Impl>::checkStall(ThreadID tid) const
+{
+    bool ret_val = false;
+
+    if (stalls[tid].execute) {
+        DPRINTF(EdgeMap,"[tid:%i]: Stall fom Execute stage detected.\n", tid);
+        ret_val = true;
+    } else if (stalls[tid].commit) {
+        DPRINTF(EdgeMap,"[tid:%i]: Stall fom Commit stage detected.\n", tid);
+        ret_val = true;
+    }
+
+    return ret_val;
+}
+
+template <class Impl>
+inline bool
+SimpleEdgeMap<Impl>::fetchInstsValid()
+{
+    return fromFetch->size > 0;
+}
+
+template <class Impl>
+bool
+SimpleEdgeMap<Impl>::block(ThreadID tid)
+{
+    DPRINTF(EdgeMap, "[tid:%u]: Blocking.\n", tid);
+
+    // Add the current inputs to the skid buffer so they can be
+    // reprocessed when this stage unblocks.
+    skidInsert(tid);
+
+    // If the map status is blocked or unblocking then map has not yet
+    // signalled fetch to unblock. In that case, there is no need to tell
+    // fetch to block.
+    if (mapStatus[tid] != Blocked) {
+        // Sample the old status before it is overwritten; testing
+        // mapStatus[tid] after the assignment could never see Unblocking.
+        const bool was_unblocking = (mapStatus[tid] == Unblocking);
+
+        // Set the status to Blocked.
+        mapStatus[tid] = Blocked;
+
+        if (!was_unblocking) {
+            toFetch->mapBlock[tid] = true;
+            wroteToTimeBuffer = true;
+        }
+
+        return true;
+    }
+
+    return false;
+}
+
+template <class Impl>
+bool
+SimpleEdgeMap<Impl>::unblock(ThreadID tid)
+{
+    // Map is done unblocking only if the skid buffer is empty.
+    if (skidBuffer[tid].empty()) {
+        DPRINTF(EdgeMap, "[tid:%u]: Done unblocking.\n", tid);
+        toFetch->mapUnblock[tid] = true;
+        wroteToTimeBuffer = true;
+
+        mapStatus[tid] = Running;
+        return true;
+    }
+
+    DPRINTF(EdgeMap, "[tid:%u]: Currently unblocking.\n", tid);
+
+    return false;
+}
+
+// Squash triggered by a mispredicted block: reports the misprediction to
+// fetch, squashes younger incoming blocks of this thread, and empties the
+// thread's queues.
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::squash(BlockPtr &inst_block, ThreadID tid)
+{
+    DPRINTF(EdgeMap, "[tid:%i]: [block:%i] Squashing due to incorrect branch prediction "
+            "detected at map.\n", tid, inst_block->getBlockID());
+
+    // Send back mispredict information.
+    toFetch->mapInfo[tid].branchMispredict = true;
+    toFetch->mapInfo[tid].predIncorrect = true;
+    toFetch->mapInfo[tid].squash = true;
+    toFetch->mapInfo[tid].doneSeqNum = inst_block->getBlockID();
+
+    toFetch->mapInfo[tid].nextPC = inst_block->getBranchTarget();
+    toFetch->mapInfo[tid].branchTaken =
+        inst_block->getNextBlockPC() != inst_block->getBranchTarget();
+
+    BlockID squash_block_id = inst_block->getBlockID();
+
+    // Might have to tell fetch to unblock.
+    if (mapStatus[tid] == Blocked ||
+        mapStatus[tid] == Unblocking) {
+        toFetch->mapUnblock[tid] = 1;
+    }
+
+    // Set status to squashing.
+    mapStatus[tid] = Squashing;
+
+    for (int i = 0; i < fromFetch->size; i++) {
+        if (fromFetch->instBlocks[i]->getTid() == tid &&
+            fromFetch->instBlocks[i]->getBlockID() > squash_block_id) {
+            fromFetch->instBlocks[i]->setSquashed();
+        }
+    }
+
+    // Clear the instruction list and skid buffer in case they have any
+    // insts in them.
+    while (!instBlocks[tid].empty()) {
+        instBlocks[tid].pop();
+    }
+
+    while (!skidBuffer[tid].empty()) {
+        skidBuffer[tid].pop();
+    }
+
+    // Squash instructions up until this one
+    cpu->removeBlocksUntil(squash_block_id, tid);
+}
+
+// Squash requested by commit: marks every incoming block of this thread as
+// squashed, empties the thread's queues, and returns how many incoming
+// blocks were squashed.
+template <class Impl>
+unsigned
+SimpleEdgeMap<Impl>::squash(ThreadID tid)
+{
+    DPRINTF(EdgeMap, "[tid:%i]: Squashing.\n",tid);
+
+    if (mapStatus[tid] == Blocked ||
+        mapStatus[tid] == Unblocking) {
+#if !FULL_SYSTEM
+        // In syscall emulation, we can have both a block and a squash due
+        // to a syscall in the same cycle. This would cause both signals to
+        // be high. This shouldn't happen in full system.
+        // @todo: Determine if this still happens.
+        if (toFetch->mapBlock[tid]) {
+            toFetch->mapBlock[tid] = 0;
+        } else {
+            toFetch->mapUnblock[tid] = 1;
+        }
+#else
+        toFetch->mapUnblock[tid] = 1;
+#endif
+    }
+
+    // Set status to squashing.
+    mapStatus[tid] = Squashing;
+
+    // Go through incoming instructions from fetch and squash them.
+    unsigned squash_count = 0;
+
+    for (int i = 0; i < fromFetch->size; i++) {
+        if (fromFetch->instBlocks[i]->getTid() == tid) {
+            fromFetch->instBlocks[i]->setSquashed();
+            squash_count++;
+        }
+    }
+
+    // Clear the instruction list and skid buffer in case they have any
+    // inst blocks in them. Each discarded block is counted exactly once;
+    // adding the queue size on every pop would over-count.
+    while (!instBlocks[tid].empty()) {
+        ++mapSquashedInstBlocks;
+
+        instBlocks[tid].pop();
+    }
+
+    while (!skidBuffer[tid].empty()) {
+        ++mapSquashedInstBlocks;
+
+        skidBuffer[tid].pop();
+    }
+
+    return squash_count;
+}
+
+// Moves every pending block of this thread from instBlocks into the skid
+// buffer, preserving order.
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::skidInsert(ThreadID tid)
+{
+    BlockPtr inst_block = NULL;
+
+    while (!instBlocks[tid].empty()) {
+        inst_block = instBlocks[tid].front();
+
+        instBlocks[tid].pop();
+
+        assert(tid == inst_block->getTid());
+
+        DPRINTF(EdgeMap,"Inserting Block[id:%lli] PC:%#x into map skidBuffer %i\n",
+                inst_block->getBlockID(), inst_block->getStartPC(), inst_block->getTid());
+
+        skidBuffer[tid].push(inst_block);
+    }
+
+    // @todo: Eventually need to enforce this by not letting a thread
+    // fetch past its skidbuffer
+    assert(skidBuffer[tid].size() <= skidBufferMax);
+
+}
+
+template <class Impl>
+bool
+SimpleEdgeMap<Impl>::skidsEmpty()
+{
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+        if (!skidBuffer[tid].empty())
+            return false;
+    }
+
+    return true;
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::updateStatus()
+{
+    bool any_unblocking = false;
+
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
+
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        if (mapStatus[tid] == Unblocking) {
+            any_unblocking = true;
+            break;
+        }
+    }
+
+    // Map will have activity if it's unblocking.
+    if (any_unblocking) {
+        if (_status == Inactive) {
+            _status = Active;
+
+            DPRINTF(Activity, "Activating stage.\n");
+
+            cpu->activateStage(CPU::MapIdx);
+        }
+    } else {
+        // If it's not unblocking, then map will not have any internal
+        // activity. Switch it to inactive.
+        if (_status == Active) {
+            _status = Inactive;
+            DPRINTF(Activity, "Deactivating stage.\n");
+
+            cpu->deactivateStage(CPU::MapIdx);
+        }
+    }
+}
+
+// Distributes this cycle's blocks from fetch into the per-thread queues.
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::sortInstBlocks()
+{
+    int blocks_from_fetch = fromFetch->size;
+#ifdef DEBUG
+    for (ThreadID tid = 0; tid < numThreads; tid++)
+        assert(instBlocks[tid].empty());
+#endif
+    for (int i = 0; i < blocks_from_fetch; ++i) {
+        instBlocks[fromFetch->instBlocks[i]->getTid()].push(fromFetch->instBlocks[i]);
+    }
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::readStallSignals(ThreadID tid)
+{
+    if (fromExecute->executeBlock[tid]) {
+        stalls[tid].execute = true;
+    }
+
+    if (fromExecute->executeUnblock[tid]) {
+        assert(stalls[tid].execute);
+        stalls[tid].execute = false;
+    }
+
+    if (fromCommit->commitBlock[tid]) {
+        stalls[tid].commit = true;
+    }
+
+    if (fromCommit->commitUnblock[tid]) {
+        assert(stalls[tid].commit);
+        stalls[tid].commit = false;
+    }
+}
+
+template <class Impl>
+bool
+SimpleEdgeMap<Impl>::checkSignalsAndUpdate(ThreadID tid)
+{
+    // Check if there's a squash signal, squash if there is.
+    // Check stall signals, block if necessary.
+    // If status was blocked
+    //     Check if stall conditions have passed
+    //         if so then go to unblocking
+    // If status was Squashing
+    //     check if squashing is not high. Switch to running this cycle.
+
+    // Update the per thread stall statuses.
+    readStallSignals(tid);
+
+    // Check squash signals from commit.
+    if (fromCommit->commitInfo[tid].squash) {
+
+        DPRINTF(EdgeMap, "[tid:%u]: Squashing blocks due to squash "
+                "from commit.\n", tid);
+
+        squash(tid);
+
+        return true;
+    }
+
+    // Check ROB squash signals from commit.
+    if (fromCommit->commitInfo[tid].robSquashing) {
+        DPRINTF(EdgeMap, "[tid:%u]: ROB is still squashing.\n", tid);
+
+        // Continue to squash.
+        mapStatus[tid] = Squashing;
+
+        return true;
+    }
+
+    if (checkStall(tid)) {
+        return block(tid);
+    }
+
+    if (mapStatus[tid] == Blocked) {
+        DPRINTF(EdgeMap, "[tid:%u]: Done blocking, switching to unblocking.\n",
+                tid);
+
+        mapStatus[tid] = Unblocking;
+
+        unblock(tid);
+
+        return true;
+    }
+
+    if (mapStatus[tid] == Squashing) {
+        // Switch status to running if map isn't being told to block or
+        // squash this cycle.
+        DPRINTF(EdgeMap, "[tid:%u]: Done squashing, switching to running.\n",
+                tid);
+
+        mapStatus[tid] = Running;
+
+        return false;
+    }
+
+    // If we've reached this point, we have not gotten any signals that
+    // cause map to change its status. Map remains the same as before.
+    return false;
+}
+
+// One cycle of the map stage: sorts incoming blocks by thread, then for
+// every active thread processes the control signals and maps blocks.
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::tick()
+{
+    wroteToTimeBuffer = false;
+
+    bool status_change = false;
+
+    toExecuteIndex = 0;
+
+    list<ThreadID>::iterator threads = activeThreads->begin();
+    list<ThreadID>::iterator end = activeThreads->end();
+
+    sortInstBlocks();
+
+    //Check stall and squash signals.
+    while (threads != end) {
+        ThreadID tid = *threads++;
+
+        DPRINTF(EdgeMap,"Processing [tid:%i]\n",tid);
+        status_change = checkSignalsAndUpdate(tid) || status_change;
+
+        map(status_change, tid);
+    }
+
+    if (status_change) {
+        updateStatus();
+    }
+
+    if (wroteToTimeBuffer) {
+        DPRINTF(Activity, "Activity this cycle.\n");
+
+        cpu->activityThisCycle();
+    }
+}
+
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::map(bool &status_change, ThreadID tid)
+{
+    // If status is Running or idle,
+    //     call mapInsts()
+    // If status is Unblocking,
+    //     buffer any instructions coming from fetch
+    //     continue trying to empty skid buffer
+    //     check if stall conditions have passed
+
+    if (mapStatus[tid] == Blocked) {
+        ++mapBlockedCycles;
+    } else if (mapStatus[tid] == Squashing) {
+        ++mapSquashCycles;
+    }
+
+    // Map should try to map as many instructions as its bandwidth
+    // will allow, as long as it is not currently blocked.
+    if (mapStatus[tid] == Running ||
+        mapStatus[tid] == Idle) {
+        DPRINTF(EdgeMap, "[tid:%u]: Not blocked, so attempting to run "
+                "stage.\n",tid);
+
+        mapInsts(tid);
+    } else if (mapStatus[tid] == Unblocking) {
+        // Make sure that the skid buffer has something in it if the
+        // status is unblocking.
+        assert(!skidsEmpty());
+
+        // If the status was unblocking, then instructions from the skid
+        // buffer were used. Remove those instructions and handle
+        // the rest of unblocking.
+        mapInsts(tid);
+
+        if (fetchInstsValid()) {
+            // Add the current inputs to the skid buffer so they can be
+            // reprocessed when this stage unblocks.
+            skidInsert(tid);
+        }
+
+        status_change = unblock(tid) || status_change;
+    }
+}
+
+// Maps up to mapWidth blocks for this thread and writes them to the
+// map-to-execute queue; blocks that do not fit this cycle cause the stage
+// to block.
+template <class Impl>
+void
+SimpleEdgeMap<Impl>::mapInsts(ThreadID tid)
+{
+    // Instructions can come either from the skid buffer or the list of
+    // instructions coming from fetch, depending on map's status.
+    int instblocks_available = mapStatus[tid] == Unblocking ?
+        skidBuffer[tid].size() : instBlocks[tid].size();
+
+    if (instblocks_available == 0) {
+        DPRINTF(EdgeMap, "[tid:%u] Nothing to do, breaking out"
+                " early.\n",tid);
+        // Should I change the status to idle?
+        ++mapIdleCycles;
+        return;
+    } else if (mapStatus[tid] == Unblocking) {
+        DPRINTF(EdgeMap, "[tid:%u] Unblocking, removing insts from skid "
+                "buffer.\n",tid);
+        ++mapUnblockCycles;
+    } else if (mapStatus[tid] == Running) {
+        ++mapRunCycles;
+    }
+
+    BlockPtr inst_block;
+
+    std::queue<BlockPtr>
+        &instblocks_to_map = mapStatus[tid] == Unblocking ?
+        skidBuffer[tid] : instBlocks[tid];
+
+    DPRINTF(EdgeMap, "[tid:%u]: Sending instruction to execute.\n",tid);
+
+    while (instblocks_available > 0 && toExecuteIndex < mapWidth) {
+        assert(!instblocks_to_map.empty());
+
+        inst_block = instblocks_to_map.front();
+
+        instblocks_to_map.pop();
+
+        DPRINTF(EdgeMap, "[tid:%u]: Processing block [id:%lli] at "
+                "PC %#x with %lli insts\n",
+                tid, inst_block->getBlockID(), inst_block->getStartPC(), inst_block->getNumInst());
+
+        if (inst_block->isSquashed()) {
+            DPRINTF(EdgeMap, "[tid:%u]: Instruction %i with PC %#x is "
+                    "squashed, skipping.\n",
+                    tid, inst_block->getBlockID(), inst_block->getStartPC());
+
+            ++mapSquashedInstBlocks;
+
+            --instblocks_available;
+
+            continue;
+        }
+
+        // Map this block
+        inst_block->map();
+
+        toExecute->instBlocks[toExecuteIndex] = inst_block;
+        //cpu->addToBlockRemoveList(inst_block);
+
+        ++(toExecute->size);
+        ++toExecuteIndex;
+        ++mapMappedInstBlocks;
+        --instblocks_available;
+
+    }
+
+    // If we didn't process all instructions, then we will need to block
+    // and put all those instructions into the skid buffer.
+    if (!instblocks_to_map.empty()) {
+        block(tid);
+    }
+
+    // Record that map has written to the time buffer for activity
+    // tracking.
+    if (toExecuteIndex) {
+        wroteToTimeBuffer = true;
+    }
+}
+
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/mem_dep_unit.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/mem_dep_unit.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,301 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009
+ *
+ */
+
+#ifndef __CPU_EDGE_MEM_DEP_UNIT_HH__
+#define __CPU_EDGE_MEM_DEP_UNIT_HH__
+
+// NOTE(review): the three system includes below carry no header name, and
+// several declarations further down (template parameter lists, std::list,
+// std::vector, std::multimap, RefCountingPtr, InstructionQueue) carry no
+// template arguments. The text appears to have been lost before this file
+// reached review -- TODO restore from the original changeset.
+#include
+#include
+#include
+
+#include "base/hashmap.hh"
+#include "base/refcnt.hh"
+#include "base/statistics.hh"
+#include "cpu/inst_seq.hh"
+
+#if 0
+struct SNHash {
+    size_t operator() (const InstSeqNum &seq_num) const {
+        unsigned a = (unsigned)seq_num;
+        unsigned hash = (((a >> 14) ^ ((a >> 2) & 0xffff))) & 0x7FFFFFFF;
+
+        return hash;
+    }
+};
+#endif
+
+class SimpleEdgeCPUParams;
+
+template
+class InstructionQueue;
+
+/**
+ * Memory dependency unit class. This holds the memory dependence predictor.
+ * As memory operations are issued to the IQ, they are also issued to this
+ * unit, which then looks up the prediction as to what they are dependent
+ * upon. This unit must be checked prior to a memory operation being able
+ * to issue. Although this is templated, it's somewhat hard to make a generic
+ * memory dependence unit. This one is mostly for store sets; it will be
+ * quite limited in what other memory dependence predictions it can also
+ * utilize. Thus this class should most likely be rewritten for other
+ * dependence prediction schemes.
+ *
+ * @todo: In the EDGE model, memory dependence is currently maintained in an
+ * extremely conservative way: every load must wait until its prior stores
+ * have executed. This is too inefficient for a detailed EDGE model. Fix this!
+ */
+template
+class MemDepUnit
+{
+  protected:
+    std::string _name;
+
+  public:
+    typedef typename Impl::DynInstPtr DynInstPtr;
+
+    /** Empty constructor. Must call init() prior to using in this case. */
+    MemDepUnit();
+
+    /** Constructs a MemDepUnit with given parameters. */
+    MemDepUnit(SimpleEdgeCPUParams *params);
+
+    /** Frees up any memory allocated. */
+    ~MemDepUnit();
+
+    /** Returns the name of the memory dependence unit. */
+    std::string name() const { return _name; }
+
+    /** Initializes the unit with parameters and a thread id. */
+    void init(SimpleEdgeCPUParams *params, ThreadID tid);
+
+    /** Registers statistics. */
+    void regStats();
+
+    /** Switches out the memory dependence predictor. */
+    void switchOut();
+
+    /** Takes over from another CPU's thread. */
+    void takeOverFrom();
+
+    /** Sets the pointer to the IQ. */
+    void setIQ(InstructionQueue *iq_ptr);
+
+    /** Inserts a memory instruction. */
+    void insert(DynInstPtr &inst);
+
+    /** Inserts a non-speculative memory instruction. */
+    void insertNonSpec(DynInstPtr &inst);
+
+    /** Inserts a barrier instruction. */
+    void insertBarrier(DynInstPtr &barr_inst);
+
+    /** Indicate that an instruction has its registers ready. */
+    void opsReady(DynInstPtr &inst);
+
+    /** Indicate that a non-speculative instruction is ready. */
+    void nonSpecInstReady(DynInstPtr &inst);
+
+    /** Reschedules an instruction to be re-executed. */
+    void reschedule(DynInstPtr &inst);
+
+    /** Replays all instructions that have been rescheduled by moving them to
+     *  the ready list. The implementation drains the whole replay list and
+     *  does not read the inst argument.
+     */
+    void replay(DynInstPtr &inst);
+
+    /** Completes a memory instruction. */
+    void completed(DynInstPtr &inst);
+
+    /** Completes a barrier instruction. */
+    void completeBarrier(DynInstPtr &inst);
+
+    /** Wakes any dependents of a memory instruction. */
+    void wakeDependents(DynInstPtr &inst);
+
+    /** Squashes, for a specific thread, the tracked instructions whose
+     *  block ID is greater than squashed_num (the implementation compares
+     *  getBlockID(), not the sequence number).
+     */
+    void squash(const TheISA::BlockID &squashed_num, ThreadID tid);
+
+    /** Indicates an ordering violation between a store and a younger load. */
+    void violation(DynInstPtr &store_inst, DynInstPtr &violating_load);
+
+    /** Issues the given instruction */
+    void issue(DynInstPtr &inst);
+
+    /** Debugging function to dump the lists of instructions. */
+    void dumpLists();
+
+    /** Find the last store inst prior to a specific mem ref inst.
+     *  This is a conservative way to guarantee mem dependence
+     *  that every mem ref inst should have waited until the prior last
+     *  store inst finished.
+     *  @return The hash key of that store, or a negative value when the
+     *  implementation finds no prior store.
+     */
+    int64_t findLastStore(DynInstPtr &inst, ThreadID tid);
+
+  private:
+    typedef typename std::list::iterator ListIt;
+
+    class MemDepEntry;
+
+    typedef RefCountingPtr MemDepEntryPtr;
+
+    /** Memory dependence entries that track memory operations, marking
+     *  when the instruction is ready to execute and what instructions depend
+     *  upon it.
+     */
+    class MemDepEntry : public RefCounted {
+      public:
+        /** Constructs a memory dependence entry. */
+        MemDepEntry(DynInstPtr &new_inst)
+            : inst(new_inst), opsReady(false), memDepReady(false),
+              completed(false), squashed(false), issued(false)
+        {
+#ifdef DEBUG
+            ++memdep_count;
+
+            DPRINTF(EdgeMemDepUnit, "Memory dependency entry created. "
+                    "memdep_count=%i\n", memdep_count);
+#endif
+        }
+
+        /** Frees any pointers: drops the references held on dependents. */
+        ~MemDepEntry()
+        {
+            for (int i = 0; i < dependInsts.size(); ++i) {
+                dependInsts[i] = NULL;
+            }
+
+            // Bug of m5 for not adding this?
+            //inst = NULL;
+
+#ifdef DEBUG
+            --memdep_count;
+
+            DPRINTF(EdgeMemDepUnit, "Memory dependency entry deleted. "
+                    "memdep_count=%i\n", memdep_count);
+#endif
+        }
+
+        /** Returns the name of the memory dependence entry. */
+        std::string name() const { return "memdepentry"; }
+
+        /** The instruction being tracked. */
+        DynInstPtr inst;
+
+        /** The iterator to the instruction's location inside the list. */
+        ListIt listIt;
+
+        /** A vector of any dependent instructions. */
+        std::vector dependInsts;
+
+        /** If the operands are ready or not. */
+        bool opsReady;
+        /** If all memory dependencies have been satisfied. */
+        bool memDepReady;
+        /** If the instruction is completed. */
+        bool completed;
+        /** If the instruction is squashed. */
+        bool squashed;
+        /** If the instruction has been issued in mem dep unit
+         *  (set by moveToReady()).
+         */
+        bool issued;
+
+        /** For debugging. */
+#ifdef DEBUG
+        static int memdep_count;
+        static int memdep_insert;
+        static int memdep_erase;
+#endif
+    };
+
+    /** Finds the memory dependence entry in the hash map. */
+    inline MemDepEntryPtr &findInHash(const DynInstPtr &inst);
+
+    /** Moves an entry to the ready list. */
+    inline void moveToReady(MemDepEntryPtr &ready_inst_entry);
+#if 0
+    typedef m5::hash_map MemDepHash;
+
+    typedef typename MemDepHash::iterator MemDepHashIt;
+
+    /** A hash map of all memory dependence entries. */
+    MemDepHash memDepHash;
+#endif
+
+    typedef std::multimap MemDepHash;
+
+    typedef typename MemDepHash::iterator MemDepHashIt;
+
+    /** All memory dependence entries. The implementation keys each entry by
+     *  getLSID() | (getBlockID() << 5) and walks the equal-key range on
+     *  lookup, so several entries may share one key.
+     */
+    MemDepHash memDepHash;
+
+    /** A list of all instructions in the memory dependence unit. */
+    std::list instList[Impl::MaxThreads];
+
+    /** A list of all instructions that are going to be replayed. */
+    std::list instsToReplay;
+
+    /** The memory dependence predictor. It is accessed upon new
+     *  instructions being added to the IQ, and responds by telling
+     *  this unit what instruction the newly added instruction is dependent
+     *  upon.
+     */
+    MemDepPred depPred;
+
+    /** Is there an outstanding load barrier that loads must wait on. */
+    bool loadBarrier;
+    /** The value recorded for the load barrier by insertBarrier(), which
+     *  stores the barrier's hash key rather than its sequence number.
+     */
+    InstSeqNum loadBarrierSN;
+    /** Is there an outstanding store barrier that loads must wait on. */
+    bool storeBarrier;
+    /** The value recorded for the store barrier by insertBarrier(), which
+     *  stores the barrier's hash key rather than its sequence number.
+     */
+    InstSeqNum storeBarrierSN;
+
+    /** Pointer to the IQ. */
+    InstructionQueue *iqPtr;
+
+    /** The thread id of this memory dependence unit. */
+    int id;
+
+    /** Stat for number of inserted loads. */
+    Stats::Scalar insertedLoads;
+    /** Stat for number of inserted stores. */
+    Stats::Scalar insertedStores;
+    /** Stat for number of conflicting loads that had to wait for a store. */
+    Stats::Scalar conflictingLoads;
+    /** Stat for number of conflicting stores that had to wait for a store. */
+    Stats::Scalar conflictingStores;
+};
+
+#endif // __CPU_EDGE_MEM_DEP_UNIT_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/mem_dep_unit.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/mem_dep_unit.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,54 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec.
2009
+ *
+ */
+
+#include "cpu/edge/isa_specific.hh"
+#include "cpu/edge/store_set.hh"
+#include "cpu/edge/mem_dep_unit_impl.hh"
+
+// Force instantiation of the memory dependency unit using store sets.
+// NOTE(review): the original comment names O3CPUImpl as the second template
+// argument, but the argument list after MemDepUnit is missing from the text
+// under review here and in the three definitions below -- TODO restore it
+// from the original changeset and confirm which Impl the EDGE CPU uses.
+template class MemDepUnit;
+
+#ifdef DEBUG
+// Definitions of the debug-only entry counters declared in MemDepEntry.
+template <>
+int
+MemDepUnit::MemDepEntry::memdep_count = 0;
+template <>
+int
+MemDepUnit::MemDepEntry::memdep_insert = 0;
+template <>
+int
+MemDepUnit::MemDepEntry::memdep_erase = 0;
+#endif
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/mem_dep_unit_impl.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/mem_dep_unit_impl.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,752 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include + +#include "cpu/edge/inst_queue.hh" +#include "cpu/edge/mem_dep_unit.hh" + +#include "params/SimpleEdgeCPU.hh" + +template +MemDepUnit::MemDepUnit() + : loadBarrier(false), loadBarrierSN(0), storeBarrier(false), + storeBarrierSN(0), iqPtr(NULL) +{ +} + +template +MemDepUnit::MemDepUnit(SimpleEdgeCPUParams *params) + : _name(params->name + ".memdepunit"), + depPred(params->SSITSize, params->LFSTSize), loadBarrier(false), + loadBarrierSN(0), storeBarrier(false), storeBarrierSN(0), iqPtr(NULL) +{ + DPRINTF(EdgeMemDepUnit, "Creating MemDepUnit object.\n"); +} + +template +MemDepUnit::~MemDepUnit() +{ + memDepHash.clear(); + + instsToReplay.clear(); + + for (ThreadID tid = 0; tid < Impl::MaxThreads; tid++) { + instList[tid].clear(); + #if 0 + + ListIt inst_list_it = instList[tid].begin(); + + MemDepHashIt hash_it; + + while (!instList[tid].empty()) { + hash_it = memDepHash.find((*inst_list_it)->seqNum); + + assert(hash_it != memDepHash.end()); + + memDepHash.erase(hash_it); + + instList[tid].erase(inst_list_it++); + } + #endif + } + +#ifdef DEBUG + assert(MemDepEntry::memdep_count == 0); +#endif +} + +template +void +MemDepUnit::init(SimpleEdgeCPUParams *params, ThreadID tid) +{ + DPRINTF(EdgeMemDepUnit, "Creating MemDepUnit %i object.\n",tid); + + _name = csprintf("%s.memDep%d", params->name, tid); + id = tid; + + depPred.init(params->SSITSize, 
params->LFSTSize); +} + +template +void +MemDepUnit::regStats() +{ + insertedLoads + .name(name() + ".insertedLoads") + .desc("Number of loads inserted to the mem dependence unit."); + + insertedStores + .name(name() + ".insertedStores") + .desc("Number of stores inserted to the mem dependence unit."); + + conflictingLoads + .name(name() + ".conflictingLoads") + .desc("Number of conflicting loads."); + + conflictingStores + .name(name() + ".conflictingStores") + .desc("Number of conflicting stores."); +} + +template +void +MemDepUnit::switchOut() +{ + assert(instList[0].empty()); + assert(instsToReplay.empty()); + assert(memDepHash.empty()); + // Clear any state. + for (int i = 0; i < Impl::MaxThreads; ++i) { + instList[i].clear(); + } + instsToReplay.clear(); + memDepHash.clear(); +} + +template +void +MemDepUnit::takeOverFrom() +{ + // Be sure to reset all state. + loadBarrier = storeBarrier = false; + loadBarrierSN = storeBarrierSN = 0; + depPred.clear(); +} + +template +void +MemDepUnit::setIQ(InstructionQueue *iq_ptr) +{ + iqPtr = iq_ptr; +} + +template +void +MemDepUnit::insert(DynInstPtr &inst) +{ + ThreadID tid = inst->threadNumber; + + MemDepEntryPtr inst_entry = new MemDepEntry(inst); + + // Add the MemDepEntry to the hash. + memDepHash.insert( + std::pair( + inst->staticInst->getLSID() | (inst->getBlockID() << 5), + inst_entry)); + DPRINTF(EdgeMemDepUnit, "Adding key %x, PC @%#x, [Bid:%lli][LSID:%i]\n", + inst->staticInst->getLSID() | (inst->getBlockID() << 5), + inst->readPC(),inst->getBlockID(), inst->staticInst->getLSID() ); +#ifdef DEBUG + MemDepEntry::memdep_insert++; +#endif + + instList[tid].push_back(inst); + + inst_entry->listIt = --(instList[tid].end()); + + // Check any barriers and the dependence predictor for any + // producing memrefs/stores. 
+ int64_t key; + if (inst->isLoad() && loadBarrier) { + panic("Not supportted inst load barrier.\n"); + } else if (inst->isStore() && storeBarrier) { + panic("Not supportted inst store barrier.\n"); + } else { + // For normal mem insts, last store in LSID + // sequence could be the producer. + // @todo: Memory predictor should be put here. + key = findLastStore(inst, tid); + } + + MemDepEntryPtr store_entry = NULL; + + MemDepHashIt pos; + if (key <0 ){ + DPRINTF(EdgeMemDepUnit, "No dependency for inst PC " + "%#x [Bid:%lli][Iid:%lli][LSID:%i].\n", inst->readPC(), + inst->getBlockID(), + inst->getInstID(), + inst->staticInst->getLSID()); + + inst_entry->memDepReady = true; + + if (inst->readyToIssue()) { + inst_entry->opsReady = true; + moveToReady(inst_entry); + } + }else { + DPRINTF(EdgeMemDepUnit, "Find a prior store, key = %#x.\n", key); + // If there is a producing store, try to find the entry. + for ( pos = memDepHash.lower_bound(key); + pos != memDepHash.upper_bound(key); + ++ pos ) { + DPRINTF(EdgeMemDepUnit, "Searching for producer.\n"); + + if (pos != memDepHash.end()) { + store_entry = (*pos).second; + DPRINTF(EdgeMemDepUnit, "Producer found.\n"); + } + + // We are supposed to find the store entry. + if(store_entry) { + // Otherwise make the instruction dependent on the store/barrier. + DPRINTF(EdgeMemDepUnit, "Adding to dependency list; " + "inst PC %#x is dependent on [Bid:%lli][key:%x].\n", + inst->readPC(), inst->getBlockID(), key); + + if (inst->readyToIssue()) { + inst_entry->opsReady = true; + } + + // Clear the bit saying this instruction can issue. + inst->clearCanIssue(); + + DPRINTF(EdgeMemDepUnit, "Size of dependInsts of this store entry is %i\n", + store_entry->dependInsts.size()); + // Add this instruction to the list of dependents. + store_entry->dependInsts.push_back(inst_entry); + + if (inst->isLoad()) { + ++conflictingLoads; + } else { + ++conflictingStores; + } + } else { + // Store_entry is supporsed to be existed. 
+ assert(store_entry); + } + } + + if (pos == memDepHash.lower_bound(key)){ + // If no entry is found, means the entry has been executed + // and the mem dependence is solved. + DPRINTF(EdgeMemDepUnit,"Store executed, no dependence will be built for this inst.\n"); + if (inst_entry->opsReady && !inst_entry->squashed) { + moveToReady(inst_entry); + } else { + inst_entry->memDepReady = true; + } + } + + } + + if (inst->isStore()) { + DPRINTF(EdgeMemDepUnit, "Inserting store PC %#x [sn:%lli].\n", + inst->readPC(), inst->seqNum); + + depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber); + + ++insertedStores; + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("Unknown type! (most likely a barrier)."); + } +} + +template +void +MemDepUnit::insertNonSpec(DynInstPtr &inst) +{ + ThreadID tid = inst->threadNumber; + + MemDepEntryPtr inst_entry = new MemDepEntry(inst); + + // Insert the MemDepEntry into the hash. + memDepHash.insert( \ + std::pair( \ + inst->staticInst->getLSID() | (inst->getBlockID() << 5), \ + inst_entry)); +#ifdef DEBUG + MemDepEntry::memdep_insert++; +#endif + + // Add the instruction to the list. + instList[tid].push_back(inst); + + inst_entry->listIt = --(instList[tid].end()); + + // Might want to turn this part into an inline function or something. + // It's shared between both insert functions. + if (inst->isStore()) { + DPRINTF(EdgeMemDepUnit, "Inserting store PC %#x [sn:%lli].\n", + inst->readPC(), inst->seqNum); + + depPred.insertStore(inst->readPC(), inst->seqNum, inst->threadNumber); + + ++insertedStores; + } else if (inst->isLoad()) { + ++insertedLoads; + } else { + panic("Unknown type! (most likely a barrier)."); + } +} + +template +void +MemDepUnit::insertBarrier(DynInstPtr &barr_inst) +{ + InstSeqNum barr_sn = barr_inst->staticInst->getLSID() | (barr_inst->getBlockID() << 5); + // Memory barriers block loads and stores, write barriers only stores. 
+ if (barr_inst->isMemBarrier()) { + loadBarrier = true; + loadBarrierSN = barr_sn; + storeBarrier = true; + storeBarrierSN = barr_sn; + DPRINTF(EdgeMemDepUnit, "Inserted a memory barrier\n"); + } else if (barr_inst->isWriteBarrier()) { + storeBarrier = true; + storeBarrierSN = barr_sn; + DPRINTF(EdgeMemDepUnit, "Inserted a write barrier\n"); + } + + ThreadID tid = barr_inst->threadNumber; + + MemDepEntryPtr inst_entry = new MemDepEntry(barr_inst); + + // Add the MemDepEntry to the hash. + memDepHash.insert( + std::pair(barr_sn, inst_entry)); +#ifdef DEBUG + MemDepEntry::memdep_insert++; +#endif + + // Add the instruction to the instruction list. + instList[tid].push_back(barr_inst); + + inst_entry->listIt = --(instList[tid].end()); +} + +template +void +MemDepUnit::opsReady(DynInstPtr &inst) +{ + DPRINTF(EdgeMemDepUnit, "Marking operands as ready for " + "instruction PC %#x [sn:%lli].\n", + inst->readPC(), inst->seqNum); + + MemDepEntryPtr inst_entry = findInHash(inst); + + inst_entry->opsReady = true; + + if (inst_entry->memDepReady) { + DPRINTF(EdgeMemDepUnit, "Instruction has its memory " + "dependencies resolved, adding it to the ready list.\n"); + + moveToReady(inst_entry); + } else { + DPRINTF(EdgeMemDepUnit, "Instruction still waiting on " + "memory dependency.\n"); + } +} + +template +void +MemDepUnit::nonSpecInstReady(DynInstPtr &inst) +{ + DPRINTF(EdgeMemDepUnit, "Marking non speculative " + "instruction PC %#x as ready [sn:%lli].\n", + inst->readPC(), inst->seqNum); + + MemDepEntryPtr inst_entry = findInHash(inst); + + moveToReady(inst_entry); +} + +template +void +MemDepUnit::reschedule(DynInstPtr &inst) +{ + instsToReplay.push_back(inst); +} + +template +void +MemDepUnit::replay(DynInstPtr &inst) +{ + DynInstPtr temp_inst; + + // For now this replay function replays all waiting memory ops. 
+ while (!instsToReplay.empty()) { + temp_inst = instsToReplay.front(); + + MemDepEntryPtr inst_entry = findInHash(temp_inst); + + DPRINTF(EdgeMemDepUnit, "Replaying mem instruction PC %#x " + "[sn:%lli].\n", + temp_inst->readPC(), temp_inst->seqNum); + + moveToReady(inst_entry); + + instsToReplay.pop_front(); + } +} + +template +void +MemDepUnit::completed(DynInstPtr &inst) +{ + DPRINTF(EdgeMemDepUnit, "Completed mem instruction PC %#x " + "[sn:%lli].\n", + inst->readPC(), inst->seqNum); + + ThreadID tid = inst->threadNumber; + + MemDepHashIt pos; + + uint64_t key = inst->staticInst->getLSID() | (inst->getBlockID() << 5); + + for ( pos = memDepHash.lower_bound(key); + pos != memDepHash.upper_bound(key); + ++ pos ) { + + if ( (pos->second)->inst == inst ) { + + // Remove the instruction from the hash and the list. + + assert(pos != memDepHash.end()); + + + break; + } + } + + DPRINTF(EdgeMemDepUnit, "Deleting key:%x\n", key); + + assert(pos != memDepHash.end()); + + instList[tid].erase((*pos).second->listIt); + + (*pos).second = NULL; + + memDepHash.erase(pos); + #ifdef DEBUG + MemDepEntry::memdep_erase++; + #endif + + return; +} + +template +void +MemDepUnit::completeBarrier(DynInstPtr &inst) +{ + wakeDependents(inst); + completed(inst); + + InstSeqNum barr_sn = inst->seqNum; + + if (inst->isMemBarrier()) { + assert(loadBarrier && storeBarrier); + if (loadBarrierSN == barr_sn) + loadBarrier = false; + if (storeBarrierSN == barr_sn) + storeBarrier = false; + } else if (inst->isWriteBarrier()) { + assert(storeBarrier); + if (storeBarrierSN == barr_sn) + storeBarrier = false; + } +} + +template +void +MemDepUnit::wakeDependents(DynInstPtr &inst) +{ + // Only stores and barriers have dependents. 
+ if (!inst->isStore() && !inst->isMemBarrier() && !inst->isWriteBarrier()) { + return; + } + + MemDepEntryPtr inst_entry = findInHash(inst); + + if(inst->isNullified()){ + DPRINTF(EdgeMemDepUnit,"Mem inst[Bid:%lli][Iid:%lli][LSID:%i] is nullified", + inst->getBlockID(), inst->getInstID(), + inst->staticInst->getLSID()); + } + + for (int i = 0; i < inst_entry->dependInsts.size(); ++i ) { + MemDepEntryPtr woken_inst = inst_entry->dependInsts[i]; + + if (!woken_inst->inst) { + // Potentially removed mem dep entries could be on this list + continue; + } + + if (woken_inst->issued){ + DPRINTF(EdgeMemDepUnit, "Dependent inst[Bid:%lli][LSID:%i] has been issued, " + "no need to wake it up again.\n", + woken_inst->inst->getBlockID(), + woken_inst->inst->staticInst->getLSID()); + continue; + } + + DPRINTF(EdgeMemDepUnit, "Waking up a dependent inst, " + "[Bid:%lli][Iid:%lli][LSID:%i].\n", + woken_inst->inst->getBlockID(), + woken_inst->inst->getInstID(), + woken_inst->inst->staticInst->getLSID() ); + + if (woken_inst->opsReady && !woken_inst->squashed) { + moveToReady(woken_inst); + } else { + woken_inst->memDepReady = true; + } + } + + inst_entry->dependInsts.clear(); +} + +template +void +MemDepUnit::squash(const TheISA::BlockID &squashed_num, + ThreadID tid) +{ +#if 0 + if (!instsToReplay.empty()) { + ListIt replay_it = instsToReplay.begin(); + while (replay_it != instsToReplay.end()) { + if ((*replay_it)->threadNumber == tid && + (*replay_it)->seqNum > squashed_num) { + instsToReplay.erase(replay_it++); + } else { + ++replay_it; + } + } + } +#endif + + ListIt squash_it = instList[tid].end(); + --squash_it; + + while (!instList[tid].empty() && + (*squash_it)->getBlockID() > squashed_num) { + + MemDepHashIt pos; + + uint64_t key = (*squash_it)->staticInst->getLSID() | + ((*squash_it)->getBlockID() << 5); + + DPRINTF(EdgeMemDepUnit, "Squashing inst [Bid:%lli][Iid:%lli][LSID:%x][key:%x]\n", + (*squash_it)->getBlockID(), + (*squash_it)->getInstID(), + 
(*squash_it)->staticInst->getLSID(), + key); + + for ( pos = memDepHash.lower_bound(key); + pos != memDepHash.upper_bound(key); + ++ pos ) { + + if ( (pos->second)->inst == (*squash_it) ) { + + assert(pos != memDepHash.end()); + + break; + } + } + + (*pos).second->squashed = true; + + (*pos).second = NULL; + +#ifdef DEBUG + MemDepEntry::memdep_erase++; +#endif + memDepHash.erase(pos); + instList[tid].erase(squash_it--); + } + + // Tell the dependency predictor to squash as well. + depPred.squash(squashed_num, tid); +} + +template +void +MemDepUnit::violation(DynInstPtr &store_inst, + DynInstPtr &violating_load) +{ + DPRINTF(EdgeMemDepUnit, "Passing violating PCs to store sets," + " load: %#x, store: %#x\n", violating_load->readPC(), + store_inst->readPC()); + // Tell the memory dependence unit of the violation. + depPred.violation(violating_load->readPC(), store_inst->readPC()); +} + +template +void +MemDepUnit::issue(DynInstPtr &inst) +{ + DPRINTF(EdgeMemDepUnit, "Issuing instruction PC %#x [sn:%lli].\n", + inst->readPC(), inst->seqNum); + + depPred.issued(inst->readPC(), inst->seqNum, inst->isStore()); +} + +template +inline typename MemDepUnit::MemDepEntryPtr & +MemDepUnit::findInHash(const DynInstPtr &inst) +{ + MemDepHashIt pos; + + uint64_t key = inst->staticInst->getLSID() | (inst->getBlockID() << 5); + + for ( pos = memDepHash.lower_bound(key); + pos != memDepHash.upper_bound(key); + ++ pos ) { + + assert(pos != memDepHash.end()); + + if ( ((pos->second)->inst) == inst ) { + + return pos->second; + } + + } + + // Should never reach this + return pos->second; + +} + +template +inline void +MemDepUnit::moveToReady(MemDepEntryPtr &woken_inst_entry) +{ + DPRINTF(EdgeMemDepUnit, "Adding instruction [sn:%lli] " + "to the ready list.\n", woken_inst_entry->inst->seqNum); + + assert(!woken_inst_entry->squashed); + + woken_inst_entry->issued = true; + + iqPtr->addReadyMemInst(woken_inst_entry->inst); +} + + +template +void +MemDepUnit::dumpLists() +{ + for (ThreadID 
tid = 0; tid < Impl::MaxThreads; tid++) { + cprintf("Instruction list %i size: %i\n", + tid, instList[tid].size()); + + ListIt inst_list_it = instList[tid].begin(); + int num = 0; + + while (inst_list_it != instList[tid].end()) { + cprintf("Instruction:%i\nPC:%#x\n[sn:%i]\n[tid:%i]\nIssued:%i\n" + "Squashed:%i\n\n", + num, (*inst_list_it)->readPC(), + (*inst_list_it)->seqNum, + (*inst_list_it)->threadNumber, + (*inst_list_it)->isIssued(), + (*inst_list_it)->isSquashed()); + inst_list_it++; + ++num; + } + } + + cprintf("Memory dependence hash size: %i\n", memDepHash.size()); + +#ifdef DEBUG + cprintf("Memory dependence entries: %i\n", MemDepEntry::memdep_count); +#endif +} + +template +int64_t +MemDepUnit::findLastStore(DynInstPtr &inst, ThreadID tid) +{ + int producing_store; + + // Find the last store in its own block + producing_store = inst->getLastStore(); + + if (producing_store >= 0 ) { + DPRINTF(EdgeMemDepUnit, "Find a prior store: [Bid:%lli][LSID:%i]!\n", + inst->getBlockID(), producing_store); + + //Return the key of this store + return (producing_store | (inst->getBlockID() << 5)); + } + + if (!inst->isBlockSpeculative()) { + DPRINTF(EdgeMemDepUnit, "Find no prior store!\n"); + + // This is the non-speculative block which means + // it's the head of RoB, so just return a negative + // flag indicating there's no prior store insts. + return -1; + } + + if (instList[tid].empty()) { + // If instList is empty, no need to lookup. + DPRINTF(EdgeMemDepUnit, "No mem ref in prior inst blocks, no dependence will be built.\n"); + // Hit this means no prior stores. + return -1; + } + + ListIt list_it = instList[tid].end(); + list_it--; + + do { + // Find one inst in last block by iterating. + // Inst List will have the youngest inst in tail and + // the block id may not be adjacent to each other, + // so we can just get the elder inst block instead of + // knowing the exact block id. 
As a result , + // to judge whether its an elder inst block or not, we + // should use the 'less' operator. + if ((*list_it)->getBlockID() < inst->getBlockID() ){ + DPRINTF(EdgeMemDepUnit, "Search last inst block[Bid:%lli] for dependence.\n", + (*list_it)->getBlockID()); + + if ((*list_it)->isStore()){ + return ((*list_it)->staticInst->getLSID() | + ((*list_it)->getBlockID() << 5)); + } + + return findLastStore(*list_it, tid); + } + }while ((list_it--) != instList[tid].begin()); + + DPRINTF(EdgeMemDepUnit, "No mem insts is elder.\n"); + // Hit this means no prior stores. + return -1; +} + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/2bit_local.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/pred/2bit_local.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2004-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_2BIT_LOCAL_PRED_HH__
+#define __CPU_EDGE_2BIT_LOCAL_PRED_HH__
+
+// NOTE(review): the include below carries no header name and std::vector
+// further down carries no element type; both appear to have been lost
+// before this file reached review -- TODO restore from the original
+// changeset.
+#include
+
+#include "cpu/edge/sat_counter.hh"
+#include "base/types.hh"
+
+/**
+ * Implements a local predictor that uses the PC to index into a table of
+ * counters. Note that any time a pointer to the bp_history is given, it
+ * should be NULL using this predictor because it does not have any branch
+ * predictor state that needs to be recorded or updated; the update can be
+ * determined solely by the branch being taken or not taken.
+ */
+class LocalBP
+{
+  public:
+    /**
+     * Default branch predictor constructor.
+     * @param localPredictorSize Size of the local predictor.
+     * @param localCtrBits Number of bits per counter.
+     * @param instShiftAmt Offset amount for instructions to ignore alignment.
+     */
+    LocalBP(unsigned localPredictorSize, unsigned localCtrBits,
+            unsigned instShiftAmt);
+
+    /**
+     * Looks up the given address in the branch predictor and returns
+     * a true/false value as to whether it is taken.
+     * @param branch_addr The address of the branch to look up.
+     * @param bp_history Pointer to any bp history state.
+     * @return Whether or not the branch is taken.
+     */
+    bool lookup(Addr &branch_addr, void * &bp_history);
+
+    /**
+     * Updates the branch predictor with the actual result of a branch.
+     * @param branch_addr The address of the branch to update.
+     * @param taken Whether or not the branch was taken.
+     * @param bp_history Pointer to any bp history state; must be NULL.
+     */
+    void update(Addr &branch_addr, bool taken, void *bp_history);
+
+    /**
+     * Nothing to undo on a squash; only checks that no history state was
+     * handed in.
+     * @param bp_history Pointer to any bp history state; must be NULL.
+     */
+    void squash(void *bp_history)
+    { assert(bp_history == NULL); }
+
+    /** Resets every counter of the local predictor. */
+    void reset();
+
+  private:
+    /**
+     *  Returns the taken/not taken prediction given the value of the
+     *  counter.
+     *  @param count The value of the counter.
+     *  @return The prediction based on the counter value.
+     */
+    inline bool getPrediction(uint8_t &count);
+
+    /** Calculates the local index based on the PC. */
+    inline unsigned getLocalIndex(Addr &PC);
+
+    /** Array of counters that make up the local predictor. */
+    std::vector localCtrs;
+
+    /** Size of the local predictor. */
+    unsigned localPredictorSize;
+
+    /** Number of sets. */
+    unsigned localPredictorSets;
+
+    /** Number of bits of the local predictor's counters. */
+    unsigned localCtrBits;
+
+    /** Number of bits to shift the PC when calculating index. */
+    unsigned instShiftAmt;
+
+    /** Mask to get index bits. */
+    unsigned indexMask;
+};
+
+#endif // __CPU_EDGE_2BIT_LOCAL_PRED_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/2bit_local.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/2bit_local.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,146 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "base/intmath.hh"
+#include "base/misc.hh"
+#include "base/trace.hh"
+#include "cpu/edge/pred/2bit_local.hh"
+
+/**
+ * Builds the table of saturating counters for the local predictor.
+ * Terminates via fatal() when the size is not a power of 2, when the
+ * counter width is zero, or when size / width is not a power of 2.
+ * @param _localPredictorSize Total size of the local predictor.
+ * @param _localCtrBits Number of bits in each counter.
+ * @param _instShiftAmt Number of low PC bits dropped before indexing.
+ */
+LocalBP::LocalBP(unsigned _localPredictorSize,
+                 unsigned _localCtrBits,
+                 unsigned _instShiftAmt)
+    : localPredictorSize(_localPredictorSize),
+      localCtrBits(_localCtrBits),
+      instShiftAmt(_instShiftAmt)
+{
+    if (!isPowerOf2(localPredictorSize)) {
+        fatal("Invalid local predictor size!\n");
+    }
+
+    // Reject a zero counter width up front: it would divide by zero just
+    // below and make getPrediction() shift by (0 - 1).
+    if (localCtrBits == 0) {
+        fatal("Invalid local counter bits! Must be non-zero.\n");
+    }
+
+    localPredictorSets = localPredictorSize / localCtrBits;
+
+    if (!isPowerOf2(localPredictorSets)) {
+        fatal("Invalid number of local predictor sets! Check localCtrBits.\n");
+    }
+
+    // Setup the index mask.
+    indexMask = localPredictorSets - 1;
+
+    DPRINTF(EdgePredUnit, "Branch predictor: index mask: %#x\n", indexMask);
+
+    // Setup the array of counters for the local predictor.
+    localCtrs.resize(localPredictorSets);
+
+    for (unsigned i = 0; i < localPredictorSets; ++i)
+        localCtrs[i].setBits(_localCtrBits);
+
+    DPRINTF(EdgePredUnit, "Branch predictor: local predictor size: %i\n",
+            localPredictorSize);
+
+    DPRINTF(EdgePredUnit, "Branch predictor: local counter bits: %i\n",
+            localCtrBits);
+
+    DPRINTF(EdgePredUnit, "Branch predictor: instruction shift amount: %i\n",
+            instShiftAmt);
+}
+
+/** Calls reset() on every counter in the table. */
+void
+LocalBP::reset()
+{
+    for (unsigned i = 0; i < localPredictorSets; ++i) {
+        localCtrs[i].reset();
+    }
+}
+
+/**
+ * Predicts the direction of the branch at branch_addr.
+ * The counters are only read here; training happens in update().
+ * @param branch_addr Address used to select the counter.
+ * @param bp_history Not read or written by this predictor.
+ * @return The prediction derived from the selected counter.
+ */
+bool
+LocalBP::lookup(Addr &branch_addr, void * &bp_history)
+{
+    unsigned local_predictor_idx = getLocalIndex(branch_addr);
+
+    DPRINTF(EdgePredUnit, "Branch predictor: Looking up index %#x\n",
+            local_predictor_idx);
+
+    uint8_t counter_val = localCtrs[local_predictor_idx].read();
+
+    DPRINTF(EdgePredUnit, "Branch predictor: prediction is %i.\n",
+            (int)counter_val);
+
+    return getPrediction(counter_val);
+}
+
+/**
+ * Trains the counter selected by branch_addr with the actual outcome.
+ * @param branch_addr Address used to select the counter.
+ * @param taken Whether or not the branch was taken.
+ * @param bp_history Must be NULL; this predictor keeps no history.
+ */
+void
+LocalBP::update(Addr &branch_addr, bool taken, void *bp_history)
+{
+    assert(bp_history == NULL);
+
+    // Update the local predictor.
+    unsigned local_predictor_idx = getLocalIndex(branch_addr);
+
+    DPRINTF(EdgePredUnit, "Branch predictor: Looking up index %#x\n",
+            local_predictor_idx);
+
+    if (taken) {
+        DPRINTF(EdgePredUnit, "Branch predictor: Branch updated as taken.\n");
+        localCtrs[local_predictor_idx].increment();
+    } else {
+        DPRINTF(EdgePredUnit,
+                "Branch predictor: Branch updated as not taken.\n");
+        localCtrs[local_predictor_idx].decrement();
+    }
+}
+
+/**
+ * Maps a counter value to a taken/not-taken prediction.
+ * @param count The value of the counter.
+ * @return True when count has a bit set at position (localCtrBits - 1)
+ *         or above.
+ */
+inline
+bool
+LocalBP::getPrediction(uint8_t &count)
+{
+    // Get the MSB of the count
+    return (count >> (localCtrBits - 1));
+}
+
+/** Drops the low instShiftAmt bits of the address and applies indexMask. */
+inline
+unsigned
+LocalBP::getLocalIndex(Addr &branch_addr)
+{
+    return (branch_addr >> instShiftAmt) & indexMask;
+}
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/SConscript
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/SConscript Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,42 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2006 The Regents of The University of Michigan
+# All rights reserved.
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer; +# redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution; +# neither the name of the copyright holders nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+#
+# Authors: Steve Reinhardt
+
+Import('*')
+
+# The predictor sources are only registered when the SimpleEdgeCPU model
+# is part of the build.
+if 'SimpleEdgeCPU' in env['CPU_MODELS']:
+    for pred_source in ('2bit_local.cc', 'btb.cc', 'ibtb.cc', 'btp.cc',
+                        'ctb.cc', 'ras.cc', 'tournament.cc'):
+        Source(pred_source)
+
+    TraceFlag('FreeList')
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/btb.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/btb.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,116 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_BTB_HH__
+#define __CPU_EDGE_BTB_HH__
+
+#include <vector>
+
+#include "base/misc.hh"
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "arch/isa_traits.hh"
+
+/**
+ * Branch target buffer indexed by block PC, address-space id and exit id.
+ * Each entry stores a target as an offset from the block PC, guarded by a
+ * one-bit hysteresis.
+ */
+class DefaultBTB
+{
+  private:
+    typedef TheISA::ExitID ExitID;
+
+    /** One BTB slot. */
+    struct BTBEntry
+    {
+        /** Target offset from the block PC, already shifted right by
+         *  blockShiftAmt. */
+        Addr offset;
+        /** Set when the last update repeated the stored offset; while
+         *  set, the next update keeps the stored offset. */
+        bool hysteresis;
+        /** True once update() has written this slot. */
+        bool isupdated;
+    };
+  public:
+    /** Creates a BTB with the given number of entries.
+     * @param numEntries Number of entries for the BTB; must be a power
+     *        of 2.
+     * @param blockShiftAmt Number of low block-PC bits to ignore.
+     * @param offsetLength Width in bits of a stored target offset.
+     * NOTE(review): the definition in btb.cc names its second and third
+     * parameters _offsetLength and _blockShiftAmt, i.e. in the opposite
+     * order from this declaration -- confirm which order callers use.
+     */
+    DefaultBTB(unsigned numEntries, unsigned blockShiftAmt,
+               unsigned offsetLength);
+
+    void reset();
+
+    /** Rebuilds a target address from a block PC and a stored offset.
+     * The offset is masked with offsetMask, sign-extended from bit
+     * (offsetLength - 1), shifted left by blockShiftAmt and added to
+     * block_PC.
+     * @param block_PC The address of the block.
+     * @param offset The stored offset.
+     * @return Returns the target address.
+     */
+    Addr targetGen(Addr block_PC, Addr offset);
+
+    /** Looks up the predicted target of a block exit.
+     * @param block_PC The address of the block to look up.
+     * @param addr_space_id The address space id, in [0, 15].
+     * @param exitID The exit of the block, in [0, 7].
+     * @return Returns targetGen() applied to the stored offset; the
+     *         entry is used whether or not it was ever updated.
+     */
+    Addr lookup(const Addr &block_PC, int addr_space_id, ExitID exitID);
+
+    /** Updates the BTB with the target of a block exit.
+     * @param block_PC The address of the block being updated.
+     * @param addr_space_id The address space id, in [0, 15].
+     * @param exitID The exit of the block, in [0, 7].
+     * @param target The target address of the exit.
+     */
+    void update(const Addr &block_PC, int addr_space_id, ExitID exitID,
+                const Addr &target);
+
+  private:
+    /** Returns the index into the BTB.
+     * @param block_PC The address of the block to look up.
+     * @param addr_space_id The address space id, in [0, 15].
+     * @param exitID The exit of the block, in [0, 7].
+     * @return Returns the index into the BTB.
+     */
+    inline unsigned getIndex(const Addr &block_PC, int addr_space_id,
+                             ExitID exitID);
+
+    /** The actual BTB. */
+    std::vector<BTBEntry> btb;
+
+    /** The number of entries in the BTB. */
+    unsigned numEntries;
+
+    /** The index mask (numEntries - 1). */
+    unsigned idxMask;
+
+    /** The index length in bits (floorLog2 of numEntries). */
+    unsigned idxLength;
+
+    /** Width in bits of a stored target offset. */
+    Addr offsetLength;
+
+    /** Mask applied to a stored offset in targetGen(). */
+    Addr offsetMask;
+
+    /** Number of bits to shift PC when calculating index. */
+    unsigned blockShiftAmt;
+
+};
+
+#endif // __CPU_EDGE_BTB_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/btb.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/btb.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,147 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "base/intmath.hh" +#include "base/trace.hh" +#include "cpu/edge/pred/btb.hh" + +DefaultBTB::DefaultBTB(unsigned _numEntries, + unsigned _offsetLength, + unsigned _blockShiftAmt) + : numEntries(_numEntries), + offsetLength(_offsetLength), + blockShiftAmt(_blockShiftAmt) +{ + DPRINTF(EdgePredUnit, "BTB: Creating BTB object.\n"); + + if (!isPowerOf2(numEntries)) { + fatal("BTB entries is not a power of 2!"); + } + + btb.resize(numEntries); + + for (unsigned i = 0; i < numEntries; ++i) { + btb[i].offset = 0; + btb[i].hysteresis = 0; + btb[i].isupdated = false; + } + + idxMask = numEntries - 1; + idxLength = floorLog2(numEntries); + + offsetMask = (1<= 0 && addr_space_id <= 15); + int reversed_as_id = ((addr_space_id & 0x1) << 3) + | ((addr_space_id & 0x2) << 1) + | ((addr_space_id & 0x4) >> 1) + | ((addr_space_id & 0x8) >> 3); + + assert (exitID>= 0 && exitID <= 7); + int reversed_exit = ((exitID & 0x1) << 2) + | (exitID & 0x2) + | ((exitID & 0x4) >> 2); + + int index = ((block_PC >> blockShiftAmt) + ^ ( (reversed_exit << (idxLength - 3)) | + (reversed_as_id << (idxLength - 7)) ) ) + & idxMask; + + return index; +} + +Addr 
+DefaultBTB::targetGen(Addr block_PC, Addr offset) +{ + Addr newOffset; + newOffset = offset & offsetMask; + DPRINTF(EdgePredUnit,"newoffset = %#x.\n",newOffset); + if ((newOffset >> (offsetLength- 1)) & 0x1) // negative within those 9 bits + newOffset = newOffset | (~offsetMask); + DPRINTF(EdgePredUnit,"newoffset = %#x.\n",newOffset); + DPRINTF(EdgePredUnit,"target = %#x.\n",block_PC + (newOffset << blockShiftAmt)); + return block_PC + (newOffset << blockShiftAmt); +} + +// @todo Create some sort of return struct that has both whether or not the +// address is valid, and also the address. For now will just use addr = 0 to +// represent invalid entry. +Addr +DefaultBTB::lookup(const Addr &block_PC, int addr_space_id, ExitID exitID) +{ + unsigned btb_idx = getIndex(block_PC, addr_space_id, exitID); + Addr offset = btb[btb_idx].offset; + Addr target = targetGen(block_PC,offset); + if(btb[btb_idx].isupdated) + DPRINTF(EdgePredUnit,"Block [PC=%#x,Exit=%i,idx=%#x] BTB updated before.\n",block_PC,exitID,btb_idx); + else + DPRINTF(EdgePredUnit,"Block [PC=%#x,Exit=%i,idx=%#x] BTB first access.\n",block_PC,exitID,btb_idx); + return target; +} + +void +DefaultBTB::update(const Addr &block_PC, int addr_space_id, ExitID exitID, + const Addr &target) +{ + BTBEntry oldBTBEntry,newBTBEntry; + unsigned btb_idx = getIndex(block_PC, addr_space_id, exitID); + Addr offset = (target - block_PC) >> blockShiftAmt; + assert(btb_idx < numEntries); + oldBTBEntry = btb[btb_idx]; + newBTBEntry.offset = oldBTBEntry.hysteresis ? 
oldBTBEntry.offset : offset; + newBTBEntry.hysteresis = (oldBTBEntry.offset == offset); + newBTBEntry.isupdated = true; + btb[btb_idx] = newBTBEntry; + DPRINTF(EdgePredUnit,"Block [PC=%#x,Exit=%i,idx=%#x] BTB updated(target = %#x,%#x).\n" + "Offset change from %#x to %#x.\n",block_PC,exitID,btb_idx,target,target-block_PC, + oldBTBEntry.offset,offset); + if(newBTBEntry.hysteresis) + DPRINTF(EdgePredUnit,"hyst = 1.\n"); + else + DPRINTF(EdgePredUnit,"hyst = 0.\n"); +} + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/btp.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/pred/btp.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_BTP_HH__
+#define __CPU_EDGE_BTP_HH__
+
+#include <vector>
+
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "arch/isa_traits.hh"
+
+
+/**
+ * Table predicting the exit type of a block exit, indexed by block PC,
+ * address-space id and exit id. Each entry carries a one-bit hysteresis.
+ */
+class BranchTypePredictor
+{
+  private:
+    typedef TheISA::ExitID ExitID;
+    typedef TheISA::ExitType ExitType;
+
+    /** One predictor slot. */
+    struct BTPEntry
+    {
+        /** The exit type currently predicted by this slot. */
+        ExitType branchType;
+        /** Set when the last update repeated the stored type; while set,
+         *  the next update keeps the stored type. */
+        bool hyst;
+        /** True once update() has written this slot. */
+        bool updated;
+    };
+
+  public:
+
+    /** Creates an empty predictor; init() must size it before use. */
+    BranchTypePredictor()
+        : numEntries(0), idxLength(0), idxMask(0), blockShiftAmt(0)
+    {}
+
+    ~BranchTypePredictor() {}
+
+    /** Sizes the table and initialises every entry.
+     * @param numEntries Number of entries in the table.
+     * @param blockShiftAmt Number of low PC bits dropped before indexing.
+     */
+    void init(unsigned numEntries, unsigned blockShiftAmt);
+
+    /** Re-initialises every entry of the table. */
+    void reset();
+
+    /** Returns the exit type stored for the given block exit.
+     * @param PC The address of the block.
+     * @param addr_space_id The address space id, in [0, 15].
+     * @param exitID The exit of the block, in [0, 7].
+     */
+    ExitType lookup(Addr PC, int addr_space_id, ExitID exitID);
+
+    /** Trains the entry for the given block exit with the actual type.
+     * @param PC The address of the block.
+     * @param addr_space_id The address space id, in [0, 15].
+     * @param exitID The exit of the block, in [0, 7].
+     * @param branchType The exit type that was actually observed.
+     */
+    void update(Addr PC, int addr_space_id, ExitID exitID,
+                ExitType branchType);
+
+  private:
+    /** Computes the table index for the given block exit. */
+    unsigned idxGen(Addr PC, int addr_space_id, ExitID exitID);
+
+    /** The predictor table. */
+    std::vector<BTPEntry> btp;
+
+    /** Number of entries in the table. */
+    unsigned numEntries;
+
+    /** The index length in bits (floorLog2 of numEntries). */
+    unsigned idxLength;
+
+    /** Mask selecting the low idxLength bits of an index. */
+    unsigned idxMask;
+
+    /** Number of bits to shift the PC when calculating the index. */
+    unsigned blockShiftAmt;
+};
+
+#endif // __CPU_EDGE_BTP_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/btp.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/btp.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ *
+ * Authors: Kevin Lim
+ */
+#include "base/intmath.hh"
+#include "base/trace.hh"
+#include "cpu/edge/pred/btp.hh"
+
+
+namespace {
+
+/**
+ * Places a hash field so that it ends topOffset bits below the top of an
+ * idxLength-bit index. When the index is narrower than topOffset the
+ * field is shifted right instead, so the shift amount never wraps.
+ */
+inline int
+placeBelowIndexTop(int field, unsigned idxLength, unsigned topOffset)
+{
+    return idxLength >= topOffset ? field << (idxLength - topOffset)
+                                  : field >> (topOffset - idxLength);
+}
+
+} // anonymous namespace
+
+/**
+ * Sizes the table, sets every entry to TheISA::seq with hysteresis and
+ * updated flag cleared, and derives the index length and mask.
+ * NOTE(review): reset() re-initialises entries to TheISA::branch rather
+ * than TheISA::seq -- confirm the difference is intended.
+ */
+void
+BranchTypePredictor::init(unsigned _numEntries, unsigned _blockShiftAmt)
+{
+    numEntries = _numEntries;
+    blockShiftAmt = _blockShiftAmt;
+
+    btp.resize(numEntries);
+
+    for (unsigned i = 0; i < numEntries; ++i) {
+        btp[i].branchType = TheISA::seq;
+        btp[i].hyst = 0;
+        btp[i].updated = false;
+    }
+
+    idxLength = floorLog2(numEntries);
+    idxMask = (1 << idxLength) - 1;
+}
+
+/** Sets every entry to TheISA::branch with both flags cleared. */
+void
+BranchTypePredictor::reset()
+{
+    for (unsigned i = 0; i < numEntries; ++i) {
+        btp[i].branchType = TheISA::branch;
+        btp[i].hyst = 0;
+        btp[i].updated = false;
+    }
+}
+
+/**
+ * Computes the table index: the shifted PC is XORed with the bit-reversed
+ * exit id (3 bits below the index top) and the bit-reversed address-space
+ * id (7 bits below the index top), then masked with idxMask.
+ * @param PC The address of the block.
+ * @param addr_space_id The address space id; asserted to be in [0, 15].
+ * @param exitID The exit of the block; asserted to be in [0, 7].
+ */
+unsigned
+BranchTypePredictor::idxGen(Addr PC, int addr_space_id, ExitID exitID)
+{
+    assert (addr_space_id >= 0 && addr_space_id <= 15);
+    int reversed_as_id = ((addr_space_id & 0x1) << 3)
+                       | ((addr_space_id & 0x2) << 1)
+                       | ((addr_space_id & 0x4) >> 1)
+                       | ((addr_space_id & 0x8) >> 3);
+
+    assert (exitID >= 0 && exitID <= 7);
+    int reversed_exit = ((exitID & 0x1) << 2)
+                      | (exitID & 0x2)
+                      | ((exitID & 0x4) >> 2);
+
+    // Same placement as (field << (idxLength - k)) for idxLength >= k,
+    // but well defined for tables with fewer than 128 entries as well.
+    int hash_bits = placeBelowIndexTop(reversed_exit, idxLength, 3)
+                  | placeBelowIndexTop(reversed_as_id, idxLength, 7);
+
+    unsigned index = ((PC >> blockShiftAmt) ^ hash_bits) & idxMask;
+    return index;
+}
+
+/** Returns the exit type stored in the entry selected by idxGen(). */
+TheISA::ExitType
+BranchTypePredictor::lookup(Addr PC, int addr_space_id, ExitID exitID)
+{
+    unsigned index = idxGen(PC, addr_space_id, exitID);
+    const BTPEntry &entry = btp[index];
+    if (entry.updated) {
+        DPRINTF(EdgePredUnit,"Block [PC:%#x,idx:%i]:BTP updated before.\n",PC,index);
+    } else {
+        DPRINTF(EdgePredUnit,"Block [PC:%#x,idx:%i]:BTP first accessed.\n",PC,index);
+    }
+    return entry.branchType;
+}
+
+/**
+ * Trains the entry selected by idxGen() with the observed exit type.
+ * The stored type is replaced only when the old hysteresis bit is clear;
+ * the new hysteresis bit records whether the observed type matched the
+ * previously stored one.
+ */
+void
+BranchTypePredictor::update(Addr PC, int addr_space_id, ExitID exitID, ExitType branchType)
+{
+    unsigned index = idxGen(PC, addr_space_id, exitID);
+    BTPEntry btpEntry = btp[index];
+    BTPEntry new_btpEntry;
+    new_btpEntry.branchType = btpEntry.hyst ? btpEntry.branchType : branchType;
+    new_btpEntry.hyst = (btpEntry.branchType == branchType);
+    new_btpEntry.updated = true;
+    btp[index] = new_btpEntry;
+    if(new_btpEntry.branchType == TheISA::branch){
+        DPRINTF(EdgePredUnit,"Block [PC:%#x,Exit:%i,idx:%i]:BTP updated(branch).\n",PC,exitID,index);
+    }else if(new_btpEntry.branchType == TheISA::call){
+        DPRINTF(EdgePredUnit,"Block [PC:%#x,Exit:%i,idx:%i]:BTP updated(call).\n",PC,exitID,index);
+    }else if(new_btpEntry.branchType == TheISA::ret){
+        DPRINTF(EdgePredUnit,"Block [PC:%#x,Exit:%i,idx:%i]:BTP updated(return).\n",PC,exitID,index);
+    }else if(new_btpEntry.branchType == TheISA::seq){
+        DPRINTF(EdgePredUnit,"Block [PC:%#x,Exit:%i,idx:%i]:BTP updated(seq).\n",PC,exitID,index);
+    }else if(new_btpEntry.branchType == TheISA::ibranch){
+        DPRINTF(EdgePredUnit,"Block [PC:%#x,Exit:%i,idx:%i]:BTP updated(ibranch).\n",PC,exitID,index);
+    }
+}
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/ctb.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/ctb.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,92 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_CTB_HH__
+#define __CPU_EDGE_CTB_HH__
+
+#include <vector>
+
+#include "base/misc.hh"
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "arch/isa_traits.hh"
+
+/**
+ * Call target buffer. Each entry keeps a call target and a return-address
+ * offset, each with its own one-bit hysteresis.
+ */
+class DefaultCTB
+{
+  private:
+    typedef TheISA::ExitID ExitID;
+
+    /** One CTB slot. */
+    struct CTBEntry
+    {
+        /** Call target, stored shifted right by blockShiftAmt. */
+        Addr call_target;
+        /** Hysteresis bit guarding call_target. */
+        bool call_target_hyst;
+        /** True once the call half of this slot has been updated. */
+        bool call_updated;
+        /** Return-address offset, masked with offsetMask. */
+        Addr ret_addr_offset;
+        /** Hysteresis bit guarding ret_addr_offset. */
+        bool ret_addr_hyst;
+        /** True once the return half of this slot has been updated. */
+        bool ret_updated;
+    };
+  public:
+
+    /** Creates an empty CTB; init() must size it before use. */
+    DefaultCTB()
+        : numEntries(0), blockShiftAmt(0), ctbMask(0), ctbIdxLength(0),
+          offsetLength(0), offsetMask(0)
+    {}
+
+    ~DefaultCTB() {}
+
+    /** Sizes the table, clears every entry and derives the masks.
+     * @param numEntries Number of entries in the table.
+     * @param blockShiftAmt Number of low PC bits dropped before indexing.
+     * @param offsetLength Width in bits of a return-address offset.
+     */
+    void init(unsigned numEntries,
+              unsigned blockShiftAmt,
+              unsigned offsetLength);
+
+    /** Clears every entry of the table. */
+    void reset();
+
+    /** Computes the table index for a block exit.
+     * @param PC The address of the block.
+     * @param addr_space_ID The address space id, in [0, 15].
+     * @param exitID The exit of the block, in [0, 7].
+     */
+    unsigned CTBIdxGen(Addr PC, int addr_space_ID, ExitID exitID);
+
+    /** Returns the stored call target shifted left by blockShiftAmt. */
+    Addr lookupTarget(Addr PC,int addr_space_ID, ExitID pred_exitID);
+
+    /** Returns the return address predicted from the stored offset. */
+    Addr lookupRetAddr(Addr PC,int addr_space_ID, ExitID pred_exitID);
+
+    /** Update when a call commit.
+     * @param index Table index, as produced by CTBIdxGen().
+     * @param target The committed call target.
+     */
+    void update(unsigned index, Addr target);
+
+    /** Update when a return commit.
+     * @param index Table index, as produced by CTBIdxGen().
+     * @param block_addr The address of the block.
+     * @param target The committed return target.
+     */
+    void update(unsigned index, Addr block_addr, Addr target);
+
+  private:
+    /** The CTB table. */
+    std::vector<CTBEntry> ctb;
+
+    /** Number of entries in the table. */
+    unsigned numEntries;
+
+    /** Number of bits to shift the PC when calculating the index. */
+    unsigned blockShiftAmt;
+
+    /** The index mask (numEntries - 1). */
+    unsigned ctbMask;
+
+    /** The index length in bits (floorLog2 of numEntries). */
+    unsigned ctbIdxLength;
+
+    /** Width in bits of a return-address offset. */
+    unsigned offsetLength;
+
+    /** Mask selecting the low offsetLength bits of an offset. */
+    unsigned offsetMask;
+};
+
+#endif // __CPU_EDGE_CTB_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/ctb.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/ctb.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,161 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "base/intmath.hh" +#include "base/trace.hh" +#include "cpu/edge/pred/ctb.hh" + +void +DefaultCTB::init(unsigned _numEntries, + unsigned _blockShiftAmt, + unsigned _offsetLength) +{ + numEntries = _numEntries; + + blockShiftAmt = _blockShiftAmt; + + offsetLength = _offsetLength; + + ctb.resize(numEntries); + + for(unsigned i = 0; i < numEntries; ++i) { + ctb[i].call_target = 0; + ctb[i].call_target_hyst = 0; + ctb[i].call_updated = false; + ctb[i].ret_addr_offset = 0; + ctb[i].ret_addr_hyst = 0; + ctb[i].ret_updated = false; + } + + ctbMask = numEntries - 1; + + ctbIdxLength = floorLog2(numEntries); + + offsetMask = (1 << offsetLength) - 1; + +} + +void +DefaultCTB::reset() +{ + for(unsigned i = 0; i < numEntries; ++i) { + ctb[i].call_target = 0; + ctb[i].call_target_hyst = 0; + ctb[i].call_updated = false; + ctb[i].ret_addr_offset = 0; + ctb[i].ret_addr_hyst = 0; + ctb[i].ret_updated = false; + } +} + +unsigned +DefaultCTB::CTBIdxGen(Addr PC, int addr_space_ID, ExitID exitID) +{ + assert (addr_space_ID >= 0 && addr_space_ID <= 15); + int reversed_as_id = ((addr_space_ID & 0x1) << 3) + | ((addr_space_ID & 0x2) << 1) + | ((addr_space_ID & 0x4) >> 1) + | ((addr_space_ID & 0x8) >> 3); + + assert (exitID >= 0 && exitID <= 7); + int reversed_exit = ((exitID & 0x1) << 2) + | (exitID & 0x2) + | ((exitID & 0x4) >> 2); + unsigned index = ((PC >> blockShiftAmt) + ^ ( (reversed_as_id << (ctbIdxLength - 
4)) | + (reversed_exit << (ctbIdxLength - 7)) ) ) + & ctbMask; + return index; +} + +Addr +DefaultCTB::lookupTarget(Addr PC,int addr_space_ID, ExitID pred_exitID) +{ + unsigned index = CTBIdxGen(PC, addr_space_ID, pred_exitID); + CTBEntry ctbEntry = ctb[index]; + if(ctb[index].call_updated) + DPRINTF(EdgePredUnit,"Block [PC:%#x]:CTB call updated before.\n",PC); + else + DPRINTF(EdgePredUnit,"Block [PC:%#x]:CTB call first accessed.\n",PC); + return ctbEntry.call_target << blockShiftAmt; +} + +Addr +DefaultCTB::lookupRetAddr(Addr PC,int addr_space_ID, ExitID pred_exitID) +{ + unsigned index = CTBIdxGen(PC, addr_space_ID, pred_exitID); + CTBEntry ctbEntry = ctb[index]; + Addr offset = ctbEntry.ret_addr_offset; + if ((offset >> (offsetLength- 1)) & 0x1) // negative within those 9 bits + offset = offset | (~offsetMask); + if(ctb[index].ret_updated) + DPRINTF(EdgePredUnit,"Block [PC:%#x]:CTB return updated before.\n",PC); + else + DPRINTF(EdgePredUnit,"Block [PC:%#x]:CTB return first accessed.\n",PC); + DPRINTF(EdgePredUnit,"Block [PC:%#x]:Return address is %#x. Offset is %#x.\n",PC, + PC + (offset<> blockShiftAmt) + offset; +} + +void +DefaultCTB::update(unsigned index, Addr target) +{ + CTBEntry ctbEntry = ctb[index]; + CTBEntry new_ctbEntry; + target = target >> blockShiftAmt; + new_ctbEntry.call_target = ctbEntry.call_target_hyst ? ctbEntry.call_target : target; + new_ctbEntry.call_target_hyst = (ctbEntry.call_target == target) ; + new_ctbEntry.call_updated = true; + new_ctbEntry.ret_addr_offset = ctbEntry.ret_addr_offset; + new_ctbEntry.ret_addr_hyst = ctbEntry.ret_addr_hyst; + new_ctbEntry.ret_updated = ctbEntry.ret_updated; + ctb[index] = new_ctbEntry; + DPRINTF(EdgePredUnit,"CTB call target update:%#x.\n",target<> blockShiftAmt) & offsetMask ; + new_ctbEntry.ret_addr_offset = ctbEntry.ret_addr_hyst ? 
ctbEntry.ret_addr_offset : offset; + new_ctbEntry.ret_addr_hyst = (ctbEntry.ret_addr_offset == offset) ; + new_ctbEntry.ret_updated = true; + new_ctbEntry.call_target = ctbEntry.call_target; + new_ctbEntry.call_target_hyst = ctbEntry.call_target_hyst; + new_ctbEntry.call_updated = ctbEntry.call_updated; + ctb[index] = new_ctbEntry; + DPRINTF(EdgePredUnit,"Block [PC:%#x]: Return updated. Target = %#x.\n" + "Offset is %#x(%#x).\n", block_addr,target,offset,new_ctbEntry.ret_addr_offset); + } + + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/ibtb.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/pred/ibtb.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,105 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_IBTB_HH__
+#define __CPU_EDGE_IBTB_HH__
+
+#include <vector>
+
+#include "base/misc.hh"
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "arch/isa_traits.hh"
+
+/**
+ * Indirect branch target buffer, made of an indirect-exit predictor table
+ * and a target table, each with a one-bit hysteresis per entry.
+ */
+class DefaultIBTB
+{
+  private:
+    typedef TheISA::ExitID ExitID;
+
+    /** One slot of the indirect-exit predictor table. */
+    struct IExitPredictorEntry
+    {
+        /** The stored indirect exit. */
+        unsigned iExit;
+        /** Hysteresis bit guarding iExit. */
+        bool hysteresis;
+    };
+
+    /** One slot of the target table. */
+    struct IBTBEntry
+    {
+        /** The stored target. */
+        Addr target;
+        /** Hysteresis bit guarding target. */
+        bool hysteresis;
+    };
+
+  public:
+    /** Creates both tables with all entries cleared.
+     * @param iExitPredictorEntries Number of entries of the indirect-exit
+     *        predictor; must be a power of 2.
+     * @param numEntries Number of entries of the target table; must be a
+     *        power of 2.
+     * @param iExitLength Width in bits of an indirect exit.
+     * @param blockShiftAmt Shift amount applied to block addresses.
+     */
+    DefaultIBTB(unsigned iExitPredictorEntries, unsigned numEntries,
+                unsigned iExitLength, unsigned blockShiftAmt);
+
+    void reset();
+
+    /** Predict.
+     * The index is passed in from the tournament predictor so that the
+     * IBTB does not need to squash.
+     * @param PC The address of the block.
+     * @param index Index supplied by the tournament predictor.
+     * @param exitID The predicted exit of the block.
+     * @return Returns the predicted target.
+     */
+    Addr lookup(const Addr &PC, unsigned index, ExitID exitID);
+
+    /** Trains the IBTB with the resolved exit and target.
+     * @param PC The address of the block.
+     * @param index Index supplied by the tournament predictor.
+     * @param actual_exit The exit that was actually taken.
+     * @param corr_target The correct target address.
+     */
+    void update(const Addr &PC, unsigned index, ExitID actual_exit,
+                Addr corr_target);
+
+  private:
+    unsigned predictIndirectExit(unsigned index, ExitID exitID);
+
+    unsigned calcIExitPredictorIdx(unsigned index, ExitID exitID);
+
+    unsigned calcIBTBIdx(Addr PC, unsigned iExit, ExitID exitID);
+
+    Addr targetGen(Addr target, unsigned iExit);
+
+    unsigned iExitGen(Addr target);
+
+    /** The indirect-exit predictor table. */
+    std::vector<IExitPredictorEntry> iExitPredictor;
+
+    /** The target table. */
+    std::vector<IBTBEntry> ibtb;
+
+    /** Number of entries in iExitPredictor. */
+    unsigned iExitPredictorEntries;
+
+    /** Number of entries in ibtb. */
+    unsigned numEntries;
+
+    /** Width in bits of an indirect exit. */
+    unsigned iExitLength;
+
+    /** Index length in bits of ibtb (floorLog2 of numEntries). */
+    unsigned idxLength;
+
+    /** Index length in bits of iExitPredictor. */
+    unsigned iExitPredictorIdxLength;
+
+    /** Index mask of iExitPredictor (iExitPredictorEntries - 1). */
+    unsigned iExitPredictorIdxMask;
+
+    /** Index mask of ibtb (numEntries - 1). */
+    unsigned idxMask;
+
+    /** Mask selecting the low iExitLength bits of an indirect exit. */
+    unsigned iExitMask;
+
+    /** Shift amount applied to block addresses. */
+    unsigned blockShiftAmt;
+};
+
+#endif //__CPU_EDGE_IBTB_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/ibtb.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/ibtb.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,153 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "base/intmath.hh"
+#include "base/trace.hh"
+#include "cpu/edge/pred/ibtb.hh"
+
+DefaultIBTB::DefaultIBTB(unsigned _iExitPredictorEntries,
+                         unsigned _numEntries,
+                         unsigned _iExitLength,
+                         unsigned _blockShiftAmt)
+    :iExitPredictorEntries(_iExitPredictorEntries),
+     numEntries(_numEntries),
+     iExitLength(_iExitLength),
+     blockShiftAmt(_blockShiftAmt)
+{   // Checks both table sizes, zeroes both tables, then derives masks and index widths.
+    DPRINTF(EdgePredUnit,"IBTB: Creating IBTB objects.\n");
+
+    if (!isPowerOf2(numEntries)) {
+        fatal("IBTB entries is not a power of 2!");
+    }
+
+    if (!isPowerOf2(iExitPredictorEntries)) {
+        fatal("IBTB exit predictor entries is not a power of 2!");
+    }
+
+    iExitPredictor.resize(iExitPredictorEntries);
+
+    for(unsigned i = 0 ; i < iExitPredictorEntries ; ++i) {
+        iExitPredictor[i].iExit = 0;
+        iExitPredictor[i].hysteresis = false;
+    }
+
+    ibtb.resize(numEntries);
+
+    for(unsigned i = 0 ; i < numEntries ; ++i) {
+        ibtb[i].target = 0;
+        ibtb[i].hysteresis = false;
+    }
+
+    iExitPredictorIdxMask = iExitPredictorEntries - 1;
+
+    idxMask = numEntries - 1;
+
+    iExitMask = (1 << iExitLength) - 1;
+
+    idxLength = floorLog2(numEntries);
+
+    iExitPredictorIdxLength = floorLog2(iExitPredictorEntries);
+}
+
+void
+DefaultIBTB::reset()
+{   // Returns every entry of both tables to the zeroed state the constructor sets up.
+    for(unsigned i = 0 ; i < iExitPredictorEntries ; ++i) {
+        iExitPredictor[i].iExit = 0;
+        iExitPredictor[i].hysteresis = false;
+    }
+
+    for(unsigned i = 0 ; i < numEntries ; ++i) {
+        ibtb[i].target = 0;
+        ibtb[i].hysteresis = false;
+    }
+}
+
+unsigned
+DefaultIBTB::calcIExitPredictorIdx(unsigned index, ExitID exitID)
+{   // NOTE(review): the unsigned shift amount wraps if iExitPredictorIdxLength < 3.
+    unsigned idx = (index ^ (exitID << (iExitPredictorIdxLength - 3))) & iExitPredictorIdxMask;
+    return idx;
+}
+
+unsigned
+DefaultIBTB::calcIBTBIdx(Addr PC, unsigned iExit, ExitID exitID)
+{   // NOTE(review): the unsigned shift amounts wrap if idxLength < 3.
+    unsigned idx = (PC ^ ((exitID << (idxLength - 3)) | (iExit << (idxLength - 1)))) & idxMask;
+    return idx;
+}
+
+Addr
+DefaultIBTB::targetGen(Addr target, unsigned iExit)
+{   // Re-attaches iExit as the low bits of target, then shifts left by blockShiftAmt.
+    Addr temp_target = ((target << iExitLength) | iExit ) << blockShiftAmt;
+    return temp_target;
+}
+
+unsigned
+DefaultIBTB::iExitGen(Addr target)
+{   // Extracts the iExitMask bits that sit just above the low blockShiftAmt bits.
+    unsigned iExit = (target >> blockShiftAmt) & iExitMask;
+    return iExit;
+}
+
+Addr
+DefaultIBTB::lookup(const Addr &PC, unsigned index, ExitID exitID)
+{   // Predicts iExit first, then uses it both to pick the IBTB entry and to rebuild the target.
+    unsigned iExitPredictorIdx = calcIExitPredictorIdx(index, exitID);
+    unsigned predicted_iExit = iExitPredictor[iExitPredictorIdx].iExit;
+    unsigned ibtbIdx = calcIBTBIdx(PC,predicted_iExit,exitID);
+    Addr tempTarget = ibtb[ibtbIdx].target;
+    Addr target = targetGen(tempTarget,predicted_iExit);
+    return target;
+}
+
+void
+DefaultIBTB::update(const Addr &PC, unsigned index,
+                    ExitID actual_exit, Addr corr_target)
+{
+    // Update the iExit predictor: the stored iExit is replaced only when hysteresis is clear.
+    unsigned iExitPredictorIdx = calcIExitPredictorIdx(index, actual_exit);
+    unsigned corr_iExit = iExitGen(corr_target);
+    IExitPredictorEntry iExitPredictorEntry= iExitPredictor[iExitPredictorIdx];
+    IExitPredictorEntry new_iExitPredictorEntry;
+    new_iExitPredictorEntry.iExit = iExitPredictorEntry.hysteresis?
+        iExitPredictorEntry.iExit:corr_iExit;
+    new_iExitPredictorEntry.hysteresis = (iExitPredictorEntry.iExit == corr_iExit);
+    iExitPredictor[iExitPredictorIdx] = new_iExitPredictorEntry;
+    // Update the IBTB entry with the same one-bit hysteresis policy.
+    unsigned ibtbIdx =calcIBTBIdx(PC,corr_iExit,actual_exit);
+    IBTBEntry ibtbEntry = ibtb[ibtbIdx];
+    IBTBEntry new_ibtbEntry;
+    Addr target = corr_target >> (blockShiftAmt + iExitLength);
+    new_ibtbEntry.target = ibtbEntry.hysteresis ? ibtbEntry.target : target;
+    new_ibtbEntry.hysteresis = (ibtbEntry.target == target);
+    ibtb[ibtbIdx] = new_ibtbEntry;
+}
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/ras.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/ras.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,152 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_RAS_HH__
+#define __CPU_EDGE_RAS_HH__
+
+#include <vector>
+#include "base/misc.hh"
+#include "base/types.hh"
+#include "config/the_isa.hh"
+#include "arch/isa_traits.hh"
+
+/** Return address stack class, implements a simple RAS. */
+template<class Impl>
+class ReturnAddrStack
+{
+  private:
+    struct RasHFEntry
+    {
+        unsigned tos;
+        Addr target;
+    };
+    struct LSEntry
+    {
+        unsigned index;
+        Addr block_addr;
+    };
+  public:
+    /** Creates a return address stack, but init() must be called prior to
+     *  use.
+     */
+    typedef TheISA::BlockID BlockID;
+    typedef TheISA::ExitID ExitID;
+    ReturnAddrStack() {}
+
+    ~ReturnAddrStack(){}
+    /** Initializes the RAS (numEntries entries), the link stack (lsEntries
+     *  entries) and the shift amount applied by targetGen().
+     */
+    void init(unsigned numEntries, unsigned lsEntries,unsigned blockShiftAmt);
+
+    void reset();
+
+    /** Returns the top address on the RAS. */
+    Addr top()
+    { return addrStack[tos]; }
+
+    /** Returns the index of the top of the RAS. */
+    unsigned topIdx()
+    { return tos; }
+
+    /** Pushes an address onto the RAS. */
+    void push(const Addr return_addr, ThreadID tid);
+
+    /** Pops the top address from the RAS. */
+    void pop(ThreadID tid);
+
+    Addr targetGen(Addr target);
+
+    Addr lookup(BlockID blockID,ThreadID tid);
+
+    void squash(BlockID blockID,ThreadID tid);
+
+    void lspush(Addr block_addr,unsigned index, ThreadID tid);
+
+    /** lsPopAddr() must be called right after lsPopIdx() is called. */
+    unsigned lsPopIdx(ThreadID tid);
+
+    Addr lsPopAddr(ThreadID tid);
+    /** Changes index to the top of the RAS, and replaces the top address with
+     *  a new target.
+     *  @param top_entry_idx The index of the RAS that will now be the top.
+     *  @param restored_target The new target address of the new top of the RAS.
+     */
+    void restore(unsigned top_entry_idx, const Addr &restored_target);
+
+    bool empty() { return usedEntries == 0; }
+
+    bool full() { return usedEntries == numEntries; }
+  private:
+    /** Increments the top of stack index. */
+    inline void incrTos()
+    { if (++tos == numEntries) tos = 0; }
+
+    /** Decrements the top of stack index. */
+    inline void decrTos()
+    { tos = (tos == 0 ? numEntries - 1 : tos - 1); }
+
+    /** The RAS itself. */
+    std::vector<Addr> addrStack;
+
+    /** The RAS history file: one (tos, target) snapshot per in-flight block. */
+    std::vector<RasHFEntry> rasHistoryFile;
+
+    /** The RAS stack pointer, one per thread. */
+    std::vector<unsigned> rasStackPointer;
+
+    /** The RAS link stack pointer, one per thread. */
+    std::vector<unsigned> rasLSPointer;
+
+    /** The RAS link stack. */
+    std::vector<LSEntry> rasLinkStack;
+
+    /** The number of entries in the LS. */
+    unsigned lsEntries;
+
+    /** The number of entries in the RAS. */
+    unsigned numEntries;
+
+    /** The number of used entries in the RAS. */
+    unsigned usedEntries;
+
+    /** TOS index used by top()/restore(); push()/pop() use rasStackPointer. */
+    unsigned tos;
+
+    /** The mask of tos in ras. */
+    unsigned rasStackMask;
+    /** The mask of tos in ls. */
+    unsigned rasLSMask;
+
+    /** Number of bits targetGen() shifts a stored target left by. */
+    unsigned blockShiftAmt;
+};
+
+#endif // __CPU_EDGE_RAS_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/ras.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/ras.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,214 @@
+/*
+ * Copyright (c) 2004-2005 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "cpu/edge/pred/ras.hh"
+#include "cpu/edge/isa_specific.hh"
+
+template<class Impl>
+void
+ReturnAddrStack<Impl>::init(unsigned _numEntries, unsigned _lsEntries,unsigned _blockShiftAmt)
+{   // Sizes and zeroes every table; must run before any other member is used.
+    numEntries = _numEntries;
+    lsEntries = _lsEntries;
+    usedEntries = 0;
+    tos = 0;
+
+    addrStack.resize(numEntries);
+
+    for (unsigned i = 0; i < numEntries; ++i)
+        addrStack[i] = 0;
+
+    rasStackMask = numEntries - 1;  // used with '&' to wrap: assumes a power of 2 (not checked)
+
+    rasStackPointer.resize(Impl::MaxThreads);
+
+    for (unsigned i = 0; i < Impl::MaxThreads; ++i)
+        rasStackPointer[i] = 0;
+
+    rasHistoryFile.resize(Impl::MaxInFlightBlockNum);
+
+    for (unsigned i = 0; i < Impl::MaxInFlightBlockNum; ++i) {
+        rasHistoryFile[i].tos = 0;
+        rasHistoryFile[i].target = 0;
+    }
+
+    rasLSPointer.resize(Impl::MaxThreads);
+
+    for (unsigned i = 0; i < Impl::MaxThreads; ++i)
+        rasLSPointer[i] = 0;
+
+    rasLSMask = lsEntries - 1;      // used with '&' to wrap: assumes a power of 2 (not checked)
+
+    rasLinkStack.resize(lsEntries);
+
+    for (unsigned i = 0; i < lsEntries ; ++i) {
+        rasLinkStack[i].index = 0;
+        rasLinkStack[i].block_addr = 0;
+    }
+
+    blockShiftAmt = _blockShiftAmt;
+
+    DPRINTF(EdgeRAS,"RAS: blockShiftAmt = %i.\n",blockShiftAmt);
+}
+
+template<class Impl>
+void
+ReturnAddrStack<Impl>::reset()
+{   // Zeroes all state without resizing any table.
+    usedEntries = 0;
+    tos = 0;
+    for (unsigned i = 0; i < numEntries; ++i)
+        addrStack[i] = 0;
+
+    for (unsigned i = 0; i < Impl::MaxThreads; ++i)
+        rasStackPointer[i] = 0;
+
+
+    for (unsigned i = 0; i < Impl::MaxInFlightBlockNum; ++i) {
+        rasHistoryFile[i].tos = 0;
+        rasHistoryFile[i].target = 0;
+    }
+
+    for (unsigned i = 0; i < Impl::MaxThreads; ++i)
+        rasLSPointer[i] = 0;
+
+    for (unsigned i = 0; i < lsEntries ; ++i) {
+        rasLinkStack[i].index = 0;
+        rasLinkStack[i].block_addr = 0;
+    }
+}
+
+template<class Impl>
+void
+ReturnAddrStack<Impl>::push(const Addr return_addr, ThreadID tid)
+{   // Advances this thread's stack pointer (wrapping) and stores the address there.
+    unsigned tos = rasStackPointer[tid];
+    unsigned tos_new = (tos + 1) & rasStackMask;
+    rasStackPointer[tid] = tos_new;
+    addrStack[tos_new] = return_addr;
+    if (usedEntries != numEntries) {
+        ++usedEntries;
+    }
+    DPRINTF(EdgeRAS,"RAS push:TOS = %i. Addr = %#x.\n",tos_new,return_addr << blockShiftAmt);
+}
+
+template<class Impl>
+void
+ReturnAddrStack<Impl>::pop(ThreadID tid)
+{   // Only moves the stack pointer back (wrapping); the popped slot is not cleared.
+    unsigned tos = rasStackPointer[tid];
+    unsigned tos_new = (tos - 1) & rasStackMask;
+    rasStackPointer[tid] = tos_new;
+    if (usedEntries > 0) {
+        --usedEntries;
+    }
+    DPRINTF(EdgeRAS,"RAS pop:TOS = %i.\n",tos_new);
+}
+
+template<class Impl>
+Addr
+ReturnAddrStack<Impl>::targetGen(Addr target)
+{
+    return target << blockShiftAmt;
+}
+
+template<class Impl>
+Addr
+ReturnAddrStack<Impl>::lookup(BlockID blockID,ThreadID tid)
+{   // Reads the top entry and snapshots (tos, target) under blockID for a later squash().
+    unsigned tos = rasStackPointer[tid];
+    Addr target = addrStack[tos];
+    RasHFEntry rasHFEntry;
+    rasHFEntry.tos = tos;
+    rasHFEntry.target = target;
+    rasHistoryFile[blockID&(Impl::MaxInFlightBlockNum -1)] = rasHFEntry;
+    return target;
+}
+
+template<class Impl>
+void
+ReturnAddrStack<Impl>::squash(BlockID blockID,ThreadID tid)
+{   // Restores the stack pointer and top entry from the snapshot lookup() saved for blockID.
+    unsigned index = blockID&(Impl::MaxInFlightBlockNum -1);
+    unsigned tos;
+    RasHFEntry rasHFEntry;
+    rasHFEntry = rasHistoryFile[index];
+    tos = rasHFEntry.tos;
+    rasStackPointer[tid] = tos;
+    addrStack[tos] = rasHFEntry.target;
+    DPRINTF(EdgeRAS,"Squashing RAS to block [bid:%i]."
+            "TOS = %i and target is %#x.\n",blockID,tos,addrStack[tos]);
+}
+
+template<class Impl>
+void
+ReturnAddrStack<Impl>::lspush(Addr block_addr,unsigned index, ThreadID tid)
+{   // Pushes an (index, block_addr) pair onto this thread's link stack.
+    unsigned tos = rasLSPointer[tid];
+    unsigned tos_new = (tos + 1) & rasLSMask;
+    LSEntry lsEntry;
+    rasLSPointer[tid] = tos_new;
+    lsEntry.index = index;
+    lsEntry.block_addr = block_addr;
+    rasLinkStack[tos_new] = lsEntry;
+    DPRINTF(EdgeRAS,"Store index(%i) and PC(%#x) to LS. LS TOS is %i\n",
+            index,block_addr,tos_new);
+}
+
+template<class Impl>
+unsigned
+ReturnAddrStack<Impl>::lsPopIdx(ThreadID tid)
+{   // Reads the index at the top of the link stack without moving the pointer.
+    unsigned tos = rasLSPointer[tid];
+    return rasLinkStack[tos].index;
+}
+
+template<class Impl>
+Addr
+ReturnAddrStack<Impl>::lsPopAddr(ThreadID tid)
+{   // Returns the block address at the top of the link stack and moves the pointer back.
+    unsigned tos = rasLSPointer[tid];
+    unsigned tos_new = (tos - 1) & rasLSMask;
+    rasLSPointer[tid] = tos_new;
+    return rasLinkStack[tos].block_addr;
+}
+
+template<class Impl>
+void
+ReturnAddrStack<Impl>::restore(unsigned top_entry_idx,
+                               const Addr &restored_target)
+{   // Writes the member 'tos', not the per-thread rasStackPointer used by push()/pop().
+    tos = top_entry_idx;
+
+    addrStack[tos] = restored_target;
+}
+
+template class ReturnAddrStack; // NOTE(review): the <...> argument is missing here (apparently lost); restore the Impl type
+
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/tournament.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/tournament.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,220 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_TOURNAMENT_PRED_HH__
+#define __CPU_EDGE_TOURNAMENT_PRED_HH__
+
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/edge/sat_counter.hh"
+#include "config/the_isa.hh"
+#include "arch/isa_traits.hh"
+
+/**
+ * Implements a tournament-style exit predictor for blocks, modelled on the
+ * 21264 tournament branch predictor. A local predictor, indexed through a
+ * local history table, and a global predictor, indexed by a per-thread
+ * global history, each predict an exit ID; a choice predictor made of
+ * counters chooses between the two. lookup() speculatively updates the
+ * global history, the choice history and the local future file; it does
+ * not modify the prediction tables themselves.
+ */
+template<class Impl>
+class TournamentBP
+{
+  public:
+    typedef TheISA::BlockID BlockID;
+    typedef TheISA::ExitID ExitID;
+    /**
+     * Default branch predictor constructor.
+     */
+    TournamentBP(unsigned localPredictorSize,
+                 unsigned localHistoryTableSize,
+                 unsigned localHistoryBits,
+                 unsigned globalPredictorSize,
+                 unsigned globalHistoryBits,
+                 unsigned choiceHistoryBits,
+                 unsigned choicePredictorSize,
+                 unsigned choiceCtrBits,
+                 unsigned blockShiftAmt);
+
+    void reset();
+    /**
+     * Looks up the given block address in the predictor and returns the
+     * predicted exit ID of the block. Also saves the global and choice
+     * histories in the per-block history files for later use by update().
+     * @param block_addr The address of the block to look up.
+     * @param blockID ID of the block; oldest_blockID bounds the future-file scan.
+     * @return the ID of the exit of the block.
+     */
+    TheISA::ExitID lookup(BlockID blockID, BlockID oldest_blockID,
+                          Addr &block_addr, int addr_space_ID, ThreadID tid);
+
+    /**
+     * Updates the predictor with the actual exit of a block.
+     * @param blockID ID of the block, used to find its saved histories.
+     * @param block_addr The address of the block to update.
+     * @param addr_space_id Address-space ID folded into the local history index.
+     * @param actual_exitID The actual exit ID; asserted to be in 0..7.
+     */
+    void update(BlockID blockID, Addr &block_addr, int addr_space_id,
+                ExitID actual_exitID,ThreadID tid);
+
+    /**
+     * Restores the global branch history on a squash.
+     */
+    void squash(BlockID blockID, ExitID exitID, ThreadID tid);
+
+    /**
+     * Restores the global branch history on a squash.
+     */
+    void squash(BlockID blockID,ThreadID tid);
+
+    unsigned lookupGetIndex(Addr &block_PC, ThreadID tid);
+
+    unsigned updateGetIndex(BlockID blockID, Addr &block_PC, ThreadID tid);
+
+  private:
+
+    /**
+     * Returns the local history table index for a block.
+     * @param block_addr The block's address; addr_space_ID is hashed in.
+     */
+    inline unsigned calcLocHistIdx(Addr &block_addr,int addr_space_ID);
+
+    inline unsigned calcLocPredIdx(Addr &block_addr,uint32_t history);
+
+    inline unsigned calcChoiPredIdx(Addr &block_addr,uint32_t history);
+
+    inline unsigned calcGloPredIdx(Addr &block_addr,uint32_t history);
+    /**
+     * The branch history information that is created upon predicting
+     * a branch. It will be passed back upon updating and squashing,
+     * when the BP can use this information to update/restore its
+     * state properly.
+     */
+    struct BPHistory {
+        unsigned globalHistory;
+        ExitID localPredExitID;
+        ExitID globalPredExitID;
+        bool globalUsed;
+    };
+
+    struct PredictionTableEntry {
+        PredictionTableEntry()
+        { hysteresis = 0;}
+        ~PredictionTableEntry()
+        {}
+        ExitID exitID;
+        bool hysteresis;
+    };
+
+    struct LFFEntry {
+        unsigned index;
+        unsigned history;
+    };
+    /** Local prediction table (exit ID plus hysteresis bit per entry). */
+    std::vector<PredictionTableEntry> localCtrs;
+
+    /** Size of the local predictor. */
+    unsigned localPredictorSize;
+
+    /** Mask to get the proper index bits into the predictor. */
+    unsigned localPredictorMask;
+
+    /** Array of local history table entries. */
+    std::vector<unsigned> localHistoryTable;
+
+    /** Local future file: speculative local histories, one per in-flight block. */
+    std::vector<LFFEntry> localFutureFile;
+
+    /** Size of the local history table. */
+    unsigned localHistoryTableSize;
+
+    /** Number of bits for each entry of the local history table.
+     *  @todo Doesn't this come from the size of the local predictor?
+     */
+    unsigned localHistoryBits;
+
+    /** Mask to get the proper local history. */
+    unsigned localHistoryMask;
+
+    /** Global prediction table (exit ID plus hysteresis bit per entry). */
+    std::vector<PredictionTableEntry> globalCtrs;
+
+    /** Size of the global predictor. */
+    unsigned globalPredictorSize;
+
+    /** Global history register, one per thread. */
+    unsigned globalHistory[Impl::MaxThreads];
+
+    /** Global history file: saved global histories, one per in-flight block. */
+    std::vector<unsigned> globalHistoryFile;
+
+    /** Number of bits for the global history. */
+    unsigned globalHistoryBits;
+
+    /** Mask to get the proper global history. */
+    unsigned globalHistoryMask;
+
+    /** Choice history register, one per thread. */
+    unsigned choiceHistory[Impl::MaxThreads];
+
+    /** Choice history file: saved choice histories, one per in-flight block. */
+    std::vector<unsigned> choiceHistoryFile;
+
+    /** Number of bits for the choice history. */
+    unsigned choiceHistoryBits;
+
+    /** Mask to get the proper choice history. */
+    unsigned choiceHistoryMask;
+
+    /** Array of counters that make up the choice predictor. */
+    std::vector<SatCounter> choiceCtrs;
+
+    /** Size of the choice predictor (identical to the global predictor). */
+    unsigned choicePredictorSize;
+
+    /** Number of bits of the choice predictor's counters. */
+    unsigned choiceCtrBits;
+
+    /** Number of low-order bits shifted out of a block address before it
+     *  is used to index the tables.
+     */
+    unsigned blockShiftAmt;
+
+    /** Threshold for the choice counter: above it lookup() uses the global
+     *  prediction, equal to or below it the local prediction.
+     */
+    unsigned threshold;
+};
+
+#endif // __CPU_EDGE_TOURNAMENT_PRED_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/pred/tournament.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/pred/tournament.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,430 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+
+#include "base/intmath.hh"
+#include "cpu/edge/pred/tournament.hh"
+#include "cpu/edge/isa_specific.hh"
+
+template<class Impl>
+TournamentBP<Impl>::TournamentBP(unsigned _localPredictorSize,
+                                 unsigned _localHistoryTableSize,
+                                 unsigned _localHistoryBits,
+                                 unsigned _globalPredictorSize,
+                                 unsigned _globalHistoryBits,
+                                 unsigned _choiceHistoryBits,
+                                 unsigned _choicePredictorSize,
+                                 unsigned _choiceCtrBits,
+                                 unsigned _blockShiftAmt)
+    : localPredictorSize(_localPredictorSize),
+      localHistoryTableSize(_localHistoryTableSize),
+      localHistoryBits(_localHistoryBits),
+      globalPredictorSize(_globalPredictorSize),
+      globalHistoryBits(_globalHistoryBits),
+      choiceHistoryBits(_choiceHistoryBits),
+      choicePredictorSize(_globalPredictorSize), // NOTE(review): _choicePredictorSize is ignored here
+      choiceCtrBits(_choiceCtrBits),
+      blockShiftAmt(_blockShiftAmt)
+{
+    if (!isPowerOf2(localPredictorSize)) {
+        fatal("Invalid local predictor size!\n");
+    }
+
+    //Setup the array of counters for the local predictor
+    localCtrs.resize(localPredictorSize);
+
+    for (int i = 0; i < localPredictorSize; ++i) {
+        localCtrs[i].exitID = 0;
+        localCtrs[i].hysteresis = 0;
+    }
+
+    localPredictorMask = floorPow2(localPredictorSize) - 1;
+
+    if (!isPowerOf2(localHistoryTableSize)) {
+        fatal("Invalid local history table size!\n");
+    }
+
+    //Setup the history table for the local table
+    localHistoryTable.resize(localHistoryTableSize);
+
+    for (int i = 0; i < localHistoryTableSize; ++i)
+        localHistoryTable[i] = 0;
+
+    //Setup the history table for the local future file
+    localFutureFile.resize(Impl::MaxInFlightBlockNum);
+
+    for (int i = 0; i < Impl::MaxInFlightBlockNum; ++i) {
+        localFutureFile[i].index = 0;
+        localFutureFile[i].history = 0;
+    }
+    // Setup the local history mask
+    localHistoryMask = (1 << localHistoryBits) - 1;
+
+    if (!isPowerOf2(globalPredictorSize)) {
+        fatal("Invalid global predictor size!\n");
+    }
+
+    //Setup the array of counters for the global predictor
+    globalCtrs.resize(globalPredictorSize);
+
+    for (int i = 0; i < globalPredictorSize; ++i) {
+        globalCtrs[i].hysteresis = 0;
+        globalCtrs[i].exitID = 0;
+    }
+    //Clear the global history
+    for (int i = 0; i < Impl::MaxThreads ;++i )
+        globalHistory[i] = 0;
+    // Setup the global history mask
+    globalHistoryMask = (1 << globalHistoryBits) - 1;
+
+    globalHistoryFile.resize(Impl::MaxInFlightBlockNum);
+
+    for (int i = 0; i < Impl::MaxInFlightBlockNum ;++i )
+        globalHistoryFile[i] = 0;
+
+    choiceHistoryFile.resize(Impl::MaxInFlightBlockNum);
+
+    for (int i = 0; i < Impl::MaxInFlightBlockNum ;++i )
+        choiceHistoryFile[i] = 0;
+
+    if (!isPowerOf2(choicePredictorSize)) {
+        fatal("Invalid choice predictor size!\n");
+    }
+    //Clear the choice history
+    for (int i = 0; i < Impl::MaxThreads; ++i)
+        choiceHistory[i] = 0;
+    // Setup the choice history mask
+    choiceHistoryMask = (1 << choiceHistoryBits) - 1;
+
+    //Setup the array of counters for the choice predictor
+    choiceCtrs.resize(choicePredictorSize);
+
+    for (int i = 0; i < choicePredictorSize; ++i)
+        choiceCtrs[i].setBits(choiceCtrBits);
+
+    // @todo: Allow for different thresholds between the predictors.
+    threshold = (1 << (choiceCtrBits - 1)) - 1;
+    threshold = threshold / 2;
+}
+
+template<class Impl>
+void
+TournamentBP<Impl>::reset()
+{   // NOTE(review): localHistoryTable and localFutureFile are not cleared here.
+    for (int i = 0; i < localPredictorSize; ++i) {
+        localCtrs[i].exitID = 0;
+        localCtrs[i].hysteresis = 0;
+    }
+
+    for (int i = 0; i < globalPredictorSize; ++i) {
+        globalCtrs[i].hysteresis = 0;
+        globalCtrs[i].exitID = 0;
+    }
+
+    //Clear the global history
+    for (int i = 0; i < Impl::MaxThreads ;++i )
+        globalHistory[i] = 0;
+
+    for (int i = 0; i < Impl::MaxInFlightBlockNum ;++i )
+        globalHistoryFile[i] = 0;
+
+    for (int i = 0; i < Impl::MaxInFlightBlockNum ;++i )
+        choiceHistoryFile[i] = 0;
+
+    //Clear the choice history
+    for (int i = 0; i < Impl::MaxThreads; ++i)
+        choiceHistory[i] = 0;
+
+    for (int i = 0; i < choicePredictorSize; ++i)
+        choiceCtrs[i].setBits(choiceCtrBits);
+}
+
+template<class Impl>
+inline
+uint32_t
+TournamentBP<Impl>::calcLocHistIdx(Addr &block_addr,int addr_space_ID)
+{
+    // XOR the shifted block address with the bit-reversed 4-bit address-space ID.
+    assert(addr_space_ID >= 0&&addr_space_ID <= 15);
+    int reverse_as_id = ((addr_space_ID & 0x1) << 3)
+        | ((addr_space_ID & 0x2) << 1)
+        | ((addr_space_ID & 0x4) >> 1)
+        | ((addr_space_ID & 0x8) >> 3);
+    return ((block_addr >> blockShiftAmt) ^ (reverse_as_id<<(localHistoryBits -4)))
+        & (localHistoryTableSize - 1);
+}
+
+template<class Impl>
+inline
+uint32_t
+TournamentBP<Impl>::calcLocPredIdx(Addr &block_addr,uint32_t history)
+{
+    uint32_t index = (history ^ (block_addr >> blockShiftAmt)) & localHistoryMask;
+    return index;
+}
+
+template<class Impl>
+inline
+uint32_t
+TournamentBP<Impl>::calcGloPredIdx(Addr &block_addr,uint32_t history)
+{
+    uint32_t index = (history ^ (block_addr >> blockShiftAmt)) & globalHistoryMask;
+    return index;
+}
+
+template<class Impl>
+inline
+uint32_t
+TournamentBP<Impl>::calcChoiPredIdx(Addr &block_addr,uint32_t history)
+{
+    uint32_t index = (history ^ (block_addr >> blockShiftAmt)) & choiceHistoryMask;
+    return index;
+}
+
+template<class Impl>
+unsigned
+TournamentBP<Impl>::lookupGetIndex(Addr &block_PC, ThreadID tid)
+{   // Choice-predictor index computed from the thread's current choice history.
+    unsigned choice_history = choiceHistory[tid] & choiceHistoryMask;
+    unsigned choice_predictor_idx = calcChoiPredIdx(block_PC,choice_history);
+    return choice_predictor_idx;
+}
+
+template<class Impl>
+unsigned
+TournamentBP<Impl>::updateGetIndex(BlockID blockID, Addr &block_PC, ThreadID tid)
+{   // Choice-predictor index computed from the history saved for blockID.
+    unsigned index = blockID &(Impl::MaxInFlightBlockNum -1);
+    unsigned choice_predictor_hist = choiceHistoryFile[index];
+    unsigned choice_predictor_idx = calcChoiPredIdx(block_PC, choice_predictor_hist);
+    return choice_predictor_idx;
+}
+
+#if 0
+inline
+void
+TournamentBP::updateGlobalHistTaken()
+{
+    globalHistory = (globalHistory << 1) | 1;
+    globalHistory = globalHistory & globalHistoryMask;
+}
+
+inline
+void
+TournamentBP::updateGlobalHistNotTaken()
+{
+    globalHistory = (globalHistory << 1);
+    globalHistory = globalHistory & globalHistoryMask;
+}
+#endif
+
+
+#if 0
+inline
+void
+TournamentBP::updateLocalHistTaken(unsigned local_history_idx)
+{
+    localHistoryTable[local_history_idx] =
+        (localHistoryTable[local_history_idx] << 1) | 1;
+}
+
+inline
+void
+TournamentBP::updateLocalHistNotTaken(unsigned local_history_idx)
+{
+    localHistoryTable[local_history_idx] =
+        (localHistoryTable[local_history_idx] << 1);
+}
+#endif
+
+template<class Impl>
+TheISA::ExitID
+TournamentBP<Impl>::lookup(BlockID blockID, BlockID oldest_blockID,
+                           Addr &block_addr, int addr_space_ID, ThreadID tid)
+{
+    ExitID local_prediction;
+    uint32_t local_history_idx;
+    uint32_t local_history;
+    uint32_t local_predictor_idx;
+
+    uint32_t global_history;
+    uint32_t global_predictor_idx;
+    ExitID global_prediction;
+
+    uint32_t choice_history;
+    uint32_t choice_predictor_idx;
+    bool choice_prediction;
+    ExitID prediction;
+
+    uint32_t global_history_new;
+    uint32_t choice_history_new;
+    LFFEntry lff_entry_new;
+    //Get local history.
+    local_history_idx = calcLocHistIdx(block_addr,addr_space_ID);
+    local_history = localHistoryTable[local_history_idx]
+        & localPredictorMask;
+
+    if(blockID != oldest_blockID && oldest_blockID != 0) {
+        BlockID tmp_bid = blockID; // latest block
+        do {
+            LFFEntry lff_entry;
+            tmp_bid = (tmp_bid - 1) & (Impl::MaxInFlightBlockNum - 1); // same mask used when the file is written
+            lff_entry = localFutureFile[tmp_bid];
+            if (lff_entry.index == local_history_idx)
+            {
+                local_history = lff_entry.history;
+                break;
+            }
+
+        }while (tmp_bid != ( oldest_blockID & (Impl::MaxInFlightBlockNum - 1) ) );
+
+    }
+
+    //Lookup local prediction
+    local_predictor_idx = calcLocPredIdx(block_addr,local_history);
+    local_prediction = localCtrs[local_predictor_idx].exitID;
+
+    //Get global history
+    global_history = globalHistory[tid] & globalHistoryMask;
+    global_predictor_idx = calcGloPredIdx(block_addr,global_history);
+
+    //Lookup in the global predictor to get its branch prediction
+    global_prediction = globalCtrs[global_predictor_idx].exitID;
+
+    //Get choice history
+    choice_history = choiceHistory[tid] & choiceHistoryMask;
+    choice_predictor_idx = calcChoiPredIdx(block_addr,choice_history);
+    //Lookup in the choice predictor to see which one to use
+    choice_prediction = choiceCtrs[choice_predictor_idx].read() > threshold;
+
+    // Final prediction
+    prediction= choice_prediction ? global_prediction : local_prediction;
+    DPRINTF(EdgePredUnit,"BP: global prediction = %i, local_prediction = %i for %#x.\n",
+            global_prediction,local_prediction,block_addr);
+    // Speculative update: save the old histories per block, then shift in the prediction
+    uint32_t index = blockID&(Impl::MaxInFlightBlockNum -1);
+    globalHistoryFile[index] = global_history;
+    global_history_new = ((global_history << 2) | (prediction & 3) ) & globalHistoryMask ;
+    globalHistory[tid] = global_history_new;
+    choiceHistoryFile[index] = choice_history;
+    choice_history_new = ((choice_history << 3) | (prediction & 7) ) & choiceHistoryMask ;
+    choiceHistory[tid] = choice_history_new;
+    lff_entry_new.index = local_history_idx;
+    lff_entry_new.history = ((local_history << 2) | (prediction & 3) ) & localHistoryMask;
+    localFutureFile[index] = lff_entry_new;
+    return prediction;
+ }
+
+template<class Impl>
+void
+TournamentBP<Impl>::update(BlockID blockID, Addr &block_addr,
+                           int addr_space_id,ExitID actual_exitID,ThreadID tid)
+{
+    uint32_t local_history_idx;
+    uint32_t local_predictor_hist;
+    uint32_t global_predictor_hist;
+    uint32_t choice_predictor_hist;
+    uint32_t local_predictor_idx;
+    uint32_t global_predictor_idx;
+    uint32_t choice_predictor_idx;
+    uint32_t index = blockID &(Impl::MaxInFlightBlockNum -1);
+    PredictionTableEntry local_predictor_entry, local_predictor_entry_new;
+    PredictionTableEntry global_predictor_entry, global_predictor_entry_new;
+    // Read L1 tables of exit predictor
+    assert(actual_exitID >= 0 && actual_exitID <= 7);
+    local_history_idx = calcLocHistIdx(block_addr,addr_space_id);
+    local_predictor_hist = localHistoryTable[local_history_idx];
+    global_predictor_hist = globalHistoryFile[index];
+    choice_predictor_hist = choiceHistoryFile[index];
+
+    // Read and update L2 tables
+    local_predictor_idx = calcLocPredIdx(block_addr, local_predictor_hist);
+    local_predictor_entry = localCtrs[local_predictor_idx];
+    local_predictor_entry_new.exitID = local_predictor_entry.hysteresis ?
+ local_predictor_entry.exitID : actual_exitID ; + local_predictor_entry_new.hysteresis = (local_predictor_entry.exitID == actual_exitID); + localCtrs[local_predictor_idx] = local_predictor_entry_new; + + global_predictor_idx = calcGloPredIdx(block_addr, global_predictor_hist); + global_predictor_entry = globalCtrs[global_predictor_idx]; + global_predictor_entry_new.exitID = global_predictor_entry.hysteresis ? + global_predictor_entry.exitID : actual_exitID ; + global_predictor_entry_new.hysteresis = (global_predictor_entry.exitID == actual_exitID); + globalCtrs[global_predictor_idx] = global_predictor_entry_new; + DPRINTF(EdgePredUnit,"BPUpdate: exitID = %i ,global predictor = %i(%i), " + "and local predictor = %i(%i).\n", actual_exitID, + global_predictor_entry_new.exitID,global_predictor_entry.exitID, + local_predictor_entry_new.exitID,local_predictor_entry.exitID); + choice_predictor_idx = calcChoiPredIdx(block_addr, choice_predictor_hist); + if(local_predictor_entry.exitID != global_predictor_entry.exitID) { + if(local_predictor_entry.exitID == actual_exitID) { + choiceCtrs[choice_predictor_idx].decrement(); + } else if(global_predictor_entry.exitID == actual_exitID) { + choiceCtrs[choice_predictor_idx].increment(); + } + } +} + +template +void +TournamentBP::squash(BlockID blockID,ExitID exitID,ThreadID tid) +{ + unsigned global_history; + unsigned choice_history; + unsigned index = blockID&(Impl::MaxInFlightBlockNum -1) ; + LFFEntry oldLFFEntry, newLFFEntry; + oldLFFEntry = localFutureFile[index]; + newLFFEntry.index = oldLFFEntry.index; + newLFFEntry.history = ((oldLFFEntry.history &(~3)) | (exitID & 3)) & localHistoryMask; + localFutureFile[index] = newLFFEntry ; + // Restore global history to state prior to this branch. 
+ global_history = globalHistoryFile[index]; + globalHistory[tid] = ((global_history << 2) | (exitID & 3) ) & globalHistoryMask ; + choice_history = choiceHistoryFile[index]; + choiceHistory[tid] = ((choice_history << 3) | (exitID & 7) ) & choiceHistoryMask ; + DPRINTF(EdgePredUnit,"Squashing BP to Block [bid:%i].\n",blockID); + // Delete this BPHistory now that we're done with it. +} + +template +void +TournamentBP::squash(BlockID blockID,ThreadID tid) +{ + unsigned index = blockID&(Impl::MaxInFlightBlockNum -1); + unsigned global_history; + unsigned choice_history; + global_history = globalHistoryFile[index]; + globalHistory[tid] = global_history; + choice_history = choiceHistoryFile[index]; + choiceHistory[tid] = choice_history; +} + +template class TournamentBP; + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/rob.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/rob.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,329 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#ifndef __CPU_EDGE_EdgeROB_HH__ +#define __CPU_EDGE_EdgeROB_HH__ + +#include +#include +#include + +#include "config/the_isa.hh" + +/** + * EdgeROB class. The EdgeROB is largely what drives squashing. + */ +template +class EdgeROB +{ + protected: + typedef TheISA::RegIndex RegIndex; + public: + //Typedefs from the Impl. + typedef typename Impl::CPU CPU; + typedef typename Impl::DynInstPtr DynInstPtr; + typedef typename Impl::EdgeBlockPtr BlockPtr; + + //typedef std::pair UnmapInfo; + typedef typename std::list::iterator InstIt; + typedef typename std::list::iterator InstBlockIt; + + /** Possible EdgeROB statuses. */ + enum Status { + Running, + Idle, + EdgeROBSquashing + }; + + /** SMT EdgeROB Sharing Policy */ + enum EdgeROBPolicy{ + Dynamic, + Partitioned, + Threshold + }; + + private: + /** Per-thread EdgeROB status. */ + Status robStatus[Impl::MaxThreads]; + + /** EdgeROB resource sharing policy for SMT mode. */ + EdgeROBPolicy robPolicy; + + public: + /** EdgeROB constructor. + * @param _numEntries Number of entries in EdgeROB. 
+ * @param _squashWidth Number of instructions that can be squashed in a + * single cycle. + * @param _smtEdgeROBPolicy EdgeROB Partitioning Scheme for SMT. + * @param _smtEdgeROBThreshold Max Resources(by %) a thread can have in the EdgeROB. + * @param _numThreads The number of active threads. + */ + EdgeROB(CPU *_cpu, unsigned _numEntries, unsigned _squashWidth, + std::string smtEdgeROBPolicy, unsigned _smtEdgeROBThreshold, + ThreadID _numThreads); + + std::string name() const; + + /** Sets pointer to the list of active threads. + * @param at_ptr Pointer to the list of active threads. + */ + void setActiveThreads(std::list *at_ptr); + + /** Switches out the EdgeROB. */ + void switchOut(); + + /** Takes over another CPU's thread. */ + void takeOverFrom(); + + /** Function to insert an instruction into the EdgeROB. Note that whatever + * calls this function must ensure that there is enough space within the + * EdgeROB for the new instruction. + * @param inst The instruction being inserted into the EdgeROB. + */ + void insertInstBlock(BlockPtr &inst_block); + + /** Returns pointer to the head instruction within the EdgeROB. There is + * no guarantee as to the return value if the EdgeROB is empty. + * @retval Pointer to the DynInst that is at the head of the EdgeROB. + */ +// DynInstPtr readHeadInst(); + + /** Returns a pointer to the head instruction of a specific thread within + * the EdgeROB. + * @return Pointer to the DynInst that is at the head of the EdgeROB. + */ + BlockPtr readHeadInstBlock(ThreadID tid); + + /** + * Returns the block ID of head inst block. + */ + TheISA::BlockID readHeadInstBlockID(ThreadID tid); + + /** Returns pointer to the tail instruction within the EdgeROB. There is + * no guarantee as to the return value if the EdgeROB is empty. + * @retval Pointer to the DynInst that is at the tail of the EdgeROB. + */ +// DynInstPtr readTailInst(); + + /** Returns a pointer to the tail instruction of a specific thread within + * the EdgeROB. 
+ * @return Pointer to the DynInst that is at the tail of the EdgeROB. + */ + BlockPtr readTailInstBlock(ThreadID tid); + + /** Retires the head instruction, removing it from the EdgeROB. */ +// void retireHead(); + + /** Retires the head instruction of a specific thread, removing it from the + * EdgeROB. + */ + void retireHead(ThreadID tid); + + /** Is the oldest instruction across all threads ready. */ +// bool isHeadReady(); + + /** Is the oldest instruction across a particular thread ready. */ + bool isHeadReady(ThreadID tid); + + /** Is there any commitable head instruction across all threads ready. */ + bool canCommit(); + + /** Re-adjust EdgeROB partitioning. */ + void resetEntries(); + + /** Number of entries needed For 'num_threads' amount of threads. */ + int entryAmount(ThreadID num_threads); + + /** Returns the number of total free entries in the EdgeROB. */ + unsigned numFreeEntries(); + + /** Returns the number of free entries in a specific EdgeROB paritition. */ + unsigned numFreeEntries(ThreadID tid); + + /** Returns the maximum number of entries for a specific thread. */ + unsigned getMaxEntries(ThreadID tid) + { return maxEntries[tid]; } + + /** Returns the number of entries being used by a specific thread. */ + unsigned getThreadEntries(ThreadID tid) + { return threadEntries[tid]; } + + /** Returns if the EdgeROB is full. */ + bool isFull() + { return numInstBlocksInEdgeROB == numEntries; } + + /** Returns if a specific thread's partition is full. */ + bool isFull(ThreadID tid) + { return threadEntries[tid] == numEntries; } + + /** Returns if the EdgeROB is empty. */ + bool isEmpty() + { return numInstBlocksInEdgeROB == 0; } + + /** Returns if a specific thread's partition is empty. */ + bool isEmpty(ThreadID tid) + { return threadEntries[tid] == 0; } + + /** Executes the squash, marking squashed instructions. */ + void doSquash(ThreadID tid); + + /** Squashes all instructions younger than the given sequence number for + * the specific thread. 
+ */ + void squash(TheISA::BlockID squash_num, ThreadID tid); + + /** Updates the head instruction with the new oldest instruction. */ + void updateHead(); + + /** Updates the tail instruction with the new youngest instruction. */ + void updateTail(); + + /** Reads the PC of the oldest head instruction. */ +// uint64_t readHeadPC(); + + /** Reads the PC of the head instruction of a specific thread. */ +// uint64_t readHeadPC(ThreadID tid); + + /** Reads the next PC of the oldest head instruction. */ +// uint64_t readHeadNextPC(); + + /** Reads the next PC of the head instruction of a specific thread. */ +// uint64_t readHeadNextPC(ThreadID tid); + + /** Reads the sequence number of the oldest head instruction. */ +// InstSeqNum readHeadSeqNum(); + + /** Reads the sequence number of the head instruction of a specific thread. + */ +// InstSeqNum readHeadSeqNum(ThreadID tid); + + /** Reads the PC of the youngest tail instruction. */ +// uint64_t readTailPC(); + + /** Reads the PC of the tail instruction of a specific thread. */ +// uint64_t readTailPC(ThreadID tid); + + /** Reads the sequence number of the youngest tail instruction. */ +// InstSeqNum readTailSeqNum(); + + /** Reads the sequence number of tail instruction of a specific thread. */ +// InstSeqNum readTailSeqNum(ThreadID tid); + + /** Checks if the EdgeROB is still in the process of squashing instructions. + * @retval Whether or not the EdgeROB is done squashing. + */ + bool isDoneSquashing(ThreadID tid) const + { return doneSquashing[tid]; } + + /** Checks if the EdgeROB is still in the process of squashing instructions for + * any thread. + */ + bool isDoneSquashing(); + + /** This is more of a debugging function than anything. Use + * numInstBlocksInEdgeROB to get the instructions in the EdgeROB unless you are + * double checking that variable. + */ + int countInstBlocks(); + + /** This is more of a debugging function than anything. 
Use + * threadEntries to get the instructions in the EdgeROB unless you are + * double checking that variable. + */ + int countInstBlocks(ThreadID tid); + + private: + /** Pointer to the CPU. */ + CPU *cpu; + + /** Active Threads in CPU */ + std::list *activeThreads; + + /** Number of instructions in the EdgeROB. */ + unsigned numEntries; + + /** Entries Per Thread */ + unsigned threadEntries[Impl::MaxThreads]; + + /** Max Insts a Thread Can Have in the EdgeROB */ + unsigned maxEntries[Impl::MaxThreads]; + + /** EdgeROB List of Instructions */ + //std::list instList[Impl::MaxThreads]; + std::list instBlockList[Impl::MaxThreads]; + + /** Number of instructions that can be squashed in a single cycle. */ + unsigned squashWidth; + + public: + /** Iterator pointing to the instruction which is the last instruction + * in the EdgeROB. This may at times be invalid (ie when the EdgeROB is empty), + * however it should never be incorrect. + */ + InstBlockIt tail; + + /** Iterator pointing to the instruction which is the first instruction in + * in the EdgeROB*/ + InstBlockIt head; + + private: + /** Iterator used for walking through the list of instructions when + * squashing. Used so that there is persistent state between cycles; + * when squashing, the instructions are marked as squashed but not + * immediately removed, meaning the tail iterator remains the same before + * and after a squash. + * This will always be set to cpu->instList.end() if it is invalid. + */ + InstBlockIt squashIt[Impl::MaxThreads]; + + public: + /** Number of instructions in the EdgeROB. */ + int numInstBlocksInEdgeROB; + + /** Dummy instruction returned if there are no insts left. */ + BlockPtr dummyInstBlock; + + private: + /** The sequence number of the squashed instruction. */ + TheISA::BlockID squashedSeqNum[Impl::MaxThreads]; + + /** Is the EdgeROB done squashing. */ + bool doneSquashing[Impl::MaxThreads]; + + /** Number of active threads. 
*/ + ThreadID numThreads; +}; + +#endif //__CPU_EDGE_ROB_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/rob.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/rob.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,40 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 
2009 + * + */ + +#include "cpu/edge/isa_specific.hh" +#include "cpu/edge/rob_impl.hh" + +// Force instantiation of EdgeROB. +template class EdgeROB; diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/rob_impl.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/rob_impl.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,723 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include + +#include "config/full_system.hh" +#include "cpu/edge/rob.hh" + +using namespace std; + +template +EdgeROB::EdgeROB(CPU *_cpu, unsigned _numEntries, unsigned _squashWidth, + std::string _smtEdgeROBPolicy, unsigned _smtEdgeROBThreshold, + ThreadID _numThreads) + : cpu(_cpu), + numEntries(_numEntries), + squashWidth(_squashWidth), + numInstBlocksInEdgeROB(0), + numThreads(_numThreads) +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + squashedSeqNum[tid] = 0; + doneSquashing[tid] = true; + threadEntries[tid] = 0; + } + + std::string policy = _smtEdgeROBPolicy; + + //Convert string to lowercase + std::transform(policy.begin(), policy.end(), policy.begin(), + (int(*)(int)) tolower); + + //Figure out rob policy + if (policy == "dynamic") { + robPolicy = Dynamic; + + //Set Max Entries to Total EdgeROB Capacity + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = numEntries; + } + + } else if (policy == "partitioned") { + robPolicy = Partitioned; + DPRINTF(EdgeROB, "EdgeROB sharing policy set to Partitioned\n"); + + //@todo:make work if part_amt doesnt divide evenly. + int part_amt = numEntries / numThreads; + + //Divide EdgeROB up evenly + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = part_amt; + } + DPRINTF(EdgeROB, "%i entries for each thread in ROB\n", part_amt); + + } else if (policy == "threshold") { + robPolicy = Threshold; + DPRINTF(EdgeROB, "EdgeROB sharing policy set to Threshold\n"); + + int threshold = _smtEdgeROBThreshold;; + + //Divide up by threshold amount + for (ThreadID tid = 0; tid < numThreads; tid++) { + maxEntries[tid] = threshold; + } + } else { + assert(0 && "Invalid EdgeROB Sharing Policy.Options Are:{Dynamic," + "Partitioned, Threshold}"); + } + + // Set the per-thread iterators to the end of the instruction list. 
+ for (ThreadID tid = 0; tid < numThreads; tid++) { + squashIt[tid] = instBlockList[tid].end(); + } + + // Initialize the "universal" EdgeROB head & tail point to invalid + // pointers + head = instBlockList[0].end(); + tail = instBlockList[0].end(); +} + +template +std::string +EdgeROB::name() const +{ + return cpu->name() + ".rob"; +} + +template +void +EdgeROB::setActiveThreads(list *at_ptr) +{ + DPRINTF(EdgeROB, "Setting active threads list pointer.\n"); + activeThreads = at_ptr; +} + +template +void +EdgeROB::switchOut() +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + instBlockList[tid].clear(); + } +} + +template +void +EdgeROB::takeOverFrom() +{ + for (ThreadID tid = 0; tid < numThreads; tid++) { + doneSquashing[tid] = true; + threadEntries[tid] = 0; + squashIt[tid] = instBlockList[tid].end(); + } + numInstBlocksInEdgeROB = 0; + + // Initialize the "universal" EdgeROB head & tail point to invalid + // pointers + head = instBlockList[0].end(); + tail = instBlockList[0].end(); +} + +template +void +EdgeROB::resetEntries() +{ + if (robPolicy != Dynamic || numThreads > 1) { + int active_threads = activeThreads->size(); + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (robPolicy == Partitioned) { + maxEntries[tid] = numEntries / active_threads; + } else if (robPolicy == Threshold && active_threads == 1) { + maxEntries[tid] = numEntries; + } + } + } +} + +template +int +EdgeROB::entryAmount(ThreadID num_threads) +{ + if (robPolicy == Partitioned) { + return numEntries / num_threads; + } else { + return 0; + } +} + +template +int +EdgeROB::countInstBlocks() +{ + int total = 0; + + for (ThreadID tid = 0; tid < numThreads; tid++) + total += countInstBlocks(tid); + + return total; +} + +template +int +EdgeROB::countInstBlocks(ThreadID tid) +{ + return instBlockList[tid].size(); +} + +template +void +EdgeROB::insertInstBlock(BlockPtr &inst_block) 
+{ + //assert(numInstBlocksInEdgeROB == countInsts()); + assert(inst_block); + + DPRINTF(EdgeROB, "Adding inst block PC %#x to the EdgeROB.\n", + inst_block->getStartPC()); + + assert(numInstBlocksInEdgeROB != numEntries); + + ThreadID tid = inst_block->getTid(); + + instBlockList[tid].push_back(inst_block); + + //Set Up head iterator if this is the 1st inst block in the EdgeROB + if (numInstBlocksInEdgeROB == 0) { + // Set the inst block as Non-Speculative + inst_block->setNonSpeculative(); + + head = instBlockList[tid].begin(); + assert((*head) == inst_block); + } + + //Must Decrement for iterator to actually be valid since __.end() + //actually points to 1 after the last inst + tail = instBlockList[tid].end(); + tail--; + + inst_block->setInEdgeROB(); + + ++numInstBlocksInEdgeROB; + ++threadEntries[tid]; + + assert((*tail) == inst_block); + + DPRINTF(EdgeROB, "[tid:%i] Now has %d inst blocks.\n", tid, threadEntries[tid]); +} + +// Whatever calls this function needs to ensure that it properly frees up +// registers prior to this function. +/* +template +void +EdgeROB::retireHead() +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + assert(numInstBlocksInEdgeROB > 0); + + ThreadID tid = (*head)->threadNumber; + + retireHead(tid); + + if (numInstBlocksInEdgeROB == 0) { + tail = instList[tid].end(); + } +} +*/ + +template +void +EdgeROB::retireHead(ThreadID tid) +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + assert(numInstBlocksInEdgeROB > 0); + + // Get the head EdgeROB instruction. 
+ InstBlockIt head_it = instBlockList[tid].begin(); + + BlockPtr head_inst_block = (*head_it); + + assert(head_inst_block->readyToCommit()); + + DPRINTF(EdgeROB, "[tid:%u]: Retiring head instruction block, " + "PC %#x,[id:%lli]\n", tid, head_inst_block->getStartPC(), + head_inst_block->getBlockID()); + + --numInstBlocksInEdgeROB; + --threadEntries[tid]; + + head_inst_block->clearInEdgeROB(); + head_inst_block->setCommitted(); + + instBlockList[tid].erase(head_it); + + //Update "Global" Head of EdgeROB + updateHead(); + + // @todo: A special case is needed if the inst block being + // retired is the only inst block in the EdgeROB; otherwise the tail + // iterator will become invalidated. + cpu->removeFrontBlock(head_inst_block); +} +/* +template +bool +EdgeROB::isHeadReady() +{ + if (numInstBlocksInEdgeROB != 0) { + return (*head)->readyToCommit(); + } + + return false; +} +*/ +template +bool +EdgeROB::isHeadReady(ThreadID tid) +{ + if (threadEntries[tid] != 0) { + return (instBlockList[tid].front())->readyToCommit(); + } + + return false; +} + +template +bool +EdgeROB::canCommit() +{ + //@todo: set ActiveThreads through EdgeROB or CPU + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (isHeadReady(tid)) { + return true; + } + } + + return false; +} + +template +unsigned +EdgeROB::numFreeEntries() +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + + return numEntries - numInstBlocksInEdgeROB; +} + +template +unsigned +EdgeROB::numFreeEntries(ThreadID tid) +{ + return maxEntries[tid] - threadEntries[tid]; +} + +template +void +EdgeROB::doSquash(ThreadID tid) +{ + DPRINTF(EdgeROB, "[tid:%u]: Squashing inst blocks until [sn:%i].\n", + tid, squashedSeqNum[tid]); + + assert(squashIt[tid] != instBlockList[tid].end()); + + if ((*squashIt[tid])->getBlockID() < squashedSeqNum[tid]) { + DPRINTF(EdgeROB, "[tid:%u]: Done squashing inst blocks.\n", + tid); + + 
squashIt[tid] = instBlockList[tid].end(); + + doneSquashing[tid] = true; + return; + } + + bool robTailUpdate = false; + + for (int numSquashed = 0; + numSquashed < squashWidth && + squashIt[tid] != instBlockList[tid].end() && + (*squashIt[tid])->getBlockID() > squashedSeqNum[tid]; + ++numSquashed) + { + DPRINTF(EdgeROB, "[tid:%u]: Squashing inst blocks PC %#x, Bid: %i.\n", + (*squashIt[tid])->getTid(), + (*squashIt[tid])->getStartPC(), + (*squashIt[tid])->getBlockID()); + + // Mark the instruction as squashed, and ready to commit so that + // it can drain out of the pipeline. + (*squashIt[tid])->setSquashed(); + + (*squashIt[tid])->setCanCommit(); + + + if (squashIt[tid] == instBlockList[tid].begin()) { + DPRINTF(EdgeROB, "Reached head of instruction list while " + "squashing.\n"); + + squashIt[tid] = instBlockList[tid].end(); + + doneSquashing[tid] = true; + + return; + } + + InstBlockIt tail_thread = instBlockList[tid].end(); + tail_thread--; + + if ((*squashIt[tid]) == (*tail_thread)) + robTailUpdate = true; + + squashIt[tid]--; + } + + + // Check if EdgeROB is done squashing. 
+ if ((*squashIt[tid])->getBlockID() <= squashedSeqNum[tid]) { + DPRINTF(EdgeROB, "[tid:%u]: Done squashing inst blocks.\n", + tid); + + squashIt[tid] = instBlockList[tid].end(); + + doneSquashing[tid] = true; + } + + if (robTailUpdate) { + updateTail(); + } +} + + +template +void +EdgeROB::updateHead() +{ + //BlockPtr head_inst_block; + TheISA::BlockID lowest_num = 0; + bool first_valid = true; + + // @todo: set ActiveThreads through EdgeROB or CPU + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (instBlockList[tid].empty()) + continue; + + if (first_valid) { + head = instBlockList[tid].begin(); + lowest_num = (*head)->getBlockID(); + first_valid = false; + continue; + } + + InstBlockIt head_thread = instBlockList[tid].begin(); + + BlockPtr head_inst_block = (*head_thread); + + assert(head_inst_block != 0); + + if (head_inst_block->getBlockID() < lowest_num) { + head = head_thread; + lowest_num = head_inst_block->getBlockID(); + } + } + + if (first_valid) { + head = instBlockList[0].end(); + } else { // If there's head valid, mark it as non-speculative. 
+ (*head)->setNonSpeculative(); + } + +} + +template +void +EdgeROB::updateTail() +{ + tail = instBlockList[0].end(); + bool first_valid = true; + + list::iterator threads = activeThreads->begin(); + list::iterator end = activeThreads->end(); + + while (threads != end) { + ThreadID tid = *threads++; + + if (instBlockList[tid].empty()) { + continue; + } + + // If this is the first valid then assign w/out + // comparison + if (first_valid) { + tail = instBlockList[tid].end(); + tail--; + first_valid = false; + continue; + } + + // Assign new tail if this thread's tail is younger + // than our current "tail high" + InstBlockIt tail_thread = instBlockList[tid].end(); + tail_thread--; + + if ((*tail_thread)->getBlockID() > (*tail)->getBlockID()) { + tail = tail_thread; + } + } +} + + +template +void +EdgeROB::squash(TheISA::BlockID squash_num, ThreadID tid) +{ + if (isEmpty()) { + DPRINTF(EdgeROB, "Does not need to squash due to being empty " + "[sn:%i]\n", + squash_num); + + return; + } + + DPRINTF(EdgeROB, "Starting to squash within the EdgeROB.\n"); + + robStatus[tid] = EdgeROBSquashing; + + doneSquashing[tid] = false; + + squashedSeqNum[tid] = squash_num; + + if (!instBlockList[tid].empty()) { + InstBlockIt tail_thread = instBlockList[tid].end(); + tail_thread--; + + squashIt[tid] = tail_thread; + + doSquash(tid); + } +} +/* +template +typename Impl::DynInstPtr +EdgeROB::readHeadInst() +{ + if (numInstBlocksInEdgeROB != 0) { + assert((*head)->isInEdgeROB()==true); + return *head; + } else { + return dummyInst; + } +} +*/ + +template +typename Impl::EdgeBlockPtr +EdgeROB::readHeadInstBlock(ThreadID tid) +{ + if (threadEntries[tid] != 0) { + InstBlockIt head_thread = instBlockList[tid].begin(); + + assert((*head_thread)->isInEdgeROB()==true); + + return *head_thread; + } else { + return dummyInstBlock; + } +} + +template +TheISA::BlockID +EdgeROB::readHeadInstBlockID(ThreadID tid) +{ + if (threadEntries[tid] != 0) { + InstBlockIt head_thread = 
instBlockList[tid].begin(); + + assert((*head_thread)->isInEdgeROB()==true); + + return (*head_thread)->getBlockID(); + } else { + return 0; + } +} + +/* +template +uint64_t +EdgeROB::readHeadPC() +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + + DynInstPtr head_inst = *head; + + return head_inst->readPC(); +} + +template +uint64_t +EdgeROB::readHeadPC(ThreadID tid) +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + InstIt head_thread = instList[tid].begin(); + + return (*head_thread)->readPC(); +} + + +template +uint64_t +EdgeROB::readHeadNextPC() +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + + DynInstPtr head_inst = *head; + + return head_inst->readNextPC(); +} + +template +uint64_t +EdgeROB::readHeadNextPC(ThreadID tid) +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + InstIt head_thread = instList[tid].begin(); + + return (*head_thread)->readNextPC(); +} + +template +InstSeqNum +EdgeROB::readHeadSeqNum() +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + DynInstPtr head_inst = *head; + + return head_inst->seqNum; +} + +template +InstSeqNum +EdgeROB::readHeadSeqNum(ThreadID tid) +{ + InstIt head_thread = instList[tid].begin(); + + return ((*head_thread)->seqNum); +} + +template +typename Impl::DynInstPtr +EdgeROB::readTailInst() +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + //assert(tail != instList[0].end()); + + return (*tail); +} +*/ +template +typename Impl::EdgeBlockPtr +EdgeROB::readTailInstBlock(ThreadID tid) +{ + // Returns the youngest inst block of thread tid. An empty list has no tail and decrementing end() would be undefined, so hand back the dummy block as readHeadInstBlock() does. + if (instBlockList[tid].empty()) return dummyInstBlock; + InstBlockIt tail_thread = instBlockList[tid].end(); + tail_thread--; + + return *tail_thread; +} + +/* +template +uint64_t +EdgeROB::readTailPC() +{ + //assert(numInstBlocksInEdgeROB == countInsts()); + + //assert(tail != instList[0].end()); + + return (*tail)->readPC(); +} + +template +uint64_t +EdgeROB::readTailPC(ThreadID tid) +{ + //assert(tail_thread[tid] != instList[tid].end()); + + InstIt tail_thread = instList[tid].end(); +
tail_thread--; + + return (*tail_thread)->readPC(); +} + +template +InstSeqNum +EdgeROB::readTailSeqNum() +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + return (*tail)->seqNum; +} + +template +InstSeqNum +EdgeROB::readTailSeqNum(ThreadID tid) +{ + // Return the last sequence number that has not been squashed. Other + // stages can use it to squash any instructions younger than the current + // tail. + // assert(tail_thread[tid] != instList[tid].end()); + + InstIt tail_thread = instList[tid].end(); + tail_thread--; + + return (*tail_thread)->seqNum; +} +*/ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/sat_counter.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/sat_counter.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,116 @@ +/* + * Copyright (c) 2005-2006 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#ifndef __CPU_EDGE_SAT_COUNTER_HH__ +#define __CPU_EDGE_SAT_COUNTER_HH__ + +#include "base/misc.hh" +#include "base/types.hh" + +/** + * Private counter class for the internal saturating counters. + * Implements an n bit saturating counter and provides methods to + * increment, decrement, and read it. + * @todo Consider making this something that more closely mimics a + * built in class so you can use ++ or --. + */ +class SatCounter +{ + public: + /** + * Default constructor; maxVal stays 0 until setBits() is called. + */ + SatCounter() + : initialVal(0), maxVal(0), counter(0) + { } + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + */ + SatCounter(unsigned bits) + : initialVal(0), maxVal((1 << bits) - 1), counter(0) + { } + + /** + * Constructor for the counter. + * @param bits How many bits the counter will have. + * @param initial_val Starting value for each counter; reset() restores it. + */ + SatCounter(unsigned bits, uint8_t initial_val) + : initialVal(initial_val), maxVal((1 << bits) - 1), counter(initial_val) + { + // Check to make sure initial value doesn't exceed the max + // counter value. + if (initial_val > maxVal) { + fatal("BP: Initial counter value exceeds max size."); + } + } + + /** + * Sets the number of bits. + */ + void setBits(unsigned bits) { maxVal = (1 << bits) - 1; } + + /** Restores the counter to the value it was constructed with. */ + void reset() { counter = initialVal; } + + /** + * Increments the counter's current value.
+ */ + void increment() + { + if (counter < maxVal) { + ++counter; + } + } + + /** + * Decrements the counter's current value. + */ + void decrement() + { + if (counter > 0) { + --counter; + } + } + + /** + * Read the counter's value. + */ + const uint8_t read() const + { return counter; } + + private: + uint8_t initialVal; + uint8_t maxVal; + uint8_t counter; +}; + +#endif // __CPU_EDGE_SAT_COUNTER_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/sat_counter.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/sat_counter.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Authors: Kevin Lim + */ + +#include "base/misc.hh" +#include "cpu/edge/sat_counter.hh" +// Default counter: maxVal is 0 until setBits() is called. +SatCounter::SatCounter() + : initialVal(0), maxVal(0), counter(0) +{ +} +// Counter of the given bit width, starting (and resetting) at 0. +SatCounter::SatCounter(unsigned bits) + : initialVal(0), maxVal((1 << bits) - 1), counter(0) +{ +} +// Counter of the given bit width that starts at, and resets to, initial_val. +SatCounter::SatCounter(unsigned bits, uint8_t initial_val) + : initialVal(initial_val), maxVal((1 << bits) - 1), counter(initial_val) +{ + // Check to make sure initial value doesn't exceed the max counter value. + if (initial_val > maxVal) { + fatal("BP: Initial counter value exceeds max size."); + } +} +// Sets the saturation limit to the largest value representable in bits bits. +void +SatCounter::setBits(unsigned bits) +{ + maxVal = (1 << bits) - 1; +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/static_inst.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/static_inst.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,462 @@ +/* + * Copyright (c) 2003-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved.
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Date: Nov. 2009 + * Author: Gou Pengfei + */ + +#ifndef __EDGECPU_STATIC_INST_HH__ +#define __EDGECPU_STATIC_INST_HH__ + +#include +#include + +#include "cpu/static_inst.hh" + +class EDGEAddrDecodePage; + +namespace Trace { + class EdgeInstRecord; +} +/** + * + * Author: Gou Pengfei + * Date: Dec. 2009 + * + * EDGE Static Inst class + * This will only hold ISA related infos like operation class, target id, lsid, + * and any infos encoded in ISA. 
+ * Any thing related to the operation of this instruction will be put into + * EdgeDynInst class like operands and the interpretation of target id. + */ +class EDGEStaticInst : public StaticInstBase +{ + public: + + /// Binary machine instruction type. + typedef TheISA::MachInst MachInst; + /// Binary extended machine instruction type. + typedef TheISA::ExtMachInst ExtMachInst; + /// RegIndex + typedef TheISA::RegIndex RegIndex; + + /// Consumer ID type + typedef TheISA::ConsumerID ConsumerID; + /// LSID type + typedef TheISA::LsID LsID; + /// EXIT type + typedef TheISA::ExitID ExitID; + typedef TheISA::Predication Predication; + + /// Max number of consumers and operands for one edge inst. + enum { + MaxInstConsumers = TheISA::MaxInstConsumers, + MaxInstOperands = TheISA::MaxInstOperands, + MaxInstSrcRegs = TheISA::MaxInstSrcRegs, + MaxInstDestRegs = TheISA::MaxInstDestRegs + }; + + /// Return logical index (architectural reg num) of i'th destination reg. + /// Only the entries from 0 through numDestRegs()-1 are valid. + RegIndex destRegIdx(int i) const { return _destRegIdx[i]; } + + /// Return logical index (architectural reg num) of i'th source reg. + /// Only the entries from 0 through numSrcRegs()-1 are valid. + RegIndex srcRegIdx(int i) const { return _srcRegIdx[i]; } + + ///Return the num of consumers of this instruction + uint32_t getNumConsumers() const{ return _numConsumers; } + + uint32_t getNumOperands() const { return _numOperands; } + + /// Return the ID of a consumer of this instruction + ConsumerID getRawConsumerID(int i) const { return _consumerID[i]; } + /// Return LSID only when this is memory refenrence instruction + LsID getLSID() const { assert(isMemRef()); return _lsID;} + /// Return EXIT ID only when this is PC-control instruction + ExitID getEXIT() const { assert(isControl()); return _exitID;} + Predication getPredication() const { return _predication; } + + /// Pointer to a statically allocated "null" instruction object. 
+ /// Used to give eaCompInst() and memAccInst() something to return + /// when called on non-memory instructions. + static EDGEStaticInstPtr nullEDGEStaticInstPtr; + + /** + * Memory references only: returns "fake" instruction representing + * the effective address part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the EA computation. + */ + virtual const + EDGEStaticInstPtr &eaCompInst() const { return nullEDGEStaticInstPtr; } + + /** + * Memory references only: returns "fake" instruction representing + * the memory access part of the memory operation. Used to + * obtain the dependence info (numSrcRegs and srcRegIdx[]) for + * just the memory access (not the EA computation). + */ + virtual const + EDGEStaticInstPtr &memAccInst() const { return nullEDGEStaticInstPtr; } + + /// The binary machine instruction. + const ExtMachInst machInst; + + protected: + + /// LSID for L/S instructions + LsID _lsID; + /// Exit ID for branch instructions + ExitID _exitID; + /// Predication indicator + Predication _predication; + /// Number of consumers of this instruction + uint32_t _numConsumers; + /// Number of operands of this instruction + uint32_t _numOperands; + + /// See consumerID() + ConsumerID _consumerID[MaxInstConsumers]; + + /// See destRegIdx(). + RegIndex _destRegIdx[MaxInstDestRegs]; + /// See srcRegIdx(). + RegIndex _srcRegIdx[MaxInstSrcRegs]; + + /** + * Base mnemonic (e.g., "add"). Used by generateDisassembly() + * methods. Also useful to readily identify instructions from + * within the debugger when #cachedDisassembly has not been + * initialized. + */ + const char *mnemonic; + + /** + * String representation of disassembly (lazily evaluated via + * disassemble()). + */ + mutable std::string *cachedDisassembly; + + /** + * Internal function to generate disassembly string. + */ + virtual std::string + generateDisassembly(Addr pc, const SymbolTable *symtab) const = 0; + + /// Constructor. 
+ EDGEStaticInst(const char *_mnemonic, ExtMachInst _machInst, OpClass __opClass) + : StaticInstBase(__opClass), + machInst(_machInst), mnemonic(_mnemonic), cachedDisassembly(0) + { } + + public: + virtual ~EDGEStaticInst(); + +/** + * The execute() signatures are auto-generated by scons based on the + * set of EDGECPU models we are compiling in today. + */ +#include "cpu/static_inst_exec_sigs.hh" + + /** + * Return the target address for a PC-relative branch. + * Invalid if not a PC-relative branch (i.e. isDirectCtrl() + * should be true). + */ + virtual Addr branchTarget(Addr branchPC) const; + + /** + * Return the target address for an indirect branch (jump). The + * register value is read from the supplied thread context, so + * the result is valid only if the thread context is about to + * execute the branch in question. Invalid if not an indirect + * branch (i.e. isIndirectCtrl() should be true). + */ + virtual Addr branchTarget(ThreadContext *tc) const; + + /** + * Return true if the instruction is a control transfer, and if so, + * return the target address as well. + */ + bool hasBranchTarget(Addr pc, ThreadContext *tc, Addr &tgt) const; + + /** + * Return string representation of disassembled instruction. + * The default version of this function will call the internal + * virtual generateDisassembly() function to get the string, + * then cache it in #cachedDisassembly. If the disassembly + * should not be cached, this function should be overridden directly. + */ + virtual const std::string &disassemble(Addr pc, + const SymbolTable *symtab = 0) const; + + /// Decoded instruction cache type. + /// For now we're using a generic hash_map; this seems to work + /// pretty well. + typedef m5::hash_map DecodeCache; + + /// A cache of decoded instruction objects. + static DecodeCache decodeCache; + + /** + * Dump some basic stats on the decode cache hash map. + * Only gets called if DECODE_CACHE_HASH_STATS is defined. 
+ */ + static void dumpDecodeCacheStats(); + + /// Decode a machine instruction. + /// @param mach_inst The binary instruction to decode. + /// @retval A pointer to the corresponding StaticInst object. + //This is defined as inlined below. + static EDGEStaticInstPtr decode(ExtMachInst mach_inst, Addr addr, + TheISA::BlockStatus blockstatus); + + /// Return name of machine instruction + std::string getName() { return mnemonic; } + + /// Decoded instruction cache type, for address decoding. + /// A generic hash_map is used. + typedef m5::hash_map AddrDecodeCache; + + /// A cache of decoded instruction objects from addresses. + static AddrDecodeCache addrDecodeCache; + + struct cacheElement + { + Addr page_addr; + EDGEAddrDecodePage *decodePage; + + cacheElement() : decodePage(NULL) { } + }; + + /// An array of recently decoded instructions. + // might not use an array if there is only two elements + static struct cacheElement recentDecodes[2]; + + /// Updates the recently decoded instructions entries + /// @param page_addr The page address recently used. + /// @param decodePage Pointer to decoding page containing the decoded + /// instruction. + static inline void + updateCache(Addr page_addr, EDGEAddrDecodePage *decodePage) + { + recentDecodes[1].page_addr = recentDecodes[0].page_addr; + recentDecodes[1].decodePage = recentDecodes[0].decodePage; + recentDecodes[0].page_addr = page_addr; + recentDecodes[0].decodePage = decodePage; + } + + /// Searches the decoded instruction cache for instruction decoding. + /// If it is not found, then we decode the instruction. + /// Otherwise, we get the instruction from the cache and move it into + /// the address-to-instruction decoding page. + /// @param mach_inst The binary instruction to decode. + /// @param addr The address that contained the binary instruction. + /// @param decodePage Pointer to decoding page containing the instruction. + /// @retval A pointer to the corresponding StaticInst object. 
+ //This is defined as inlined below. + static EDGEStaticInstPtr searchCache(ExtMachInst mach_inst, Addr addr, + EDGEAddrDecodePage *decodePage, TheISA::BlockStatus blockstatus); +}; + + +/// Reference-counted pointer to a StaticInst object. +/// This type should be used instead of "StaticInst *" so that +/// StaticInst objects can be properly reference-counted. +class EDGEStaticInstPtr : public RefCountingPtr +{ + public: + /// Constructor. + EDGEStaticInstPtr() + : RefCountingPtr() + { + } + + /// Conversion from "StaticInst *". + EDGEStaticInstPtr(EDGEStaticInst *p) + : RefCountingPtr(p) + { + } + + /// Copy constructor. + EDGEStaticInstPtr(const EDGEStaticInstPtr &r) + : RefCountingPtr(r) + { + } + + /// Construct directly from machine instruction. + /// Calls StaticInst::decode(). + explicit EDGEStaticInstPtr(TheISA::ExtMachInst mach_inst, Addr addr, TheISA::BlockStatus blockstatus) + : RefCountingPtr(EDGEStaticInst::decode(mach_inst, addr, blockstatus)) + { + } + + /// Convert to pointer to StaticInstBase class. + operator const StaticInstBasePtr() + { + return this->get(); + } +}; + +/// A page of a list of decoded instructions from an address. +class EDGEAddrDecodePage +{ + typedef TheISA::ExtMachInst ExtMachInst; + protected: + EDGEStaticInstPtr instructions[TheISA::PageBytes]; + bool valid[TheISA::PageBytes]; + Addr lowerMask; + + public: + /// Constructor + EDGEAddrDecodePage() + { + lowerMask = TheISA::PageBytes - 1; + memset(valid, 0, TheISA::PageBytes); + } + + /// Checks if the instruction is already decoded and the machine + /// instruction in the cache matches the current machine instruction + /// related to the address + /// @param mach_inst The binary instruction to check + /// @param addr The address containing the instruction + /// @param blockstatus The block status of EDGE architecture. 
+ bool + decoded(ExtMachInst mach_inst, Addr addr, TheISA::BlockStatus blockstatus) + { + bool status_valid; + // Check if this inst is match for the block status. + status_valid = valid[addr & lowerMask]&& + ((instructions[addr & lowerMask]->isGRegWR() && + (blockstatus == TheISA::Head)) || + (!instructions[addr & lowerMask]->isGRegWR() && + (blockstatus == TheISA::Normal))); + + return (status_valid && (instructions[addr & lowerMask]->machInst == mach_inst) ); + } + + /// Returns the instruction object. decoded should be called first + /// to check if the instruction is valid. + /// @param addr The address of the instruction. + /// @retval A pointer to the corresponding StaticInst object. + EDGEStaticInstPtr + getInst(Addr addr) + { + return instructions[addr & lowerMask]; + } + + /// Inserts a pointer to a StaticInst object into the list of decoded + /// instructions on the page. + /// @param addr The address of the instruction. + /// @param si A pointer to the corresponding StaticInst object. + void + insert(Addr addr, EDGEStaticInstPtr &si) + { + instructions[addr & lowerMask] = si; + valid[addr & lowerMask] = true; + } +}; + +inline EDGEStaticInstPtr +EDGEStaticInst::decode(EDGEStaticInst::ExtMachInst mach_inst, Addr addr, TheISA::BlockStatus blockstatus) +{ +#ifdef DECODE_CACHE_HASH_STATS + // Simple stats on decode hash_map. Turns out the default + // hash function is as good as anything I could come up with. 
+ const int dump_every_n = 10000000; + static int decodes_til_dump = dump_every_n; + + if (--decodes_til_dump == 0) { + dumpDecodeCacheStats(); + decodes_til_dump = dump_every_n; + } +#endif + + Addr page_addr = addr & ~(TheISA::PageBytes - 1); + + // checks recently decoded addresses + if (recentDecodes[0].decodePage && + page_addr == recentDecodes[0].page_addr) { + if (recentDecodes[0].decodePage->decoded(mach_inst, addr, blockstatus)) + return recentDecodes[0].decodePage->getInst(addr); + + return searchCache(mach_inst, addr, recentDecodes[0].decodePage, blockstatus); + } + + if (recentDecodes[1].decodePage && + page_addr == recentDecodes[1].page_addr) { + if (recentDecodes[1].decodePage->decoded(mach_inst, addr, blockstatus)) + return recentDecodes[1].decodePage->getInst(addr); + + return searchCache(mach_inst, addr, recentDecodes[1].decodePage, blockstatus); + } + + // searches the page containing the address to decode + AddrDecodeCache::iterator iter = addrDecodeCache.find(page_addr); + if (iter != addrDecodeCache.end()) { + updateCache(page_addr, iter->second); + if (iter->second->decoded(mach_inst, addr, blockstatus)) + return iter->second->getInst(addr); + + return searchCache(mach_inst, addr, iter->second, blockstatus); + } + + // creates a new object for a page of decoded instructions + EDGEAddrDecodePage *decodePage = new EDGEAddrDecodePage; + addrDecodeCache[page_addr] = decodePage; + updateCache(page_addr, decodePage); + return searchCache(mach_inst, addr, decodePage, blockstatus); +} + +inline EDGEStaticInstPtr +EDGEStaticInst::searchCache(ExtMachInst mach_inst, Addr addr, + EDGEAddrDecodePage *decodePage, TheISA::BlockStatus blockstatus) +{ + DecodeCache::iterator iter = decodeCache.find(mach_inst); + + if (iter != decodeCache.end()) { + // We can only get reg-write/read inst in Head status and + // non-reg-write/read inst in Normal status + if ( (iter->second->isGRegWR() && blockstatus == TheISA::Head) + || (!iter->second->isGRegWR() && blockstatus 
== TheISA::Normal )) { + + decodePage->insert(addr, iter->second); + return iter->second; + } + } + + EDGEStaticInstPtr si = TheISA::decodeInst(mach_inst, blockstatus); + decodePage->insert(addr, si); + decodeCache[mach_inst] = si; + return si; +} + +#endif // __EDGECPU_STATIC_INST_HH__ diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/static_inst.cc --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/static_inst.cc Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,113 @@ +/* + * Copyright (c) 2009-2010 HIT Microelectronic Center + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Authors: Gou Pengfei + * Jin Yanhan + * + * Date: Dec. 2009 + * + */ + +#include +#include "cpu/edge/static_inst.hh" +#include "sim/core.hh" + +EDGEStaticInstPtr EDGEStaticInst::nullEDGEStaticInstPtr; + +// Define the decode cache hash map. +EDGEStaticInst::DecodeCache EDGEStaticInst::decodeCache; +EDGEStaticInst::AddrDecodeCache EDGEStaticInst::addrDecodeCache; +EDGEStaticInst::cacheElement EDGEStaticInst::recentDecodes[2]; + +using namespace std; + +EDGEStaticInst::~EDGEStaticInst() +{ + if (cachedDisassembly) + delete cachedDisassembly; +} + +void +EDGEStaticInst::dumpDecodeCacheStats() +{ + cerr << "Decode hash table stats @ " << curTick << ":" << endl; + cerr << "\tnum entries = " << decodeCache.size() << endl; + cerr << "\tnum buckets = " << decodeCache.bucket_count() << endl; + vector hist(100, 0); + int max_hist = 0; + for (int i = 0; i < decodeCache.bucket_count(); ++i) { + int count = decodeCache.elems_in_bucket(i); + if (count > max_hist) + max_hist = count; + hist[count]++; + } + for (int i = 0; i <= max_hist; ++i) { + cerr << "\tbuckets of size " << i << " = " << hist[i] << endl; + } +} + +bool +EDGEStaticInst::hasBranchTarget(Addr pc, ThreadContext *tc, Addr &tgt) const +{ + if (isDirectCtrl()) { + tgt = branchTarget(pc); + return true; + } + + if (isIndirectCtrl()) { + tgt = branchTarget(tc); + return true; + } + + return false; +} + +Addr +EDGEStaticInst::branchTarget(Addr branchPC) const +{ + 
panic("StaticInst::branchTarget() called on instruction " + "that is not a PC-relative branch."); + M5_DUMMY_RETURN; +} + +Addr +EDGEStaticInst::branchTarget(ThreadContext *tc) const +{ + panic("StaticInst::branchTarget() called on instruction " + "that is not an indirect branch."); + M5_DUMMY_RETURN; +} + +const string & +EDGEStaticInst::disassemble(Addr pc, const SymbolTable *symtab) const +{ + if (!cachedDisassembly) + cachedDisassembly = new string(generateDisassembly(pc, symtab)); + + return *cachedDisassembly; +} diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/store_set.hh --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/cpu/edge/store_set.hh Wed Apr 28 19:24:20 2010 +0800 @@ -0,0 +1,146 @@ +/* + * Copyright (c) 2004-2005 The Regents of The University of Michigan + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer; + * redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution; + * neither the name of the copyright holders nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#ifndef __CPU_EDGE_STORE_SET_HH__
+#define __CPU_EDGE_STORE_SET_HH__
+
+#include <list>
+#include <map>
+#include <utility>
+#include <vector>
+
+#include "base/types.hh"
+#include "cpu/inst_seq.hh"
+
+struct ltseqnum {
+    bool operator()(const InstSeqNum &lhs, const InstSeqNum &rhs) const
+    {
+        return lhs > rhs; // orders keys descending: largest sequence number first
+    }
+};
+
+/**
+ * Implements a store set predictor for determining if memory
+ * instructions are dependent upon each other.  See paper "Memory
+ * Dependence Prediction using Store Sets" by Chrysos and Emer.  SSID
+ * stands for Store Set ID, SSIT stands for Store Set ID Table, and
+ * LFST is Last Fetched Store Table.
+ */
+class StoreSet
+{
+  public:
+    typedef unsigned SSID;
+
+  public:
+    /** Default constructor; sizes start at 0. init() must be called prior to use. */
+    StoreSet() : SSITSize(0), LFSTSize(0), indexMask(0), offsetBits(0) { }
+
+    /** Creates store set predictor with given table sizes (each must be a power of 2). */
+    StoreSet(int SSIT_size, int LFST_size);
+
+    /** Default destructor. */
+    ~StoreSet();
+
+    /** Initializes the store set predictor with the given table sizes. */
+    void init(int SSIT_size, int LFST_size);
+
+    /** Records a memory ordering violation between the younger load
+     * and the older store. */
+    void violation(Addr store_PC, Addr load_PC);
+
+    /** Inserts a load into the store set predictor.  This does nothing but
+     * is included in case other predictors require a similar function.
+     */
+    void insertLoad(Addr load_PC, InstSeqNum load_seq_num);
+
+    /** Inserts a store into the store set predictor.  Updates the
+     * LFST if the store has a valid SSID. */
+    void insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid);
+
+    /** Checks if the instruction with the given PC is dependent upon
+     * any store.  @return Returns the sequence number of the store
+     * instruction this PC is dependent upon.  Returns 0 if none.
+     */
+    InstSeqNum checkInst(Addr PC);
+
+    /** Records this PC/sequence number as issued. */
+    void issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store);
+
+    /** Squashes for a specific thread until the given sequence number. */
+    void squash(InstSeqNum squashed_num, ThreadID tid);
+
+    /** Resets all tables. */
+    void clear();
+
+    /** Debug function to dump the contents of the store list. */
+    void dump();
+
+  private:
+    /** Calculates the index into the SSIT based on the PC. */
+    inline int calcIndex(Addr PC)
+    { return (PC >> offsetBits) & indexMask; }
+
+    /** Calculates a Store Set ID based on the PC. */
+    inline SSID calcSSID(Addr PC)
+    { return ((PC ^ (PC >> 10)) % LFSTSize); }
+
+    /** The Store Set ID Table. */
+    std::vector<SSID> SSIT;
+
+    /** Bit vector to tell if the SSIT has a valid entry. */
+    std::vector<bool> validSSIT;
+
+    /** Last Fetched Store Table. */
+    std::vector<InstSeqNum> LFST;
+
+    /** Bit vector to tell if the LFST has a valid entry. */
+    std::vector<bool> validLFST;
+
+    /** Map of stores that have been inserted into the store set, but
+     * not yet issued or squashed.
+     */
+    std::map<InstSeqNum, int, ltseqnum> storeList;
+
+    typedef std::map<InstSeqNum, int, ltseqnum>::iterator SeqNumMapIt;
+
+    /** Store Set ID Table size, in entries. */
+    int SSITSize;
+
+    /** Last Fetched Store Table size, in entries. */
+    int LFSTSize;
+
+    /** Mask to obtain the index. */
+    int indexMask;
+
+    // HACK: Hardcoded for now.
+    int offsetBits;
+};
+
+#endif // __CPU_EDGE_STORE_SET_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/store_set.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/store_set.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,351 @@
+/*
+ * Copyright (c) 2004-2006 The Regents of The University of Michigan
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Kevin Lim
+ */
+
+#include "base/intmath.hh"
+#include "base/misc.hh"
+#include "base/trace.hh"
+#include "cpu/edge/store_set.hh"
+
+StoreSet::StoreSet(int _SSIT_size, int _LFST_size)
+    : SSITSize(_SSIT_size), LFSTSize(_LFST_size)
+{
+    DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
+    DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
+            SSITSize, LFSTSize);
+
+    if (!isPowerOf2(SSITSize)) {
+        fatal("Invalid SSIT size!\n");
+    }
+
+    SSIT.resize(SSITSize);
+
+    validSSIT.resize(SSITSize);
+
+    for (int i = 0; i < SSITSize; ++i)
+        validSSIT[i] = false;
+
+    if (!isPowerOf2(LFSTSize)) {
+        fatal("Invalid LFST size!\n");
+    }
+
+    LFST.resize(LFSTSize);
+
+    validLFST.resize(LFSTSize);
+
+    for (int i = 0; i < LFSTSize; ++i) {
+        validLFST[i] = false;
+        LFST[i] = 0;
+    }
+
+    indexMask = SSITSize - 1; // usable as a bit mask: SSITSize was checked to be a power of 2 above
+
+    offsetBits = 2; // calcIndex() shifts the PC right by this many bits
+}
+
+StoreSet::~StoreSet()
+{
+}
+
+void
+StoreSet::init(int _SSIT_size, int _LFST_size)
+{
+    SSITSize = _SSIT_size;
+    LFSTSize = _LFST_size;
+
+    DPRINTF(StoreSet, "StoreSet: Creating store set object.\n");
+    DPRINTF(StoreSet, "StoreSet: SSIT size: %i, LFST size: %i.\n",
+            SSITSize, LFSTSize);
+
+    SSIT.resize(SSITSize);
+
+    validSSIT.resize(SSITSize);
+
+    for (int i = 0; i < SSITSize; ++i)
+        validSSIT[i] = false;
+
+    LFST.resize(LFSTSize);
+
+    validLFST.resize(LFSTSize);
+
+    for (int i = 0; i < LFSTSize; ++i) {
+        validLFST[i] = false;
+        LFST[i] = 0;
+    }
+
+    indexMask = SSITSize - 1; // NOTE(review): no isPowerOf2() check here, unlike the constructor
+
+    offsetBits = 2; // calcIndex() shifts the PC right by this many bits
+}
+
+
+void
+StoreSet::violation(Addr store_PC, Addr load_PC)
+{
+    int load_index = calcIndex(load_PC);
+    int store_index = calcIndex(store_PC);
+
+    assert(load_index < SSITSize && store_index < SSITSize);
+
+    bool valid_load_SSID = validSSIT[load_index];
+    bool valid_store_SSID = validSSIT[store_index];
+
+    if (!valid_load_SSID && !valid_store_SSID) {
+        // Calculate a new SSID here (derived from the load PC) and assign it to both.
+        SSID new_set = calcSSID(load_PC);
+
+        validSSIT[load_index] = true;
+
+        SSIT[load_index] = new_set;
+
+        validSSIT[store_index] = true;
+
+        SSIT[store_index] = new_set;
+
+        assert(new_set < LFSTSize);
+
+        DPRINTF(StoreSet, "StoreSet: Neither load nor store had a valid "
+                "storeset, creating a new one: %i for load %#x, store %#x\n",
+                new_set, load_PC, store_PC);
+    } else if (valid_load_SSID && !valid_store_SSID) {
+        SSID load_SSID = SSIT[load_index];
+
+        validSSIT[store_index] = true;
+
+        SSIT[store_index] = load_SSID;
+
+        assert(load_SSID < LFSTSize);
+
+        DPRINTF(StoreSet, "StoreSet: Load had a valid store set.  Adding "
+                "store to that set: %i for load %#x, store %#x\n",
+                load_SSID, load_PC, store_PC);
+    } else if (!valid_load_SSID && valid_store_SSID) {
+        SSID store_SSID = SSIT[store_index];
+
+        validSSIT[load_index] = true;
+
+        SSIT[load_index] = store_SSID;
+
+        DPRINTF(StoreSet, "StoreSet: Store had a valid store set: %i for "
+                "load %#x, store %#x\n",
+                store_SSID, load_PC, store_PC);
+    } else {
+        SSID load_SSID = SSIT[load_index];
+        SSID store_SSID = SSIT[store_index];
+
+        assert(load_SSID < LFSTSize && store_SSID < LFSTSize);
+
+        // The store set with the lower number wins
+        if (store_SSID > load_SSID) {
+            SSIT[store_index] = load_SSID;
+
+            DPRINTF(StoreSet, "StoreSet: Load had smaller store set: %i; "
+                    "for load %#x, store %#x\n",
+                    load_SSID, load_PC, store_PC);
+        } else {
+            SSIT[load_index] = store_SSID;
+
+            DPRINTF(StoreSet, "StoreSet: Store had smaller store set: %i; "
+                    "for load %#x, store %#x\n",
+                    store_SSID, load_PC, store_PC);
+        }
+    }
+}
+
+void
+StoreSet::insertLoad(Addr load_PC, InstSeqNum load_seq_num)
+{
+    // Does nothing.
+    return;
+}
+
+void
+StoreSet::insertStore(Addr store_PC, InstSeqNum store_seq_num, ThreadID tid)
+{
+    int index = calcIndex(store_PC); // tid is not used; the SSIT is indexed by PC only
+
+    int store_SSID;
+
+    assert(index < SSITSize);
+
+    if (!validSSIT[index]) {
+        // Do nothing if there's no valid entry.
+        return;
+    } else {
+        store_SSID = SSIT[index];
+
+        assert(store_SSID < LFSTSize);
+
+        // Update the last store that was fetched with the current one.
+        LFST[store_SSID] = store_seq_num;
+
+        validLFST[store_SSID] = 1;
+
+        storeList[store_seq_num] = store_SSID; // stays listed until issued() erases it or squash() clears the list
+
+        DPRINTF(StoreSet, "Store %#x updated the LFST, SSID: %i\n",
+                store_PC, store_SSID);
+    }
+}
+
+InstSeqNum
+StoreSet::checkInst(Addr PC)
+{
+    int index = calcIndex(PC);
+
+    int inst_SSID;
+
+    assert(index < SSITSize);
+
+    if (!validSSIT[index]) {
+        DPRINTF(StoreSet, "Inst %#x with index %i had no SSID\n",
+                PC, index);
+
+        // Return 0 if there's no valid entry.
+        return 0;
+    } else {
+        inst_SSID = SSIT[index];
+
+        assert(inst_SSID < LFSTSize);
+
+        if (!validLFST[inst_SSID]) {
+
+            DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had no "
+                    "dependency\n", PC, index, inst_SSID);
+
+            return 0;
+        } else {
+            DPRINTF(StoreSet, "Inst %#x with index %i and SSID %i had LFST "
+                    "inum of %i\n", PC, index, inst_SSID, LFST[inst_SSID]);
+
+            return LFST[inst_SSID];
+        }
+    }
+}
+
+void
+StoreSet::issued(Addr issued_PC, InstSeqNum issued_seq_num, bool is_store)
+{
+    // This only is updated upon a store being issued.
+    if (!is_store) {
+        return;
+    }
+
+    int index = calcIndex(issued_PC);
+
+    int store_SSID;
+
+    assert(index < SSITSize);
+
+    SeqNumMapIt store_list_it = storeList.find(issued_seq_num);
+
+    if (store_list_it != storeList.end()) {
+        storeList.erase(store_list_it);
+    }
+
+    // Make sure the SSIT still has a valid entry for the issued store.
+    if (!validSSIT[index]) {
+        return;
+    }
+
+    store_SSID = SSIT[index];
+
+    assert(store_SSID < LFSTSize);
+
+    // If the last fetched store in the store set refers to the store that
+    // was just issued, then invalidate the entry.
+    if (validLFST[store_SSID] && LFST[store_SSID] == issued_seq_num) {
+        DPRINTF(StoreSet, "StoreSet: store invalidated itself in LFST.\n");
+        validLFST[store_SSID] = false;
+    }
+}
+
+void
+StoreSet::squash(InstSeqNum squashed_num, ThreadID tid)
+{
+    DPRINTF(StoreSet, "StoreSet: Squashing until inum %i\n",
+            squashed_num);
+
+    storeList.clear(); // drops every tracked store regardless of squashed_num/tid; LFST/validLFST are not touched
+
+#if 0 // disabled selective squash; NOTE(review): with it off, LFST entries of squashed stores stay valid
+    int idx;
+    SeqNumMapIt store_list_it = storeList.begin();
+
+    //@todo:Fix to only delete from correct thread
+    while (!storeList.empty()) {
+        idx = (*store_list_it).second;
+
+        if ((*store_list_it).first <= squashed_num) {
+            break;
+        }
+
+        bool younger = LFST[idx] > squashed_num;
+
+        if (validLFST[idx] && younger) {
+            DPRINTF(StoreSet, "Squashed [sn:%lli]\n", LFST[idx]);
+            validLFST[idx] = false;
+
+            storeList.erase(store_list_it++);
+        } else if (!validLFST[idx] && younger) {
+            storeList.erase(store_list_it++);
+        }
+    }
+#endif
+}
+
+void
+StoreSet::clear()
+{
+    for (int i = 0; i < SSITSize; ++i) {
+        validSSIT[i] = false;
+    }
+
+    for (int i = 0; i < LFSTSize; ++i) {
+        validLFST[i] = false;
+    }
+
+    storeList.clear();
+}
+
+void
+StoreSet::dump()
+{
+    cprintf("storeList.size(): %i\n", storeList.size());
+    SeqNumMapIt store_list_it = storeList.begin();
+
+    int num = 0;
+
+    while (store_list_it != storeList.end()) {
+        cprintf("%i: [sn:%lli] SSID:%i\n",
+                num, (*store_list_it).first, (*store_list_it).second);
+        num++;
+        store_list_it++;
+    }
+}
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/thread_context.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/thread_context.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,287 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec. 2009
+ *
+ */
+
+#ifndef __CPU_EDGE_THREAD_CONTEXT_HH__
+#define __CPU_EDGE_THREAD_CONTEXT_HH__
+
+#include "config/the_isa.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/edge/isa_specific.hh"
+
+class EndQuiesceEvent;
+namespace Kernel {
+    class Statistics;
+};
+
+class TranslatingPort;
+
+/**
+ * Derived ThreadContext class for use with the EdgeCPU.  It
+ * provides the interface for any external objects to access a
+ * single thread's state and some general CPU state.  Any time
+ * external objects try to update state through this interface,
+ * the CPU will create an event to squash all in-flight
+ * instructions in order to ensure state is maintained correctly.
+ * It must be defined specifically for the EdgeCPU because
+ * not all architectural state is located within the EdgeThreadState
+ * (such as the commit PC, and registers), and specific actions
+ * must be taken when using this interface (such as squashing all
+ * in-flight instructions when doing a write to this interface).
+ */
+template <class Impl>
+class EdgeThreadContext : public ThreadContext
+{
+  public:
+    typedef typename Impl::CPU CPU;
+
+    /** Pointer to the CPU. */
+    CPU *cpu;
+
+    /** Pointer to the thread state that this TC corresponds to. */
+    EdgeThreadState<Impl> *thread;
+
+    /** Returns a pointer to the ITB. */
+    TheISA::TLB *getITBPtr() { return cpu->itb; }
+
+    /** Returns a pointer to the DTB. */
+    TheISA::TLB *getDTBPtr() { return cpu->dtb; }
+
+    /** Returns a pointer to this CPU. */
+    virtual BaseCPU *getCpuPtr() { return cpu; }
+
+    /** Reads this CPU's ID. */
+    virtual int cpuId() { return cpu->cpuId(); }
+
+    virtual int contextId() { return thread->contextId(); } // forwarded to the thread state
+
+    virtual void setContextId(int id) { thread->setContextId(id); } // forwarded to the thread state
+
+    /** Returns this thread's ID number. */
+    virtual int threadId() { return thread->threadId(); }
+    virtual void setThreadId(int id) { return thread->setThreadId(id); }
+
+    /** Returns a pointer to the system. */
+    virtual System *getSystemPtr() { return cpu->system; }
+
+#if FULL_SYSTEM
+    /** Returns a pointer to this thread's kernel statistics. */
+    virtual TheISA::Kernel::Statistics *getKernelStats()
+    { return thread->kernelStats; }
+
+    virtual FunctionalPort *getPhysPort() { return thread->getPhysPort(); }
+
+    virtual VirtualPort *getVirtPort();
+
+    virtual void connectMemPorts(ThreadContext *tc) { thread->connectMemPorts(tc); }
+#else
+    virtual TranslatingPort *getMemPort() { return thread->getMemPort(); }
+
+    /** Returns a pointer to this thread's process. */
+    virtual Process *getProcessPtr() { return thread->getProcessPtr(); }
+#endif
+    /** Returns this thread's status. */
+    virtual Status status() const { return thread->status(); }
+
+    /** Sets this thread's status. */
+    virtual void setStatus(Status new_status)
+    { thread->setStatus(new_status); }
+
+    /** Set the status to Active.  Optional delay indicates number of
+     * cycles to wait before beginning execution. */
+    virtual void activate(int delay = 1);
+
+    /** Set the status to Suspended. */
+    virtual void suspend(int delay = 0);
+
+    /** Set the status to Halted. */
+    virtual void halt(int delay = 0);
+
+#if FULL_SYSTEM
+    /** Dumps the function profiling information.
+     * @todo: Implement.
+     */
+    virtual void dumpFuncProfile();
+#endif
+    /** Takes over execution of a thread from another CPU. */
+    virtual void takeOverFrom(ThreadContext *old_context);
+
+    /** Registers statistics associated with this TC. */
+    virtual void regStats(const std::string &name);
+
+    /** Serializes state. */
+    virtual void serialize(std::ostream &os);
+    /** Unserializes state. */
+    virtual void unserialize(Checkpoint *cp, const std::string &section);
+
+#if FULL_SYSTEM
+    /** Reads the last tick that this thread was activated on. */
+    virtual Tick readLastActivate();
+    /** Reads the last tick that this thread was suspended on. */
+    virtual Tick readLastSuspend();
+
+    /** Clears the function profiling information. */
+    virtual void profileClear();
+    /** Samples the function profiling information. */
+    virtual void profileSample();
+#endif
+    /** Returns the instruction this thread is currently committing.
+     *  Only used when an instruction faults.
+     */
+    virtual TheISA::MachInst getInst();
+
+    /** Copies the architectural registers from another TC into this TC. */
+    virtual void copyArchRegs(ThreadContext *tc);
+
+    /** Resets all architectural registers to 0. */
+    virtual void clearArchRegs();
+
+    /** Reads an integer register. */
+    virtual uint64_t readIntReg(int reg_idx);
+
+    virtual FloatReg readFloatReg(int reg_idx);
+
+    virtual FloatRegBits readFloatRegBits(int reg_idx);
+
+    /** Sets an integer register to a value. */
+    virtual void setIntReg(int reg_idx, uint64_t val);
+
+    virtual void setFloatReg(int reg_idx, FloatReg val);
+
+    virtual void setFloatRegBits(int reg_idx, FloatRegBits val);
+
+    /** Reads this thread's PC. */
+    virtual uint64_t readPC()
+    { return cpu->readPC(thread->threadId()); }
+
+    /** Sets this thread's PC. */
+    virtual void setPC(uint64_t val);
+
+    virtual void setBlockPC(uint64_t val); // sets this thread's block PC through the CPU
+
+    virtual uint64_t getBlockPC() // reads this thread's block PC from the CPU
+    { return cpu->getBlockPC(thread->threadId());}
+
+    /** Reads this thread's next PC. */
+    virtual uint64_t readNextPC()
+    { return cpu->readNextPC(thread->threadId()); }
+
+    /** Sets this thread's next PC. */
+    virtual void setNextPC(uint64_t val);
+
+    virtual uint64_t readMicroPC()
+    {
+        panic("No readMicroPC allowed in EDGE CPU!\n");
+        return 0;
+    }
+
+    virtual void setMicroPC(uint64_t val)
+    {
+        panic("No setMicroPC allowed in EDGE CPU!\n");
+    }
+
+    virtual uint64_t readNextMicroPC()
+    {
+        panic("No readNextMicroPC allowed in EDGE CPU!\n");
+        return 0;
+    }
+
+    virtual void setNextMicroPC(uint64_t val)
+    {
+        panic("No setNextMicroPC allowed in EDGE CPU!\n");
+    }
+
+    /** Reads a miscellaneous register. */
+    virtual MiscReg readMiscRegNoEffect(int misc_reg)
+    { return cpu->readMiscRegNoEffect(misc_reg, thread->threadId()); }
+
+    /** Reads a misc. register, including any side-effects the
+     * read might have as defined by the architecture. */
+    virtual MiscReg readMiscReg(int misc_reg)
+    { return cpu->readMiscReg(misc_reg, thread->threadId()); }
+
+    /** Sets a misc. register. */
+    virtual void setMiscRegNoEffect(int misc_reg, const MiscReg &val);
+
+    /** Sets a misc. register, including any side-effects the
+     * write might have as defined by the architecture. */
+    virtual void setMiscReg(int misc_reg, const MiscReg &val);
+
+    virtual int flattenIntIndex(int reg);
+    virtual int flattenFloatIndex(int reg);
+
+    /** Returns the number of consecutive store conditional failures. */
+    // @todo: Figure out where these store cond failures should go.
+    virtual unsigned readStCondFailures()
+    { return thread->storeCondFailures; }
+
+    /** Sets the number of consecutive store conditional failures. */
+    virtual void setStCondFailures(unsigned sc_failures)
+    { thread->storeCondFailures = sc_failures; }
+
+    // Only really makes sense for old CPU model.  Lots of code
+    // outside the CPU still checks this function, so it will
+    // always return false to keep everything working.
+    /** Checks if the thread is misspeculating.  Because it is
+     * very difficult to determine if the thread is
+     * misspeculating, this is set as false. */
+    virtual bool misspeculating() { return false; }
+
+#if !FULL_SYSTEM
+    /** Executes a syscall in SE mode. */
+    virtual void syscall(int64_t callnum)
+    { return cpu->syscall(callnum, thread->threadId()); }
+
+    /** Reads the funcExeInst counter. */
+    virtual Counter readFuncExeInst() { return thread->funcExeInst; }
+#else
+    /** Returns pointer to the quiesce event. */
+    virtual EndQuiesceEvent *getQuiesceEvent()
+    {
+        return this->thread->quiesceEvent;
+    }
+#endif
+
+    virtual uint64_t readNextNPC()
+    {
+        return this->cpu->readNextNPC(this->thread->threadId());
+    }
+
+    virtual void setNextNPC(uint64_t val)
+    {
+        this->cpu->setNextNPC(val, this->thread->threadId());
+    }
+};
+
+#endif // __CPU_EDGE_THREAD_CONTEXT_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/thread_context.cc
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/thread_context.cc Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,41 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec. 2009
+ *
+ */
+
+#include "cpu/edge/thread_context.hh"
+#include "cpu/edge/thread_context_impl.hh"
+#include "cpu/edge/impl.hh"
+
+template class EdgeThreadContext<EdgeCPUImpl>; // explicit instantiation of the member definitions from thread_context_impl.hh
+
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/thread_context_impl.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/thread_context_impl.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,377 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec. 2009
+ *
+ */
+
+#include "arch/registers.hh"
+#include "config/the_isa.hh"
+#include "cpu/edge/thread_context.hh"
+#include "cpu/quiesce_event.hh"
+
+#if FULL_SYSTEM
+template <class Impl>
+VirtualPort *
+EdgeThreadContext<Impl>::getVirtPort()
+{
+    return thread->getVirtPort();
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::dumpFuncProfile()
+{
+    thread->dumpFuncProfile();
+}
+#endif
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::takeOverFrom(ThreadContext *old_context)
+{
+    // some things should already be set up
+    assert(getSystemPtr() == old_context->getSystemPtr());
+#if !FULL_SYSTEM
+    assert(getProcessPtr() == old_context->getProcessPtr());
+#endif
+
+    // copy over functional state
+    setStatus(old_context->status());
+    copyArchRegs(old_context); // NOTE(review): copyArchRegs() in this file panics, so this call cannot return normally
+    setContextId(old_context->contextId());
+    setThreadId(old_context->threadId());
+
+#if !FULL_SYSTEM
+    thread->funcExeInst = old_context->readFuncExeInst();
+#else
+    EndQuiesceEvent *other_quiesce = old_context->getQuiesceEvent();
+    if (other_quiesce) {
+        // Point the quiesce event's TC at this TC so that it wakes up
+        // the proper CPU.
+        other_quiesce->tc = this;
+    }
+    if (thread->quiesceEvent) {
+        thread->quiesceEvent->tc = this;
+    }
+
+    // Transfer kernel stats from one CPU to the other.
+    thread->kernelStats = old_context->getKernelStats();
+//    storeCondFailures = 0;
+    cpu->lockFlag = false;
+#endif
+
+    old_context->setStatus(ThreadContext::Halted);
+
+    thread->inSyscall = false;
+    thread->trapPending = false;
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::activate(int delay)
+{
+    DPRINTF(EdgeCPU, "Calling activate on Thread Context %d\n",
+            threadId());
+
+    if (thread->status() == ThreadContext::Active)
+        return;
+
+#if FULL_SYSTEM
+    thread->lastActivate = curTick;
+#endif
+
+    thread->setStatus(ThreadContext::Active);
+
+    // status() == Suspended
+    cpu->activateContext(thread->threadId(), delay);
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::suspend(int delay)
+{
+    DPRINTF(EdgeCPU, "Calling suspend on Thread Context %d\n",
+            threadId());
+
+    if (thread->status() == ThreadContext::Suspended)
+        return;
+
+#if FULL_SYSTEM
+    thread->lastActivate = curTick; // NOTE(review): suspend also overwrites lastActivate - confirm this is intended
+    thread->lastSuspend = curTick;
+#endif
+/*
+#if FULL_SYSTEM
+    // Don't change the status from active if there are pending interrupts
+    if (cpu->checkInterrupts()) {
+        assert(status() == ThreadContext::Active);
+        return;
+    }
+#endif
+*/
+    thread->setStatus(ThreadContext::Suspended);
+    cpu->suspendContext(thread->threadId()); // the delay argument is not forwarded
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::halt(int delay)
+{
+    DPRINTF(EdgeCPU, "Calling halt on Thread Context %d\n",
+            threadId());
+
+    if (thread->status() == ThreadContext::Halted)
+        return;
+
+    thread->setStatus(ThreadContext::Halted);
+    cpu->haltContext(thread->threadId()); // the delay argument is not forwarded
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::regStats(const std::string &name)
+{
+#if FULL_SYSTEM
+    thread->kernelStats = new TheISA::Kernel::Statistics(cpu->system);
+    thread->kernelStats->regStats(name + ".kern");
+#endif
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::serialize(std::ostream &os)
+{
+#if FULL_SYSTEM
+    if (thread->kernelStats)
+        thread->kernelStats->serialize(os);
+#endif
+
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::unserialize(Checkpoint *cp, const std::string &section)
+{
+#if FULL_SYSTEM
+    if (thread->kernelStats)
+        thread->kernelStats->unserialize(cp, section);
+#endif
+
+}
+
+#if FULL_SYSTEM
+template <class Impl>
+Tick
+EdgeThreadContext<Impl>::readLastActivate()
+{
+    return thread->lastActivate;
+}
+
+template <class Impl>
+Tick
+EdgeThreadContext<Impl>::readLastSuspend()
+{
+    return thread->lastSuspend;
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::profileClear()
+{
+    thread->profileClear();
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::profileSample()
+{
+    thread->profileSample();
+}
+#endif
+
+template <class Impl>
+TheISA::MachInst
+EdgeThreadContext<Impl>::getInst()
+{
+    return thread->getInst();
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::copyArchRegs(ThreadContext *tc)
+{
+    // This function will mess things up unless the ROB is empty and
+    // there are no instructions in the pipeline.
+    panic("EdgeThreadContext::copyArchRegs unimplemented yet!\n");
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::clearArchRegs()
+{}
+
+template <class Impl>
+uint64_t
+EdgeThreadContext<Impl>::readIntReg(int reg_idx)
+{
+    reg_idx = cpu->isa[thread->threadId()].flattenIntIndex(reg_idx);
+    return cpu->readArchIntReg(reg_idx, thread->threadId());
+}
+
+template <class Impl>
+TheISA::FloatReg
+EdgeThreadContext<Impl>::readFloatReg(int reg_idx)
+{
+    reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
+    return cpu->readArchFloatReg(reg_idx, thread->threadId());
+}
+
+template <class Impl>
+TheISA::FloatRegBits
+EdgeThreadContext<Impl>::readFloatRegBits(int reg_idx)
+{
+    reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
+    return cpu->readArchFloatRegInt(reg_idx, thread->threadId());
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setIntReg(int reg_idx, uint64_t val)
+{
+    reg_idx = cpu->isa[thread->threadId()].flattenIntIndex(reg_idx);
+    cpu->setArchIntReg(reg_idx, val, thread->threadId());
+
+    // Squash if we're not already in a state update mode.
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setFloatReg(int reg_idx, FloatReg val)
+{
+    reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
+    cpu->setArchFloatReg(reg_idx, val, thread->threadId());
+
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setFloatRegBits(int reg_idx, FloatRegBits val)
+{
+    reg_idx = cpu->isa[thread->threadId()].flattenFloatIndex(reg_idx);
+    cpu->setArchFloatRegInt(reg_idx, val, thread->threadId());
+
+    // Squash if we're not already in a state update mode.
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setPC(uint64_t val)
+{
+    cpu->setPC(val, thread->threadId());
+
+    // Squash if we're not already in a state update mode.
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setBlockPC(uint64_t val )
+{
+    cpu->setBlockPC(val, thread->threadId());
+
+    if ( !thread->trapPending && !thread->inSyscall ) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setNextPC(uint64_t val)
+{
+    cpu->setNextPC(val, thread->threadId());
+
+    // Squash if we're not already in a state update mode.
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
+template <class Impl>
+int
+EdgeThreadContext<Impl>::flattenIntIndex(int reg)
+{
+    return cpu->isa[thread->threadId()].flattenIntIndex(reg);
+}
+
+template <class Impl>
+int
+EdgeThreadContext<Impl>::flattenFloatIndex(int reg)
+{
+    return cpu->isa[thread->threadId()].flattenFloatIndex(reg);
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setMiscRegNoEffect(int misc_reg, const MiscReg &val)
+{
+    cpu->setMiscRegNoEffect(misc_reg, val, thread->threadId());
+
+    // Squash if we're not already in a state update mode.
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
+template <class Impl>
+void
+EdgeThreadContext<Impl>::setMiscReg(int misc_reg,
+                                                const MiscReg &val)
+{
+    cpu->setMiscReg(misc_reg, val, thread->threadId());
+
+    // Squash if we're not already in a state update mode.
+    if (!thread->trapPending && !thread->inSyscall) {
+        cpu->squashFromTC(thread->threadId());
+    }
+}
+
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/edge/thread_state.hh
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/cpu/edge/thread_state.hh Wed Apr 28 19:24:20 2010 +0800
@@ -0,0 +1,135 @@
+/*
+ * Copyright (c) 2009-2010 HIT Microelectronic Center
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ *
+ * Authors: Gou Pengfei
+ *          Jin Yanhan
+ *
+ * Date: Dec. 2009
+ *
+ */
+
+#ifndef __CPU_EDGE_THREAD_STATE_HH__
+#define __CPU_EDGE_THREAD_STATE_HH__
+
+#include "base/callback.hh"
+#include "base/output.hh"
+#include "cpu/thread_context.hh"
+#include "cpu/thread_state.hh"
+#include "sim/sim_exit.hh"
+
+class Event;
+class Process;
+
+#if FULL_SYSTEM
+class EndQuiesceEvent;
+class FunctionProfile;
+class ProfileNode;
+#else
+class FunctionalMemory;
+#endif
+
+/**
+ * Per-thread state for the EDGE CPU model, layered on ThreadState.
+ * Tracks whether the thread is in a syscall or has a trap pending
+ * (either allows external updates without squashing), a block
+ * counter, and the ThreadContext pointer.  Also dispatches syscalls
+ * (non-full-system) or dumps the function profile (full-system).
+ */
+template <class Impl>
+struct EdgeThreadState : public ThreadState {
+    typedef ThreadContext::Status Status;
+    typedef typename Impl::CPU CPU;
+
+  private:
+    /** Pointer to the CPU. */
+    CPU *cpu;
+  public:
+    /** Whether or not the thread is currently in syscall mode, and
+     *  thus able to be externally updated without squashing. */
+    bool inSyscall;
+
+    /** Whether or not the thread is currently waiting on a trap, and
+     *  thus able to be externally updated without squashing. */
+    bool trapPending;
+
+    /** Number of blocks in this thread; starts at zero. */
+    Counter numBlock;
+    /** NOTE(review): presumably the Stats view of numBlock; confirm. */
+    Stats::Scalar numBlocks;
+
+#if FULL_SYSTEM
+    /** Full-system constructor; sets up function profiling if enabled. */
+    EdgeThreadState(CPU *_cpu, int _thread_num)
+        : ThreadState(_cpu, _thread_num), cpu(_cpu), inSyscall(false),
+          trapPending(false), numBlock(0), tc(0)
+    {
+        if (cpu->params()->profile) {
+            profile = new FunctionProfile(cpu->params()->system->kernelSymtab);
+            Callback *cb =
+                new MakeCallback<EdgeThreadState,
+                &EdgeThreadState::dumpFuncProfile>(this);
+            registerExitCallback(cb);
+        }
+
+        // let's fill with a dummy node for now so we don't get a segfault
+        // on the first cycle when there's no node available.
+        static ProfileNode dummyNode;
+        profileNode = &dummyNode;
+        profilePC = 3;
+    }
+#else
+    /** Non-full-system constructor; _process is passed on to ThreadState. */
+    EdgeThreadState(CPU *_cpu, int _thread_num, Process *_process)
+        : ThreadState(_cpu, _thread_num, _process), cpu(_cpu),
+          inSyscall(false), trapPending(false), numBlock(0), tc(0)
+    { }
+#endif
+
+    /** Pointer to the ThreadContext of this thread; constructed as null. */
+    ThreadContext *tc;
+
+    /** Returns a pointer to the TC of this thread. */
+    ThreadContext *getTC() { return tc; }
+
+#if !FULL_SYSTEM
+    /** Handles the syscall by forwarding callnum and tc to the process. */
+    void syscall(int64_t callnum) { process->syscall(callnum, tc); }
+#endif
+
+#if FULL_SYSTEM
+    /** Exit callback: dump the function profile to profile.<cpu name>.dat. */
+    void dumpFuncProfile()
+    {
+        std::ostream *os =
+            simout.create(csprintf("profile.%s.dat", cpu->name()));
+        profile->dump(tc, *os);
+    }
+#endif
+};
+
+#endif // __CPU_EDGE_THREAD_STATE_HH__
diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/simple_thread.hh --- a/src/cpu/simple_thread.hh Sun Apr 18 21:33:59 2010 -0700 +++ b/src/cpu/simple_thread.hh Wed Apr 28 19:24:20 2010 +0800 @@ -303,6 +303,12 @@ assert(flatIndex < TheISA::NumFloatRegs); floatRegs.i[flatIndex] = val; } + +#if THE_ISA == TRIPS_ISA + uint64_t getBlockPC() {warn("Simple Thread has no getBlockPC() method\n"); return 0;} + + void setBlockPC(uint64_t val) { warn("Simple Thread has no setBlockPC() method\n");} +#endif uint64_t readPC() { diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/simple_thread.cc --- a/src/cpu/simple_thread.cc Sun Apr 18 21:33:59 2010 -0700 +++ b/src/cpu/simple_thread.cc Wed Apr 28 19:24:20 2010 +0800 @@ -302,3 +302,4 @@ TheISA::copyRegs(src_tc, tc); } + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/static_inst.hh --- a/src/cpu/static_inst.hh Sun Apr 18 21:33:59 2010 -0700 +++ b/src/cpu/static_inst.hh Wed Apr 28 19:24:20 2010 +0800 @@ -60,6 +60,11 @@ template <class Impl> class OzoneDynInst; class InOrderDynInst; +// Dec.2009, Gou Pengfei +class EdgeCPUImpl; +template <class Impl> class SimpleEdgeDynInst; +typedef SimpleEdgeDynInst<EdgeCPUImpl> EdgeDynInst; + class CheckerCPU; class FastCPU; class AtomicSimpleCPU; @@ -68,6 +73,7 @@ class SymbolTable; class AddrDecodePage; + namespace Trace { class InstRecord; } @@ -182,6 +188,14 @@ IsMicroBranch, ///< This microop branches within the microcode for a macroop IsDspOp, + // Dec.
2009, phoenix + // Flags added for EDGE instructions + IsEDGE, + IsGRegWR, + IsGRegWriteValid, + IsGRegReadValid, + IsMove, + NumFlags }; @@ -279,6 +293,15 @@ bool isDelayedCommit() const { return flags[IsDelayedCommit]; } bool isLastMicroop() const { return flags[IsLastMicroop]; } bool isFirstMicroop() const { return flags[IsFirstMicroop]; } + + // Dec. 2009, phoenix + // Accessors for the EDGE flags; each returns the matching entry of flags[] + bool isEDGE() const { return flags[IsEDGE];} + bool isGRegWR() const { return flags[IsGRegWR];} + bool isGRegWriteValid() const { return flags[IsGRegWriteValid]; } + bool isGRegReadValid() const { return flags[IsGRegReadValid]; } + bool isMove() const { return flags[IsMove]; } + //This flag doesn't do anything yet bool isMicroBranch() const { return flags[IsMicroBranch]; } //@} @@ -390,7 +413,9 @@ * The execute() signatures are auto-generated by scons based on the * set of CPU models we are compiling in today. */ + #if THE_ISA != TRIPS_ISA #include "cpu/static_inst_exec_sigs.hh" +#endif // THE_ISA != TRIPS_ISA /** * Return the microop that goes with a particular micropc.
This should diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/static_inst.cc --- a/src/cpu/static_inst.cc Sun Apr 18 21:33:59 2010 -0700 +++ b/src/cpu/static_inst.cc Wed Apr 28 19:24:20 2010 +0800 @@ -114,3 +114,4 @@ return *cachedDisassembly; } + diff -r edde97a6ea7c -r 9bb72531ffd5 src/cpu/thread_context.hh --- a/src/cpu/thread_context.hh Sun Apr 18 21:33:59 2010 -0700 +++ b/src/cpu/thread_context.hh Wed Apr 28 19:24:20 2010 +0800 @@ -199,6 +199,12 @@ virtual uint64_t readPC() = 0; +#if THE_ISA == TRIPS_ISA + virtual uint64_t getBlockPC() = 0; + + virtual void setBlockPC(uint64_t val) = 0; +#endif // THE_ISA == TRIPS_ISA + virtual void setPC(uint64_t val) = 0; virtual uint64_t readNextPC() = 0; @@ -384,6 +390,12 @@ void setFloatRegBits(int reg_idx, FloatRegBits val) { actualTC->setFloatRegBits(reg_idx, val); } +#if THE_ISA == TRIPS_ISA + virtual uint64_t getBlockPC() { return actualTC->getBlockPC();} + + virtual void setBlockPC(uint64_t val) { actualTC->setBlockPC(val);} +#endif // THE_ISA == TRIPS_ISA + uint64_t readPC() { return actualTC->readPC(); } void setPC(uint64_t val) { actualTC->setPC(val); }