diff -r cc1b6d5ce557 -r 5e9470342c08 src/cpu/o3/fetch.hh --- a/src/cpu/o3/fetch.hh Mon Dec 06 15:37:56 2010 -0800 +++ b/src/cpu/o3/fetch.hh Mon Dec 06 15:48:13 2010 -0800 @@ -1,4 +1,16 @@ /* + * Copyright (c) 2010 ARM Limited + * All rights reserved + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -38,6 +50,7 @@ #include "base/timebuf.hh" #include "config/the_isa.hh" #include "cpu/pc_event.hh" +#include "cpu/translation.hh" #include "mem/packet.hh" #include "mem/port.hh" #include "sim/eventq.hh" @@ -113,6 +126,26 @@ virtual void recvRetry(); }; + class FetchTranslation : public BaseTLB::Translation + { + protected: + DefaultFetch *fetch; + ThreadID tid; + + public: + FetchTranslation(DefaultFetch *_fetch, ThreadID _tid, + RequestPtr _mem_req) + : fetch(_fetch), tid(_tid) + {} + + void + finish(Fault fault, RequestPtr req, ThreadContext *tc, + BaseTLB::Mode mode) + { + fetch->finishTranslation(fault, req); + delete this; + } + }; public: /** Overall fetch status. Used to determine if the CPU can @@ -133,6 +166,7 @@ TrapPending, QuiescePending, SwitchOut, + ItlbWait, IcacheWaitResponse, IcacheWaitRetry, IcacheAccessComplete @@ -242,7 +276,8 @@ * @param pc The actual PC of the current instruction. * @return Any fault that occured. 
*/ - bool fetchCacheLine(Addr vaddr, Fault &ret_fault, ThreadID tid, Addr pc); + bool fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc); + void finishTranslation(Fault fault, RequestPtr mem_req); /** Squashes a specific thread and resets the PC. */ inline void doSquash(const TheISA::PCState &newPC, ThreadID tid); diff -r cc1b6d5ce557 -r 5e9470342c08 src/cpu/o3/fetch_impl.hh --- a/src/cpu/o3/fetch_impl.hh Mon Dec 06 15:37:56 2010 -0800 +++ b/src/cpu/o3/fetch_impl.hh Mon Dec 06 15:48:13 2010 -0800 @@ -548,11 +548,11 @@ template bool -DefaultFetch::fetchCacheLine(Addr vaddr, Fault &ret_fault, ThreadID tid, - Addr pc) +DefaultFetch::fetchCacheLine(Addr vaddr, ThreadID tid, Addr pc) { Fault fault = NoFault; + // @todo: not sure if these should block translation. //AlphaDep if (cacheBlocked) { DPRINTF(Fetch, "[tid:%i] Can't fetch cache line, cache blocked\n", @@ -575,11 +575,6 @@ // Align the fetch address so it's at the start of a cache block. Addr block_PC = icacheBlockAlignPC(vaddr); - // If we've already got the block, no need to try to fetch it again. - if (cacheDataValid[tid] && block_PC == cacheDataPC[tid]) { - return true; - } - // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. @@ -589,15 +584,22 @@ memReq[tid] = mem_req; - // Translate the instruction request. - fault = cpu->itb->translateAtomic(mem_req, cpu->thread[tid]->getTC(), - BaseTLB::Execute); + // Initiate translation of the icache block + fetchStatus[tid] = ItlbWait; + FetchTranslation *trans = new FetchTranslation(this, tid, mem_req); + cpu->itb->translateTiming(mem_req, cpu->thread[tid]->getTC(), + trans, BaseTLB::Execute); + return true; +} - // In the case of faults, the fetch stage may need to stall and wait - // for the ITB miss to be handled. 
+template +void +DefaultFetch::finishTranslation(Fault fault, RequestPtr mem_req) +{ + ThreadID tid = mem_req->threadId(); + Addr block_PC = mem_req->getVaddr(); - // If translation was successful, attempt to read the first - // instruction. + // If translation was successful, attempt to read the icache block. if (fault == NoFault) { #if 0 if (cpu->system->memctrl->badaddr(memReq[tid]->paddr) || @@ -609,7 +611,6 @@ return false; } #endif - // Build packet here. PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq, Packet::Broadcast); @@ -617,39 +618,65 @@ cacheDataPC[tid] = block_PC; cacheDataValid[tid] = false; - DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); fetchedCacheLines++; - // Now do the timing access to see whether or not the instruction - // exists within the cache. + // Access the cache. if (!icachePort->sendTiming(data_pkt)) { assert(retryPkt == NULL); assert(retryTid == InvalidThreadID); DPRINTF(Fetch, "[tid:%i] Out of MSHRs!\n", tid); + fetchStatus[tid] = IcacheWaitRetry; retryPkt = data_pkt; retryTid = tid; cacheBlocked = true; - return false; + } else { + DPRINTF(Fetch, "[tid:%i]: Doing Icache access.\n", tid); + DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " + "response.\n", tid); + + lastIcacheStall[tid] = curTick; + fetchStatus[tid] = IcacheWaitResponse; } - - DPRINTF(Fetch, "[tid:%i]: Doing cache access.\n", tid); - - lastIcacheStall[tid] = curTick; - - DPRINTF(Activity, "[tid:%i]: Activity: Waiting on I-cache " - "response.\n", tid); - - fetchStatus[tid] = IcacheWaitResponse; } else { + // Translation faulted, icache request won't be sent. delete mem_req; memReq[tid] = NULL; + + // Send the fault to commit. This thread will not do anything + // until commit handles the fault. The only other way it can + // wake up is if a squash comes along and changes the PC. + assert(numInst < fetchWidth); + + InstSeqNum inst_seq = cpu->getAndIncrementInstSeq(); + // We will use a nop in order to carry the fault. 
+ ExtMachInst ext_inst = TheISA::NoopMachInst; + TheISA::PCState fetchPC = pc[tid]; + + // Create a new DynInst from the dummy nop. + DynInstPtr instruction = new DynInst(ext_inst, + fetchPC, fetchPC, + inst_seq, cpu); + instruction->setPredTarg(fetchPC); + instruction->setTid(tid); + instruction->setASID(tid); + instruction->setThreadState(cpu->thread[tid]); + instruction->traceData = NULL; + instruction->setInstListIt(cpu->addInst(instruction)); + instruction->fault = fault; + toDecode->insts[numInst] = instruction; + toDecode->size++; + wroteToTimeBuffer = true; + + fetchStatus[tid] = TrapPending; + + DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n", tid); + DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s.\n", + tid, fault->name(), pc[tid]); } - - ret_fault = fault; - return true; + _status = updateFetchStatus(); } template @@ -1044,9 +1071,6 @@ // The current PC. TheISA::PCState thisPC = pc[tid]; - // Fault code for memory access. - Fault fault = NoFault; - Addr pcOffset = fetchOffset[tid]; Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; @@ -1054,18 +1078,28 @@ // to running, otherwise do the cache access. Possibly move this up // to tick() function. if (fetchStatus[tid] == IcacheAccessComplete) { - DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n",tid); + DPRINTF(Fetch, "[tid:%i]: Icache miss is complete.\n", tid); fetchStatus[tid] = Running; status_change = true; } else if (fetchStatus[tid] == Running) { - DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " - "instruction, starting at PC %#x.\n", tid, fetchAddr); + // Align the fetch PC so it's at the start of a cache block. + Addr block_PC = icacheBlockAlignPC(fetchAddr); - bool fetch_success = fetchCacheLine(fetchAddr, fault, tid, - thisPC.instAddr()); - if (!fetch_success) { - if (cacheBlocked) { + // Unless buffer already got the block, fetch it from icache. 
+ if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid]) { + DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " + "instruction, starting at PC %s.\n", tid, thisPC); + + bool trans_started = fetchCacheLine(fetchAddr, tid, thisPC.instAddr()); + if (!trans_started) { + if (cacheBlocked) { + ++icacheStallCycles; + } else { + ++fetchMiscStallCycles; + } + } + if (fetchStatus[tid] == IcacheWaitResponse) { ++icacheStallCycles; } else { ++fetchMiscStallCycles; @@ -1090,145 +1124,139 @@ DPRINTF(Fetch, "[tid:%i]: Fetch is squashing!\n", tid); } else if (fetchStatus[tid] == IcacheWaitResponse) { ++icacheStallCycles; - DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", tid); + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting cache response!\n", + tid); + } else if (fetchStatus[tid] == ItlbWait) { + DPRINTF(Fetch, "[tid:%i]: Fetch is waiting ITLB walk to " + "finish! \n", tid); } - // Status is Idle, Squashing, Blocked, or IcacheWaitResponse, so - // fetch should do nothing. + // Status is Idle, Squashing, Blocked, ItlbWait or IcacheWaitResponse + // so fetch should do nothing. return; } ++fetchCycles; - // If we had a stall due to an icache miss, then return. - if (fetchStatus[tid] == IcacheWaitResponse) { - ++icacheStallCycles; - status_change = true; - return; - } - TheISA::PCState nextPC = thisPC; StaticInstPtr staticInst = NULL; StaticInstPtr curMacroop = macroop[tid]; - if (fault == NoFault) { + // If the read of the first instruction was successful, then grab the + // instructions from the rest of the cache line and put them into the + // queue heading to decode. - // If the read of the first instruction was successful, then grab the - // instructions from the rest of the cache line and put them into the - // queue heading to decode. 
+ DPRINTF(Fetch, "[tid:%i]: Adding instructions to queue to " + "decode.\n", tid); - DPRINTF(Fetch, - "[tid:%i]: Adding instructions to queue to decode.\n", tid); + // Need to keep track of whether or not a predicted branch + // ended this fetch block. + bool predictedBranch = false; - // Need to keep track of whether or not a predicted branch - // ended this fetch block. - bool predictedBranch = false; + TheISA::MachInst *cacheInsts = + reinterpret_cast(cacheData[tid]); - TheISA::MachInst *cacheInsts = - reinterpret_cast(cacheData[tid]); + const unsigned numInsts = cacheBlkSize / instSize; + unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; - const unsigned numInsts = cacheBlkSize / instSize; - unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; + // Loop through instruction memory from the cache. + while (blkOffset < numInsts && + numInst < fetchWidth && + !predictedBranch) { - // Loop through instruction memory from the cache. - while (blkOffset < numInsts && - numInst < fetchWidth && - !predictedBranch) { - - // If we need to process more memory, do it now. - if (!curMacroop && !predecoder.extMachInstReady()) { - if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { - // Walk past any annulled delay slot instructions. - Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; - while (fetchAddr != pcAddr && blkOffset < numInsts) { - blkOffset++; - fetchAddr += instSize; - } - if (blkOffset >= numInsts) - break; - } - MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]); - - predecoder.setTC(cpu->thread[tid]->getTC()); - predecoder.moreBytes(thisPC, fetchAddr, inst); - - if (predecoder.needMoreBytes()) { + // If we need to process more memory, do it now. + if (!curMacroop && !predecoder.extMachInstReady()) { + if (ISA_HAS_DELAY_SLOT && pcOffset == 0) { + // Walk past any annulled delay slot instructions. 
+ Addr pcAddr = thisPC.instAddr() & BaseCPU::PCMask; + while (fetchAddr != pcAddr && blkOffset < numInsts) { blkOffset++; fetchAddr += instSize; - pcOffset += instSize; + } + if (blkOffset >= numInsts) + break; + } + MachInst inst = TheISA::gtoh(cacheInsts[blkOffset]); + + predecoder.setTC(cpu->thread[tid]->getTC()); + predecoder.moreBytes(thisPC, fetchAddr, inst); + + if (predecoder.needMoreBytes()) { + blkOffset++; + fetchAddr += instSize; + pcOffset += instSize; + } + } + + // Extract as many instructions and/or microops as we can from + // the memory we've processed so far. + do { + if (!curMacroop) { + if (predecoder.extMachInstReady()) { + ExtMachInst extMachInst; + + extMachInst = predecoder.getExtMachInst(thisPC); + pcOffset = 0; + staticInst = StaticInstPtr(extMachInst, + thisPC.instAddr()); + + // Increment stat of fetched instructions. + ++fetchedInsts; + + if (staticInst->isMacroop()) + curMacroop = staticInst; + } else { + // We need more bytes for this instruction. + break; } } + if (curMacroop) { + staticInst = curMacroop->fetchMicroop(thisPC.microPC()); + if (staticInst->isLastMicroop()) + curMacroop = NULL; + } - // Extract as many instructions and/or microops as we can from - // the memory we've processed so far. - do { - if (!curMacroop) { - if (predecoder.extMachInstReady()) { - ExtMachInst extMachInst; + DynInstPtr instruction = + buildInst(tid, staticInst, curMacroop, + thisPC, nextPC, true); - extMachInst = predecoder.getExtMachInst(thisPC); - pcOffset = 0; - staticInst = StaticInstPtr(extMachInst, - thisPC.instAddr()); + numInst++; - // Increment stat of fetched instructions. - ++fetchedInsts; + nextPC = thisPC; - if (staticInst->isMacroop()) - curMacroop = staticInst; - } else { - // We need more bytes for this instruction. 
- break; - } - } - if (curMacroop) { - staticInst = curMacroop->fetchMicroop(thisPC.microPC()); - if (staticInst->isLastMicroop()) - curMacroop = NULL; - } + // If we're branching after this instruction, quit fetching + // from the same block then. + predictedBranch |= thisPC.branching(); + predictedBranch |= + lookupAndUpdateNextPC(instruction, nextPC); + if (predictedBranch) { + DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); + } - DynInstPtr instruction = - buildInst(tid, staticInst, curMacroop, - thisPC, nextPC, true); + // Move to the next instruction, unless we have a branch. + thisPC = nextPC; - numInst++; + if (instruction->isQuiesce()) { + DPRINTF(Fetch, + "Quiesce instruction encountered, halting fetch!"); + fetchStatus[tid] = QuiescePending; + status_change = true; + break; + } + } while ((curMacroop || predecoder.extMachInstReady()) && + numInst < fetchWidth); + } - nextPC = thisPC; - - // If we're branching after this instruction, quite fetching - // from the same block then. - predictedBranch |= thisPC.branching(); - predictedBranch |= - lookupAndUpdateNextPC(instruction, nextPC); - if (predictedBranch) { - DPRINTF(Fetch, "Branch detected with PC = %s\n", thisPC); - } - - // Move to the next instruction, unless we have a branch. 
- thisPC = nextPC; - - if (instruction->isQuiesce()) { - DPRINTF(Fetch, - "Quiesce instruction encountered, halting fetch!"); - fetchStatus[tid] = QuiescePending; - status_change = true; - break; - } - } while ((curMacroop || predecoder.extMachInstReady()) && - numInst < fetchWidth); - } - - if (predictedBranch) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " - "instruction encountered.\n", tid); - } else if (numInst >= fetchWidth) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " - "for this cycle.\n", tid); - } else if (blkOffset >= cacheBlkSize) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " - "block.\n", tid); - } + if (predictedBranch) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, predicted branch " + "instruction encountered.\n", tid); + } else if (numInst >= fetchWidth) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " + "for this cycle.\n", tid); + } else if (blkOffset >= cacheBlkSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " + "block.\n", tid); } macroop[tid] = curMacroop; @@ -1237,42 +1265,7 @@ if (numInst > 0) { wroteToTimeBuffer = true; } - - // Now that fetching is completed, update the PC to signify what the next - // cycle will be. - if (fault == NoFault) { - pc[tid] = nextPC; - DPRINTF(Fetch, "[tid:%i]: Setting PC to %s.\n", tid, nextPC); - } else { - // We shouldn't be in an icache miss and also have a fault (an ITB - // miss) - if (fetchStatus[tid] == IcacheWaitResponse) { - panic("Fetch should have exited prior to this!"); - } - - // Send the fault to commit. This thread will not do anything - // until commit handles the fault. The only other way it can - // wake up is if a squash comes along and changes the PC. Send the - // fault on a dummy nop. 
- staticInst = StaticInstPtr(TheISA::NoopMachInst, thisPC.instAddr()); - - DynInstPtr instruction = - buildInst(tid, staticInst, NULL, thisPC, nextPC, false); - - TheISA::advancePC(nextPC, staticInst); - instruction->setPredTarg(nextPC); - instruction->fault = fault; - - DPRINTF(Fetch, "[tid:%i]: Blocked, need to handle the trap.\n",tid); - - fetchStatus[tid] = TrapPending; - status_change = true; - - DPRINTF(Fetch, "[tid:%i]: fault (%s) detected @ PC %s", - tid, fault->name(), thisPC); - DPRINTF(Fetch, "[tid:%i]: Sending a nop down the pipe. [sn:%lli]\n", - tid, inst_seq); - } + pc[tid] = thisPC; } template