# Node ID 9e2898b0e9bd52bab669bdafa2cdf741cee193dc # Parent 2f9aecba23623a188e3c775530cdc47dc78dec6e diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py --- a/configs/common/O3_ARM_v7a.py +++ b/configs/common/O3_ARM_v7a.py @@ -42,7 +42,7 @@ count = 1 -# Floating point and SIMD instructions +# Floating point and SIMD instructions class O3_ARM_v7a_FP(FUDesc): opList = [ OpDesc(opClass='SimdAdd', opLat=4), OpDesc(opClass='SimdAddAcc', opLat=4), @@ -119,6 +119,7 @@ commitToRenameDelay = 1 commitToIEWDelay = 1 fetchWidth = 3 + fetchBufferSize = 16 fetchToDecodeDelay = 3 decodeWidth = 3 decodeToRenameDelay = 2 @@ -166,7 +167,7 @@ write_buffers = 16 is_top_level = 'true' -# TLB Cache +# TLB Cache # Use a cache as a L2 TLB class O3_ARM_v7aWalkCache(BaseCache): hit_latency = 4 diff --git a/src/SConscript b/src/SConscript --- a/src/SConscript +++ b/src/SConscript @@ -148,7 +148,7 @@ def __ge__(self, other): return self.filename >= other.filename def __eq__(self, other): return self.filename == other.filename def __ne__(self, other): return self.filename != other.filename - + class Source(SourceFile): '''Add a c/c++ source file to the build''' def __init__(self, source, Werror=True, swig=False, **guards): @@ -164,7 +164,7 @@ modules = {} tnodes = {} symnames = {} - + def __init__(self, package, source, **guards): '''specify the python package, the source file, and any guards''' super(PySource, self).__init__(source, **guards) diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -60,6 +60,7 @@ "delay") commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay") fetchWidth = Param.Unsigned(8, "Fetch width") + fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes") renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay") iewToDecodeDelay = Param.Cycles(1, "Issue/Execute/Writeback to decode " diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh --- a/src/cpu/o3/fetch.hh +++ 
b/src/cpu/o3/fetch.hh @@ -274,9 +274,9 @@ bool lookupAndUpdateNextPC(DynInstPtr &inst, TheISA::PCState &pc); /** - * Fetches the cache line that contains fetch_PC. Returns any + * Fetches the cache line that contains the fetch PC. Returns any * fault that happened. Puts the data into the class variable - * cacheData. + * fetchBuffer, which may not hold the entire fetched cache line. * @param vaddr The memory address that is being fetched from. * @param ret_fault The fault reference that will be set to the result of * the icache access. @@ -339,10 +339,10 @@ */ void fetch(bool &status_change); - /** Align a PC to the start of an I-cache block. */ - Addr icacheBlockAlignPC(Addr addr) + /** Align a PC to the start of a fetch buffer block. */ + Addr fetchBufferAlignPC(Addr addr) { - return (addr & ~(cacheBlkMask)); + return (addr & ~(fetchBufferMask)); } /** The decoder. */ @@ -463,17 +463,22 @@ /** Cache block size. */ unsigned int cacheBlkSize; - /** Mask to get a cache block's address. */ - Addr cacheBlkMask; + /** The size of the fetch buffer in bytes. The fetch buffer + * itself may be smaller than a cache line. + */ + unsigned fetchBufferSize; - /** The cache line being fetched. */ - uint8_t *cacheData[Impl::MaxThreads]; + /** Mask to align a fetch address to a fetch buffer boundary. */ + Addr fetchBufferMask; - /** The PC of the cacheline that has been loaded. */ - Addr cacheDataPC[Impl::MaxThreads]; + /** The fetch data that is being fetched and buffered. */ + uint8_t *fetchBuffer[Impl::MaxThreads]; - /** Whether or not the cache data is valid. */ - bool cacheDataValid[Impl::MaxThreads]; + /** The PC of the first instruction loaded into the fetch buffer. */ + Addr fetchBufferPC[Impl::MaxThreads]; + + /** Whether or not the fetch buffer data is valid. */ + bool fetchBufferValid[Impl::MaxThreads]; /** Size of instructions. 
*/ int instSize; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -82,7 +82,8 @@ retryPkt(NULL), retryTid(InvalidThreadID), cacheBlkSize(cpu->cacheLineSize()), - cacheBlkMask(cacheBlkSize - 1), + fetchBufferSize(params->fetchBufferSize), + fetchBufferMask(fetchBufferSize - 1), numThreads(params->numThreads), numFetchingThreads(params->smtNumFetchingThreads), finishTranslationEvent(this) @@ -95,6 +96,12 @@ fatal("fetchWidth (%d) is larger than compiled limit (%d),\n" "\tincrease MaxWidth in src/cpu/o3/impl.hh\n", fetchWidth, static_cast<int>(Impl::MaxWidth)); + if (fetchBufferSize > cacheBlkSize) + fatal("fetch buffer size (%u bytes) is greater than the cache " + "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize); + if (cacheBlkSize % fetchBufferSize) + fatal("cache block (%u bytes) is not a multiple of the " + "fetch buffer (%u bytes)\n", cacheBlkSize, fetchBufferSize); std::string policy = params->smtFetchPolicy; @@ -128,16 +135,19 @@ instSize = sizeof(TheISA::MachInst); for (int i = 0; i < Impl::MaxThreads; i++) { - decoder[i] = new TheISA::Decoder; + decoder[i] = NULL; + fetchBuffer[i] = NULL; + fetchBufferPC[i] = 0; + fetchBufferValid[i] = false; } branchPred = params->branchPred; for (ThreadID tid = 0; tid < numThreads; tid++) { - // Create space to store a cache line. - cacheData[tid] = new uint8_t[cacheBlkSize]; - cacheDataPC[tid] = 0; - cacheDataValid[tid] = false; + decoder[tid] = new TheISA::Decoder; + // Create space to buffer the cache line data, + // which may not hold the entire cache line.
+ fetchBuffer[tid] = new uint8_t[fetchBufferSize]; } } @@ -344,6 +354,11 @@ wroteToTimeBuffer = false; _status = Inactive; + + for (ThreadID tid = 0; tid < numThreads; ++tid) { + fetchBufferPC[tid] = 0; + fetchBufferValid[tid] = false; + } } template <class Impl> @@ -365,8 +380,8 @@ return; } - memcpy(cacheData[tid], pkt->getPtr<uint8_t>(), cacheBlkSize); - cacheDataValid[tid] = true; + memcpy(fetchBuffer[tid], pkt->getPtr<uint8_t>(), fetchBufferSize); + fetchBufferValid[tid] = true; // Wake up the CPU (if it went to sleep and was waiting on // this completion event). @@ -565,17 +580,17 @@ return false; } - // Align the fetch address so it's at the start of a cache block. - Addr block_PC = icacheBlockAlignPC(vaddr); + // Align the fetch address to the start of a fetch buffer segment. + Addr fetchBufferBlockPC = fetchBufferAlignPC(vaddr); DPRINTF(Fetch, "[tid:%i] Fetching cache line %#x for addr %#x\n", - tid, block_PC, vaddr); + tid, fetchBufferBlockPC, vaddr); // Setup the memReq to do a read of the first instruction's address. // Set the appropriate read size and flags as well. // Build request here. RequestPtr mem_req = - new Request(tid, block_PC, cacheBlkSize, Request::INST_FETCH, + new Request(tid, fetchBufferBlockPC, fetchBufferSize, Request::INST_FETCH, cpu->instMasterId(), pc, cpu->thread[tid]->contextId(), tid); memReq[tid] = mem_req; @@ -593,7 +608,7 @@ DefaultFetch<Impl>::finishTranslation(Fault fault, RequestPtr mem_req) { ThreadID tid = mem_req->threadId(); - Addr block_PC = mem_req->getVaddr(); + Addr fetchBufferBlockPC = mem_req->getVaddr(); assert(!cpu->switchedOut()); @@ -626,10 +641,10 @@ // Build packet here.
PacketPtr data_pkt = new Packet(mem_req, MemCmd::ReadReq); - data_pkt->dataDynamicArray(new uint8_t[cacheBlkSize]); + data_pkt->dataDynamicArray(new uint8_t[fetchBufferSize]); - cacheDataPC[tid] = block_PC; - cacheDataValid[tid] = false; + fetchBufferPC[tid] = fetchBufferBlockPC; + fetchBufferValid[tid] = false; DPRINTF(Fetch, "Fetch: Doing instruction read.\n"); fetchedCacheLines++; @@ -1146,13 +1161,13 @@ fetchStatus[tid] = Running; status_change = true; } else if (fetchStatus[tid] == Running) { - // Align the fetch PC so its at the start of a cache block. - Addr block_PC = icacheBlockAlignPC(fetchAddr); + // Align the fetch PC so its at the start of a fetch buffer segment. + Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); // If buffer is no longer valid or fetchAddr has moved to point // to the next cache block, AND we have no remaining ucode // from a macro-op, then start fetch from icache. - if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid]) + if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid]) && !inRom && !macroop[tid]) { DPRINTF(Fetch, "[tid:%i]: Attempting to translate and read " "instruction, starting at PC %s.\n", tid, thisPC); @@ -1203,10 +1218,10 @@ bool predictedBranch = false; TheISA::MachInst *cacheInsts = - reinterpret_cast<TheISA::MachInst *>(cacheData[tid]); + reinterpret_cast<TheISA::MachInst *>(fetchBuffer[tid]); - const unsigned numInsts = cacheBlkSize / instSize; - unsigned blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; + const unsigned numInsts = fetchBufferSize / instSize; + unsigned blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; // Loop through instruction memory from the cache.
// Keep issuing while fetchWidth is available and branch is not @@ -1219,12 +1234,13 @@ bool needMem = !inRom && !curMacroop && !decoder[tid]->instReady(); fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; - Addr block_PC = icacheBlockAlignPC(fetchAddr); + Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); if (needMem) { // If buffer is no longer valid or fetchAddr has moved to point // to the next cache block then start fetch from icache. - if (!cacheDataValid[tid] || block_PC != cacheDataPC[tid]) + if (!fetchBufferValid[tid] || + fetchBufferBlockPC != fetchBufferPC[tid]) break; if (blkOffset >= numInsts) { @@ -1320,7 +1336,7 @@ if (newMacro) { fetchAddr = thisPC.instAddr() & BaseCPU::PCMask; - blkOffset = (fetchAddr - cacheDataPC[tid]) / instSize; + blkOffset = (fetchAddr - fetchBufferPC[tid]) / instSize; pcOffset = 0; curMacroop = NULL; } @@ -1342,9 +1358,9 @@ } else if (numInst >= fetchWidth) { DPRINTF(Fetch, "[tid:%i]: Done fetching, reached fetch bandwidth " "for this cycle.\n", tid); - } else if (blkOffset >= cacheBlkSize) { - DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of cache " - "block.\n", tid); + } else if (blkOffset >= fetchBufferSize) { + DPRINTF(Fetch, "[tid:%i]: Done fetching, reached the end of the" + "fetch buffer.\n", tid); } macroop[tid] = curMacroop; @@ -1359,8 +1375,8 @@ // pipeline a fetch if we're crossing a cache boundary and not in // a state that would preclude fetching fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; - Addr block_PC = icacheBlockAlignPC(fetchAddr); - issuePipelinedIfetch[tid] = block_PC != cacheDataPC[tid] && + Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); + issuePipelinedIfetch[tid] = fetchBufferBlockPC != fetchBufferPC[tid] && fetchStatus[tid] != IcacheWaitResponse && fetchStatus[tid] != ItlbWait && fetchStatus[tid] != IcacheWaitRetry && @@ -1567,11 +1583,11 @@ Addr pcOffset = fetchOffset[tid]; Addr fetchAddr = (thisPC.instAddr() + pcOffset) & BaseCPU::PCMask; - 
// Align the fetch PC so its at the start of a cache block. - Addr block_PC = icacheBlockAlignPC(fetchAddr); + // Align the fetch PC so its at the start of a fetch buffer segment. + Addr fetchBufferBlockPC = fetchBufferAlignPC(fetchAddr); // Unless buffer already got the block, fetch it from icache. - if (!(cacheDataValid[tid] && block_PC == cacheDataPC[tid])) { + if (!(fetchBufferValid[tid] && fetchBufferBlockPC == fetchBufferPC[tid])) { DPRINTF(Fetch, "[tid:%i]: Issuing a pipelined I-cache access, " "starting at PC %s.\n", tid, thisPC);