diff --git a/configs/common/O3_ARM_v7a.py b/configs/common/O3_ARM_v7a.py --- a/configs/common/O3_ARM_v7a.py +++ b/configs/common/O3_ARM_v7a.py @@ -120,6 +120,7 @@ commitToRenameDelay = 1 commitToIEWDelay = 1 fetchWidth = 3 + fetchBufferSize = 16 fetchToDecodeDelay = 3 decodeWidth = 3 decodeToRenameDelay = 2 diff --git a/src/cpu/o3/O3CPU.py b/src/cpu/o3/O3CPU.py --- a/src/cpu/o3/O3CPU.py +++ b/src/cpu/o3/O3CPU.py @@ -60,6 +60,7 @@ "delay") commitToFetchDelay = Param.Cycles(1, "Commit to fetch delay") fetchWidth = Param.Unsigned(8, "Fetch width") + fetchBufferSize = Param.Unsigned(64, "Fetch buffer size in bytes") renameToDecodeDelay = Param.Cycles(1, "Rename to decode delay") iewToDecodeDelay = Param.Cycles(1, "Issue/Execute/Writeback to decode " diff --git a/src/cpu/o3/fetch.hh b/src/cpu/o3/fetch.hh --- a/src/cpu/o3/fetch.hh +++ b/src/cpu/o3/fetch.hh @@ -454,6 +454,20 @@ /** The width of fetch in instructions. */ unsigned fetchWidth; + /** The size of the fetch buffer in bytes. The fetch buffer + * itself may be smaller than a cache line; in this case, if + * the address of a fetched instruction is outside the bounds + * of the fetch buffer, fetch must get the instruction from + * the cache, even if the buffered line (cacheData) is still + * valid. + */ + unsigned fetchBufferSize; + + /** The offset within a fetched cache line that is + * the base of the actual fetch buffer. + */ + unsigned fetchBufferOffset[Impl::MaxThreads]; + /** Is the cache blocked? If so no threads can access it. */ bool cacheBlocked; diff --git a/src/cpu/o3/fetch_impl.hh b/src/cpu/o3/fetch_impl.hh --- a/src/cpu/o3/fetch_impl.hh +++ b/src/cpu/o3/fetch_impl.hh @@ -79,6 +79,7 @@ iewToFetchDelay(params->iewToFetchDelay), commitToFetchDelay(params->commitToFetchDelay), fetchWidth(params->fetchWidth), + fetchBufferSize(params->fetchBufferSize), retryPkt(NULL), retryTid(InvalidThreadID), numThreads(params->numThreads), @@ -128,6 +129,7 @@ for (int i = 0; i < Impl::MaxThreads; i++) { cacheData[i] = NULL; decoder[i] = new TheISA::Decoder; + fetchBufferOffset[i] = 0; } branchPred = params->branchPred; @@ -354,6 +356,13 @@ // Size of cache block. cacheBlkSize = cpu->getInstPort().peerBlockSize(); + // Fetch buffer should be smaller than, or equal in size, to + // the size of a cache block. + if (fetchBufferSize > cacheBlkSize) { + fatal("fetch buffer size (%u bytes) is greater than the cache " + "block size (%u bytes)\n", fetchBufferSize, cacheBlkSize); + } + // Create mask to get rid of offset bits. cacheBlkMask = (cacheBlkSize - 1); @@ -363,6 +372,7 @@ cacheData[tid] = new uint8_t[cacheBlkSize]; cacheDataPC[tid] = 0; cacheDataValid[tid] = false; + fetchBufferOffset[tid] = 0; } } @@ -387,6 +397,7 @@ memcpy(cacheData[tid], pkt->getPtr(), cacheBlkSize); cacheDataValid[tid] = true; + fetchBufferOffset[tid] = 0; // Wake up the CPU (if it went to sleep and was waiting on // this completion event). @@ -1167,6 +1178,20 @@ // Align the fetch PC so its at the start of a cache block. Addr block_PC = icacheBlockAlignPC(fetchAddr); + // Get the byte offset into the cache line of the instruction + unsigned cacheLineOffset = fetchAddr - cacheDataPC[tid]; + + // If the instruction we're trying to fetch is outside the bounds + // of the fetch buffer itself, we force fetch to re-fetch the cache + // line. + if ((cacheLineOffset < fetchBufferOffset[tid]) || + (cacheLineOffset - fetchBufferOffset[tid] >= fetchBufferSize)) { + // Either we are fetching past the end of the fetch buffer, + // or we have branched back before the start of the fetch buffer + cacheDataValid[tid] = false; + fetchBufferOffset[tid] = cacheLineOffset; + } + // If buffer is no longer valid or fetchAddr has moved to point // to the next cache block, AND we have no remaining ucode // from a macro-op, then start fetch from icache.