# Node ID 8a1419dbbfa65b61cdd341905f3f919a7d885440 # Parent df24b9af42c72606f1fa8e5aa0502b53e81ea176 diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -670,6 +670,18 @@ // the delay provided by the crossbar Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; + if (pkt->cmd == MemCmd::LockedRMWWriteReq) { + // For LockedRMW accesses, we mark the block inaccessible after the + // read (see below), to make sure no one gets in before the write. + // Now that the write is here, mark it accessible again, so the + // write will succeed. LockedRMWReadReq brings the block in in + // exclusive mode, so we know it was previously writable. + CacheBlk *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure()); + assert(blk && blk->isValid()); + assert(!blk->isReadable() && !blk->isWritable()); + blk->status |= (BlkReadable | BlkWritable); + } + // We use lookupLatency here because it is used to specify the latency // to access. Cycles lat = lookupLatency; @@ -718,6 +730,56 @@ next_pf_time = prefetcher->notify(pkt); } + // handle special cases for LockedRMW transactions + if (pkt->isLockedRMW()) { + Addr blk_addr = blockAlign(pkt->getAddr()); + + if (pkt->isRead()) { + // Read hit for LockedRMW. Since it requires exclusive + // permissions, there should be no outstanding access. + assert(!mshrQueue.findMatch(blk_addr, pkt->isSecure())); + // The keys to LockedRMW are that (1) we always have an MSHR + // allocated during the RMW interval to catch snoops and + // defer them until after the RMW completes, and (2) we + // clear permissions on the block to turn any upstream + // access other than the matching write into a miss, causing + // it to append to the MSHR as well. + + // Because we hit in the cache, we have to fake an MSHR to + // achieve part (1). If the read had missed, this MSHR + // would get allocated as part of normal miss processing. 
+ // Basically we need to get the MSHR in the same state as if + // we had missed and just received the response. + Request *req2 = new Request(*(pkt->req)); + Packet *pkt2 = new Packet(req2, pkt->cmd); + MSHR *mshr = allocateMissBuffer(pkt2, curTick(), true); + // Mark the MSHR "in service" (even though it's not) to prevent + // the cache from sending out a request. + mshrQueue.markInService(mshr, false); + // Part (2): mark block inaccessible + assert(blk); + blk->status &= ~(BlkReadable | BlkWritable); + } else { + assert(pkt->isWrite()); + // All LockedRMW writes come here, as they cannot miss. + // Need to undo the two things described above. Block + // permissions were already restored earlier in this + // function, prior to the access() call. Now we just need + // to clear out the MSHR. + + // Read should have already allocated MSHR. + MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure()); + assert(mshr); + // Fake up a packet and "respond" to the still-pending + // LockedRMWRead, to process any pending targets and clear + // out the MSHR + Packet *resp_pkt = + new Packet(pkt->req, MemCmd::LockedRMWWriteResp); + resp_pkt->senderState = mshr; + recvTimingResp(resp_pkt); + } + } + if (needsResponse) { pkt->makeTimingResponse(); // @todo: Make someone pay for this @@ -750,6 +812,9 @@ MSHR *mshr = pkt->req->isUncacheable() ? nullptr : mshrQueue.findMatch(blk_addr, pkt->isSecure()); + // See note above: these should always hit + assert(pkt->cmd != MemCmd::LockedRMWWriteReq); + // Software prefetch handling: // To keep the core from waiting on data it won't look at // anyway, send back a response with dummy data. 
Miss handling @@ -1290,7 +1355,7 @@ // if this is a write, we should be looking at an uncacheable // write - if (pkt->isWrite()) { + if (pkt->isWrite() && pkt->cmd != MemCmd::LockedRMWWriteResp) { assert(pkt->req->isUncacheable()); handleUncacheableWriteResp(pkt); return; @@ -1354,7 +1419,26 @@ // First offset for critical word first calculations int initial_offset = initial_tgt->pkt->getOffset(blkSize); - while (mshr->hasTargets()) { + if (pkt->cmd == MemCmd::LockedRMWWriteResp) { + // This is the fake response generated by the write half of the RMW; + // see comments in recvTimingReq(). The first target on the list + // should be the LockedRMWReadReq which has already been satisfied, + // either because it was a hit (and the MSHR was allocated in + // recvTimingReq()) or because it was left there after the initial + // response (using the 'early_exit' flag below). In either case, we + // don't need to respond now, so pop it off to prevent the loop + // below from generating another response. + assert(initial_tgt->pkt->cmd == MemCmd::LockedRMWReadReq); + delete initial_tgt->pkt->req; + delete initial_tgt->pkt; + mshr->popTarget(); + initial_tgt = nullptr; + } + + // Early exit flag for LockedRMWRead + bool early_exit = false; + + while (mshr->hasTargets() && !early_exit) { MSHR::Target *target = mshr->getTarget(); Packet *tgt_pkt = target->pkt; @@ -1419,6 +1503,22 @@ assert(tgt_pkt->req->masterId() < system->maxMasters()); missLatency[tgt_pkt->cmdToIndex()][tgt_pkt->req->masterId()] += completion_time - target->recvTime; + + if (tgt_pkt->cmd == MemCmd::LockedRMWReadReq) { + // We're going to leave a target in the MSHR until the + // write half of the RMW occurs (see comments above in + // recvTimingReq()). Since we'll be using the current + // request packet (which has the allocated data pointer) + // to form the response, we have to allocate a new dummy + // packet to save in the MSHR target. 
+ Request *req = new Request(*(tgt_pkt->req)); + target->pkt = new Packet(req, tgt_pkt->cmd); + // skip the rest of target processing after we + // send the response + early_exit = true; + // Mark block inaccessible until write arrives + blk->status &= ~(BlkReadable | BlkWritable); + } } else if (pkt->cmd == MemCmd::UpgradeFailResp) { // failed StoreCond upgrade assert(tgt_pkt->cmd == MemCmd::StoreCondReq || @@ -1430,6 +1530,11 @@ completion_time += clockEdge(responseLatency) + pkt->payloadDelay; tgt_pkt->req->setExtraData(0); + } else if (pkt->cmd == MemCmd::LockedRMWWriteResp) { + // Fake response on LockedRMW completion, see above. + // Since the data is already in the cache, we just use + // responseLatency with no extra penalties. + completion_time = clockEdge(responseLatency); } else { // not a cache fill, just forwarding response // responseLatency is the latency of the return path @@ -1483,73 +1588,78 @@ panic("Illegal target->source enum %d\n", target->source); } - mshr->popTarget(); + if (!early_exit) + mshr->popTarget(); } - if (blk && blk->isValid()) { - // an invalidate response stemming from a write line request - // should not invalidate the block, so check if the - // invalidation should be discarded - if (is_invalidate || mshr->hasPostInvalidate()) { - invalidateBlock(blk); - } else if (mshr->hasPostDowngrade()) { - blk->status &= ~BlkWritable; + if (!early_exit) { + if (blk && blk->isValid()) { + // an invalidate response stemming from a write line request + // should not invalidate the block, so check if the + // invalidation should be discarded + if (is_invalidate || mshr->hasPostInvalidate()) { + invalidateBlock(blk); + } else if (mshr->hasPostDowngrade()) { + blk->status &= ~BlkWritable; + } + } + + if (mshr->promoteDeferredTargets()) { + // avoid later read getting stale data while write miss is + // outstanding.. 
see comment in timingAccess() + if (blk) { + blk->status &= ~BlkReadable; + } + mshrQueue.markPending(mshr); + schedMemSideSendEvent(clockEdge() + pkt->payloadDelay); + } else { + mshrQueue.deallocate(mshr); + if (wasFull && !mshrQueue.isFull()) { + clearBlocked(Blocked_NoMSHRs); + } + + // Request the bus for a prefetch if this deallocation freed enough + // MSHRs for a prefetch to take place + if (prefetcher && mshrQueue.canPrefetch()) { + Tick next_pf_time = + std::max(prefetcher->nextPrefetchReadyTime(), clockEdge()); + if (next_pf_time != MaxTick) + schedMemSideSendEvent(next_pf_time); + } + } + + // if we used temp block, check to see if its valid and then + // clear it out + if (blk == tempBlock && tempBlock->isValid()) { + // We use forwardLatency here because we are copying + // Writebacks/CleanEvicts to write buffer. It specifies + // the latency to allocate an internal buffer and to + // schedule an event to the queued port. + if (blk->isDirty() || writebackClean) { + PacketPtr wbPkt = writebackBlk(blk); + allocateWriteBuffer(wbPkt, forward_time); + // Set BLOCK_CACHED flag if cached above. + if (isCachedAbove(wbPkt)) + wbPkt->setBlockCached(); + } else { + PacketPtr wcPkt = cleanEvictBlk(blk); + // Check to see if block is cached above. If not allocate + // write buffer + if (isCachedAbove(wcPkt)) + delete wcPkt; + else + allocateWriteBuffer(wcPkt, forward_time); + } + blk->invalidate(); } } - if (mshr->promoteDeferredTargets()) { - // avoid later read getting stale data while write miss is - // outstanding.. 
see comment in timingAccess() - if (blk) { - blk->status &= ~BlkReadable; - } - mshrQueue.markPending(mshr); - schedMemSideSendEvent(clockEdge() + pkt->payloadDelay); - } else { - mshrQueue.deallocate(mshr); - if (wasFull && !mshrQueue.isFull()) { - clearBlocked(Blocked_NoMSHRs); - } - - // Request the bus for a prefetch if this deallocation freed enough - // MSHRs for a prefetch to take place - if (prefetcher && mshrQueue.canPrefetch()) { - Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(), - clockEdge()); - if (next_pf_time != MaxTick) - schedMemSideSendEvent(next_pf_time); - } - } // reset the xbar additional timinig as it is now accounted for pkt->headerDelay = pkt->payloadDelay = 0; // copy writebacks to write buffer doWritebacks(writebacks, forward_time); - // if we used temp block, check to see if its valid and then clear it out - if (blk == tempBlock && tempBlock->isValid()) { - // We use forwardLatency here because we are copying - // Writebacks/CleanEvicts to write buffer. It specifies the latency to - // allocate an internal buffer and to schedule an event to the - // queued port. - if (blk->isDirty() || writebackClean) { - PacketPtr wbPkt = writebackBlk(blk); - allocateWriteBuffer(wbPkt, forward_time); - // Set BLOCK_CACHED flag if cached above. - if (isCachedAbove(wbPkt)) - wbPkt->setBlockCached(); - } else { - PacketPtr wcPkt = cleanEvictBlk(blk); - // Check to see if block is cached above. 
If not allocate - // write buffer - if (isCachedAbove(wcPkt)) - delete wcPkt; - else - allocateWriteBuffer(wcPkt, forward_time); - } - blk->invalidate(); - } - DPRINTF(CacheVerbose, "Leaving %s with %s for addr %#llx\n", __func__, pkt->cmdString(), pkt->getAddr()); delete pkt; diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh --- a/src/mem/cache/mshr.hh +++ b/src/mem/cache/mshr.hh @@ -124,7 +124,7 @@ const Tick recvTime; //!< Time when request was received (for stats) const Tick readyTime; //!< Time when request is ready to be serviced const Counter order; //!< Global order (for memory consistency mgmt) - const PacketPtr pkt; //!< Pending request packet. + PacketPtr pkt; //!< Pending request packet. const Source source; //!< Request from cpu, memory, or prefetcher? const bool markedPending; //!< Did we mark upstream MSHR //!< as downstreamPending? diff --git a/src/mem/packet.hh b/src/mem/packet.hh --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -107,6 +107,10 @@ StoreCondReq, StoreCondFailReq, // Failed StoreCondReq in MSHR (never sent) StoreCondResp, + LockedRMWReadReq, + LockedRMWReadResp, + LockedRMWWriteReq, + LockedRMWWriteResp, SwapReq, SwapResp, MessageReq, @@ -147,6 +151,7 @@ IsSWPrefetch, IsHWPrefetch, IsLlsc, //!< Alpha/MIPS LL or SC access + IsLockedRMW, //!< x86 locked RMW access HasData, //!< There is an associated payload IsError, //!< Error response IsPrint, //!< Print state matching address (for debugging) @@ -207,6 +212,7 @@ */ bool hasData() const { return testCmdAttrib(HasData); } bool isLLSC() const { return testCmdAttrib(IsLlsc); } + bool isLockedRMW() const { return testCmdAttrib(IsLockedRMW); } bool isSWPrefetch() const { return testCmdAttrib(IsSWPrefetch); } bool isHWPrefetch() const { return testCmdAttrib(IsHWPrefetch); } bool isPrefetch() const { return testCmdAttrib(IsSWPrefetch) || @@ -522,6 +528,7 @@ return resp_cmd.hasData(); } bool isLLSC() const { return cmd.isLLSC(); } + bool isLockedRMW() const { return 
cmd.isLockedRMW(); } bool isError() const { return cmd.isError(); } bool isPrint() const { return cmd.isPrint(); } bool isFlush() const { return cmd.isFlush(); } @@ -779,6 +786,8 @@ return MemCmd::LoadLockedReq; else if (req->isPrefetch()) return MemCmd::SoftPFReq; + else if (req->isLockedRMW()) + return MemCmd::LockedRMWReadReq; else return MemCmd::ReadReq; } @@ -793,6 +802,8 @@ return MemCmd::StoreCondReq; else if (req->isSwap()) return MemCmd::SwapReq; + else if (req->isLockedRMW()) + return MemCmd::LockedRMWWriteReq; else return MemCmd::WriteReq; } diff --git a/src/mem/packet.cc b/src/mem/packet.cc --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -159,6 +159,18 @@ /* StoreCondResp */ { SET3(IsWrite, IsLlsc, IsResponse), InvalidCmd, "StoreCondResp" }, + /* LockedRMWReadReq */ + { SET5(IsRead, IsLockedRMW, NeedsWritable, IsRequest, NeedsResponse), + LockedRMWReadResp, "LockedRMWReadReq" }, + /* LockedRMWReadResp */ + { SET5(IsRead, IsLockedRMW, NeedsWritable, IsResponse, HasData), + InvalidCmd, "LockedRMWReadResp" }, + /* LockedRMWWriteReq */ + { SET6(IsWrite, IsLockedRMW, NeedsWritable, IsRequest, NeedsResponse, + HasData), LockedRMWWriteResp, "LockedRMWWriteReq" }, + /* LockedRMWWriteResp */ + { SET4(IsWrite, IsLockedRMW, NeedsWritable, IsResponse), + InvalidCmd, "LockedRMWWriteResp" }, /* SwapReq -- for Swap ldstub type operations */ { SET6(IsRead, IsWrite, NeedsWritable, IsRequest, HasData, NeedsResponse), SwapResp, "SwapReq" },