# Node ID 5b59da968ce8627e75ddc1a8d0275e4bbc00098a # Parent fbdaa08aaa426b9f4660c366f934ccb670d954ec diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh --- a/src/mem/cache/cache_impl.hh +++ b/src/mem/cache/cache_impl.hh @@ -524,6 +524,18 @@ // the delay provided by the crossbar Tick forward_time = clockEdge(forwardLatency) + pkt->headerDelay; + if (pkt->cmd == MemCmd::LockedRMWWriteReq) { + // For LockedRMW accesses, we mark the block inaccessible after the + // read (see below), to make sure no one gets in before the write. + // Now that the write is here, mark it accessible again, so the + // write will succeed. LockedRMWReadReq brings the block in in + // exclusive mode, so we know it was previously writable. + BlkType *blk = tags->findBlock(pkt->getAddr(), pkt->isSecure()); + assert(blk && blk->isValid()); + assert(!blk->isReadable() && !blk->isWritable()); + blk->status |= (BlkReadable | BlkWritable); + } + // We use lookupLatency here because it is used to specify the latency // to access. Cycles lat = lookupLatency; @@ -578,6 +590,56 @@ next_pf_time = prefetcher->notify(pkt); } + // handle special cases for LockedRMW transactions + if (pkt->isLockedRMW()) { + Addr blk_addr = blockAlign(pkt->getAddr()); + + if (pkt->isRead()) { + // Read hit for LockedRMW. Since it requires exclusive + // permissions, there should be no outstanding access. + assert(!mshrQueue.findMatch(blk_addr, pkt->isSecure())); + // The keys to LockedRMW are that (1) we always have an MSHR + // allocated during the RMW interval to catch snoops and + // defer them until after the RMW completes, and (2) we + // clear permissions on the block to turn any upstream + // access other than the matching write into a miss, causing + // it to append to the MSHR as well. + + // Because we hit in the cache, we have to fake an MSHR to + // achieve part (1). If the read had missed, this MSHR + // would get allocated as part of normal miss processing. 
+ // Basically we need to get the MSHR in the same state as if + // we had missed and just received the response. + Request *req2 = new Request(*(pkt->req)); + Packet *pkt2 = new Packet(req2, pkt->cmd); + MSHR *mshr = allocateMissBuffer(pkt2, curTick(), true); + // Mark the MSHR "in service" (even though it's not) to prevent + // the cache from sending out a request. + mshr->queue->markInService(mshr, false); + // Part (2): mark block inaccessible + assert(blk); + blk->status &= ~(BlkReadable | BlkWritable); + } else { + assert(pkt->isWrite()); + // All LockedRMW writes come here, as they cannot miss. + // Need to undo the two things described above. Block + // permissions were already restored earlier in this + // function, prior to the access() call. Now we just need + // to clear out the MSHR. + + // Read should have already allocated MSHR. + MSHR *mshr = mshrQueue.findMatch(blk_addr, pkt->isSecure()); + assert(mshr); + // Fake up a packet and "respond" to the still-pending + // LockedRMWRead, to process any pending targets and clear + // out the MSHR + Packet *resp_pkt = + new Packet(pkt->req, MemCmd::LockedRMWWriteResp); + resp_pkt->senderState = mshr; + recvTimingResp(resp_pkt); + } + } + if (needsResponse) { pkt->makeTimingResponse(); // @todo: Make someone pay for this @@ -605,6 +667,9 @@ MSHR *mshr = pkt->req->isUncacheable() ? nullptr : mshrQueue.findMatch(blk_addr, pkt->isSecure()); + // See note above: these should always hit + assert(pkt->cmd != MemCmd::LockedRMWWriteReq); + // Software prefetch handling: // To keep the core from waiting on data it won't look at // anyway, send back a response with dummy data. Miss handling @@ -1134,10 +1199,29 @@ // First offset for critical word first calculations int initial_offset = initial_tgt->pkt->getOffset(blkSize); + if (pkt->cmd == MemCmd::LockedRMWWriteResp) { + // This is the fake response generated by the write half of the RMW; + // see comments in recvTimingReq(). 
The first target on the list + // should be the LockedRMWReadReq which has already been satisfied, + // either because it was a hit (and the MSHR was allocated in + // recvTimingReq()) or because it was left there after the initial + // response (using the 'early_exit' flag below). In either case, we + // don't need to respond now, so pop it off to prevent the loop + // below from generating another response. + assert(initial_tgt->pkt->cmd == MemCmd::LockedRMWReadReq); + delete initial_tgt->pkt->req; + delete initial_tgt->pkt; + mshr->popTarget(); + initial_tgt = nullptr; + } + while (mshr->hasTargets()) { MSHR::Target *target = mshr->getTarget(); Packet *tgt_pkt = target->pkt; + // Early exit flag for LockedRMWRead + bool early_exit = false; + switch (target->source) { case MSHR::Target::FromCPU: Tick completion_time; @@ -1195,6 +1279,22 @@ assert(tgt_pkt->req->masterId() < system->maxMasters()); missLatency[tgt_pkt->cmdToIndex()][tgt_pkt->req->masterId()] += completion_time - target->recvTime; + + if (tgt_pkt->cmd == MemCmd::LockedRMWReadReq) { + // We're going to leave a target in the MSHR until the + // write half of the RMW occurs (see comments above in + // recvTimingReq()). Since we'll be using the current + // request packet (which has the allocated data pointer) + // to form the response, we have to allocate a new dummy + // packet to save in the MSHR target. 
+ Request *req = new Request(*(tgt_pkt->req)); + target->pkt = new Packet(req, tgt_pkt->cmd); + // skip the rest of target processing after we + // send the response + early_exit = true; + // Mark block inaccessible until write arrives + blk->status &= ~(BlkReadable | BlkWritable); + } } else if (pkt->cmd == MemCmd::UpgradeFailResp) { // failed StoreCond upgrade assert(tgt_pkt->cmd == MemCmd::StoreCondReq || @@ -1206,6 +1306,11 @@ completion_time += clockEdge(responseLatency) + pkt->payloadDelay; tgt_pkt->req->setExtraData(0); + } else if (pkt->cmd == MemCmd::LockedRMWWriteResp) { + // Fake response on LockedRMW completion, see above. + // Since the data is already in the cache, we just use + // responseLatency with no extra penalties. + completion_time = clockEdge(responseLatency); } else { // not a cache fill, just forwarding response // responseLatency is the latency of the return path @@ -1259,6 +1364,9 @@ panic("Illegal target->source enum %d\n", target->source); } + if (early_exit) + goto exit; + mshr->popTarget(); } @@ -1292,12 +1400,27 @@ // Request the bus for a prefetch if this deallocation freed enough // MSHRs for a prefetch to take place if (prefetcher && mq == &mshrQueue && mshrQueue.canPrefetch()) { - Tick next_pf_time = std::max(prefetcher->nextPrefetchReadyTime(), - curTick()); + Tick next_pf_time = + std::max(prefetcher->nextPrefetchReadyTime(), curTick()); if (next_pf_time != MaxTick) requestMemSideBus(Request_PF, next_pf_time); } } + + // if we used temp block, clear it out + if (blk == tempBlock) { + if (blk->isDirty()) { + // We use forwardLatency here because we are copying + // writebacks to write buffer. It specifies the + // latency to allocate an internal buffer and to + // schedule an event to the queued port. 
+ allocateWriteBuffer(writebackBlk(blk), forward_time, true); + } + blk->invalidate(); + } + + exit: + // reset the xbar additional timinig as it is now accounted for pkt->headerDelay = pkt->payloadDelay = 0; @@ -1307,17 +1430,6 @@ allocateWriteBuffer(wbPkt, clockEdge(forwardLatency), true); writebacks.pop_front(); } - // if we used temp block, clear it out - if (blk == tempBlock) { - if (blk->isDirty()) { - // We use forwardLatency here because we are copying - // writebacks to write buffer. It specifies the latency to - // allocate an internal buffer and to schedule an event to the - // queued port. - allocateWriteBuffer(writebackBlk(blk), forward_time, true); - } - blk->invalidate(); - } DPRINTF(Cache, "Leaving %s with %s for addr %#llx\n", __func__, pkt->cmdString(), pkt->getAddr()); diff --git a/src/mem/cache/mshr.hh b/src/mem/cache/mshr.hh --- a/src/mem/cache/mshr.hh +++ b/src/mem/cache/mshr.hh @@ -103,7 +103,7 @@ const Tick recvTime; //!< Time when request was received (for stats) const Tick readyTime; //!< Time when request is ready to be serviced const Counter order; //!< Global order (for memory consistency mgmt) - const PacketPtr pkt; //!< Pending request packet. + PacketPtr pkt; //!< Pending request packet. const Source source; //!< Request from cpu, memory, or prefetcher? const bool markedPending; //!< Did we mark upstream MSHR //!< as downstreamPending? 
diff --git a/src/mem/packet.hh b/src/mem/packet.hh --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -104,6 +104,10 @@ StoreCondReq, StoreCondFailReq, // Failed StoreCondReq in MSHR (never sent) StoreCondResp, + LockedRMWReadReq, + LockedRMWReadResp, + LockedRMWWriteReq, + LockedRMWWriteResp, SwapReq, SwapResp, MessageReq, @@ -140,6 +144,7 @@ IsSWPrefetch, IsHWPrefetch, IsLlsc, //!< Alpha/MIPS LL or SC access + IsLockedRMW, //!< x86 locked RMW access HasData, //!< There is an associated payload IsError, //!< Error response IsPrint, //!< Print state matching address (for debugging) @@ -195,6 +200,7 @@ */ bool hasData() const { return testCmdAttrib(HasData); } bool isLLSC() const { return testCmdAttrib(IsLlsc); } + bool isLockedRMW() const { return testCmdAttrib(IsLockedRMW); } bool isSWPrefetch() const { return testCmdAttrib(IsSWPrefetch); } bool isHWPrefetch() const { return testCmdAttrib(IsHWPrefetch); } bool isPrefetch() const { return testCmdAttrib(IsSWPrefetch) || @@ -483,6 +489,7 @@ bool isWriteInvalidate() const { return cmd.isWriteInvalidate(); } bool hasData() const { return cmd.hasData(); } bool isLLSC() const { return cmd.isLLSC(); } + bool isLockedRMW() const { return cmd.isLockedRMW(); } bool isError() const { return cmd.isError(); } bool isPrint() const { return cmd.isPrint(); } bool isFlush() const { return cmd.isFlush(); } @@ -653,6 +660,8 @@ return MemCmd::LoadLockedReq; else if (req->isPrefetch()) return MemCmd::SoftPFReq; + else if (req->isLockedRMW()) + return MemCmd::LockedRMWReadReq; else return MemCmd::ReadReq; } @@ -667,6 +676,8 @@ return MemCmd::StoreCondReq; else if (req->isSwap()) return MemCmd::SwapReq; + else if (req->isLockedRMW()) + return MemCmd::LockedRMWWriteReq; else return MemCmd::WriteReq; } diff --git a/src/mem/packet.cc b/src/mem/packet.cc --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -144,6 +144,18 @@ /* StoreCondResp */ { SET4(IsWrite, NeedsExclusive, IsLlsc, IsResponse), InvalidCmd, "StoreCondResp" }, + /* 
LockedRMWReadReq */ + { SET5(IsRead, IsLockedRMW, NeedsExclusive, IsRequest, NeedsResponse), + LockedRMWReadResp, "LockedRMWReadReq" }, + /* LockedRMWReadResp */ + { SET5(IsRead, IsLockedRMW, NeedsExclusive, IsResponse, HasData), + InvalidCmd, "LockedRMWReadResp" }, + /* LockedRMWWriteReq */ + { SET6(IsWrite, IsLockedRMW, NeedsExclusive, IsRequest, NeedsResponse, + HasData), LockedRMWWriteResp, "LockedRMWWriteReq" }, + /* LockedRMWWriteResp */ + { SET4(IsWrite, IsLockedRMW, NeedsExclusive, IsResponse), + InvalidCmd, "LockedRMWWriteResp" }, /* SwapReq -- for Swap ldstub type operations */ { SET6(IsRead, IsWrite, NeedsExclusive, IsRequest, HasData, NeedsResponse), SwapResp, "SwapReq" },