# Node ID e5e740fac1fb0a66ce81690c93f7768340c16c24 # Parent 915989ed4b8a4f17e5cde158c8123b5b9860e285 diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh --- a/src/cpu/o3/lsq_unit.hh +++ b/src/cpu/o3/lsq_unit.hh @@ -247,6 +247,9 @@ /** Attempts to send a store to the cache. */ bool sendStore(PacketPtr data_pkt); + /** Attempts to send a store exclusive prefetch to the cache. */ + bool sendStoreAccess(PacketPtr data_pkt); + /** Increments the given store index (circular queue). */ inline void incrStIdx(int &store_idx) const; /** Decrements the given store index (circular queue). */ @@ -508,6 +511,12 @@ /** Number of times the LSQ is blocked due to the cache. */ Stats::Scalar lsqCacheBlocked; + /** Number of times StoreAccess gets sent successfully */ + Stats::Scalar lsqStoreAccessNonBlocked; + + /** Number of times StoreAccess couldn't get sent successfully */ + Stats::Scalar lsqStoreAccessBlocked; + public: /** Executes the load at the given index. */ Fault read(Request *req, Request *sreqLow, Request *sreqHigh, @@ -892,15 +901,81 @@ assert(size <= sizeof(storeQueue[store_idx].data) || (req->getFlags() & Request::CACHE_BLOCK_ZERO)); + bool split = false; // Split stores can only occur in ISAs with unaligned memory accesses. If // a store request has been split, sreqLow and sreqHigh will be non-null. if (TheISA::HasUnalignedMemAcc && sreqLow) { storeQueue[store_idx].isSplit = true; + split = true; } if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO)) memcpy(storeQueue[store_idx].data, data, size); + // Issue a store access request + if (usedPorts < cachePorts && !hasPendingPkt && + size != 0 && !storeQueue[store_idx].inst->isDataPrefetch() && + !req->isMmappedIpr() && !req->isUncacheable() && + cpu->system->isMemAddr(req->getPaddr())) { + + ++usedPorts; + DynInstPtr inst = storeQueue[store_idx].inst; + MemCmd command = MemCmd::StoreAccess; + + PacketPtr data_pkt; + PacketPtr snd_data_pkt = NULL; + + LSQSenderState *state = new LSQSenderState; + state->isLoad = false; + state->idx = store_idx; + + if (!split) { + // Build a single data packet if the store isn't split. + data_pkt = new Packet(req, command); + data_pkt->senderState = state; + } else { + // Create two packets if the store is split in two. + data_pkt = new Packet(sreqLow, command); + snd_data_pkt = new Packet(sreqHigh, command); + + data_pkt->senderState = state; + snd_data_pkt->senderState = state; + + state->isSplit = true; + state->outstanding = 2; + + req = sreqLow; + } + + DPRINTF(LSQUnit, "D-Cache: store exclusive prefetch idx:%i PC:%s " + "to Addr:%#x, [sn:%lli]\n", store_idx, inst->pcState(), + req->getPaddr(), inst->seqNum); + + if (!sendStoreAccess(data_pkt)) { + DPRINTF(LSQUnit, "D-Cache became blocked when writing store " + "permission prefetch [sn:%lli], will NOT retry later\n", + inst->seqNum); + ++lsqStoreAccessBlocked; + } else { + // Sent the first packet successfully. + // If the store is split, try to send the second packet too + ++lsqStoreAccessNonBlocked; + if (split) { + assert(snd_data_pkt); + // + // Ensure there are enough ports to use. + // + if (usedPorts < cachePorts) { + ++usedPorts; + if (!sendStoreAccess(snd_data_pkt)) + ++lsqStoreAccessBlocked; + else + ++lsqStoreAccessNonBlocked; + } + } + } + } + // This function only writes the data to the store queue, so no fault // can happen here. return NoFault; diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh --- a/src/cpu/o3/lsq_unit_impl.hh +++ b/src/cpu/o3/lsq_unit_impl.hh @@ -94,6 +94,8 @@ void LSQUnit::completeDataAccess(PacketPtr pkt) { + DPRINTF(Activity, "CompleteDataAccess for address: 0x%x; storeAccess: %s\n", + pkt->getAddr(), pkt->isStoreAccess()? "yes" : "no"); LSQSenderState *state = dynamic_cast(pkt->senderState); DynInstPtr inst = state->inst; DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum); @@ -255,7 +257,18 @@ lsqCacheBlocked .name(name() + ".cacheBlocked") - .desc("Number of times an access to memory failed due to the cache being blocked"); + .desc("Number of times an access to memory failed " + "due to the cache being blocked"); + + lsqStoreAccessNonBlocked + .name(name() + ".storeAccessNonBlocked") + .desc("Number of times a StoreAccess to memory " + "sent successfully"); + + lsqStoreAccessBlocked + .name(name() + ".storeAccessBlocked") + .desc("Number of times a StoreAccess to memory failed " + "due to cache being blocked"); } template @@ -1229,6 +1242,17 @@ return true; } +// Exclusive permission prefetch +template +bool +LSQUnit::sendStoreAccess(PacketPtr data_pkt) +{ + if (!dcachePort->sendTimingReq(data_pkt)) { + return false; + } + return true; +} + template void LSQUnit::recvRetry() diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc --- a/src/mem/abstract_mem.cc +++ b/src/mem/abstract_mem.cc @@ -386,7 +386,7 @@ bytesRead[pkt->req->masterId()] += pkt->getSize(); if (pkt->req->isInstFetch()) bytesInstRead[pkt->req->masterId()] += pkt->getSize(); - } else if (pkt->isInvalidate()) { + } else if (pkt->isInvalidate() || pkt->isStoreAccess()) { // no need to do anything // this clause is intentionally before the write clause: the only // transaction that is both a write and an invalidate is diff --git a/src/mem/cache/cache.cc b/src/mem/cache/cache.cc --- a/src/mem/cache/cache.cc +++ b/src/mem/cache/cache.cc @@ -305,6 +305,10 @@ return false; } + if (pkt->isStoreAccess()) { + DPRINTF(Cache, "StoreAccess request: %x\n", pkt->getAddr()); + } + ContextID id = pkt->req->hasContextId() ? pkt->req->contextId() : InvalidContextID; // Here lat is the value passed as parameter to accessBlock() function @@ -390,6 +394,10 @@ // like a Writeback which could not find a replaceable block so has to // go to next level. return false; + } else if (pkt->isStoreAccess() && blk && blk->isWritable()) { + // found a block and it's already in exclusive state + incHitCount(pkt); + return true; } else if ((blk != NULL) && (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable())) { @@ -410,6 +418,10 @@ return true; } + if (pkt->isStoreAccess()) { + DPRINTF(Cache, "StoreAccess miss in cache\n"); + } + return false; } @@ -656,6 +668,10 @@ assert(!pkt->req->isUncacheable()); // hit (for all other request types) + if (pkt->isStoreAccess()) { + // StoreAccess hit in cache with exclusive permission, do nothing + return true; + } if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) { if (blk) @@ -668,6 +684,8 @@ if (needsResponse) { pkt->makeTimingResponse(); + DPRINTF(Cache, "Schedule timing response from cache, " + "latency: %d, clock: %lld\n", lat, clockEdge(lat)); // @todo: Make someone pay for this pkt->headerDelay = pkt->payloadDelay = 0; @@ -687,6 +705,20 @@ } } else { // miss + // StoreAccess prefetch + if (pkt->isStoreAccess()) { + DPRINTF(Cache, "StoreAccess missed in cache or with no " + "exclusive permission\n"); + if (prefetcher) { + DPRINTF(Cache, "Prefetcher enabled, calculating " + "next_pf_time\n"); + next_pf_time = prefetcher->notify(pkt); + DPRINTF(Cache, "Sending prefetch request for StoreAccess\n"); + schedMemSideSendEvent(next_pf_time); + + } + return true; + } Addr blk_addr = blockAlign(pkt->getAddr()); @@ -879,8 +911,6 @@ return NULL; } - assert(cpu_pkt->needsResponse()); - MemCmd cmd; // @TODO make useUpgrades a parameter. // Note that ownership protocols require upgrade, otherwise a @@ -904,7 +934,7 @@ // forward as invalidate to all other caches, this gives us // the line in exclusive state, and invalidates all other // copies - cmd = MemCmd::InvalidateReq; + cmd = MemCmd::InvalidationReq; } else { // block is invalid cmd = needsExclusive ? MemCmd::ReadExReq : @@ -1027,7 +1057,7 @@ if (bus_pkt->isError()) { pkt->makeAtomicResponse(); pkt->copyError(bus_pkt); - } else if (pkt->cmd == MemCmd::InvalidateReq) { + } else if (pkt->cmd == MemCmd::InvalidationReq) { if (blk) { // invalidate response to a cache that received // an invalidate request @@ -1857,7 +1887,7 @@ // an invalidate, we don't need to send a response. The // invalidation itself is taken care of below. bool respond = blk->isDirty() && pkt->needsResponse() && - pkt->cmd != MemCmd::InvalidateReq; + pkt->cmd != MemCmd::InvalidationReq; bool have_exclusive = blk->isWritable(); // Invalidate any prefetch's from below that would strip write permissions diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc --- a/src/mem/cache/mshr.cc +++ b/src/mem/cache/mshr.cc @@ -218,7 +218,7 @@ assert(targets.isReset()); // Don't know of a case where we would allocate a new MSHR for a // snoop (mem-side request), so set source according to request here - Target::Source source = (target->cmd == MemCmd::HardPFReq) ? + Target::Source source = (target->cmd.isHWPrefetch()) ? Target::FromPrefetcher : Target::FromCPU; targets.add(target, when_ready, _order, source, true); assert(deferredTargets.isReset()); diff --git a/src/mem/cache/prefetch/queued.cc b/src/mem/cache/prefetch/queued.cc --- a/src/mem/cache/prefetch/queued.cc +++ b/src/mem/cache/prefetch/queued.cc @@ -61,6 +61,37 @@ Tick QueuedPrefetcher::notify(const PacketPtr &pkt) { + Tick pf_time = curTick() + clockPeriod() * latency; + + if (pkt->isStoreAccess()) { + // convert storeAccess to HardPFExReq + Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize - 1); + Request *prefetchReq = new Request(blk_addr, blkSize, 0, masterId); + PacketPtr prefetch = new Packet(prefetchReq, MemCmd::HardPFExReq); + + DPRINTF(HWPrefetch, "Add StoreAccess request " + "with blk_addr: 0x%x to pf queue\n", blk_addr); + + prefetch->allocate(); + prefetch->req->setThreadContext(pkt->req->contextId(), + pkt->req->threadId()); + + // We just remove the head if we are full + if (pfq.size() == queueSize) { + pfRemovedFull++; + PacketPtr old_pkt = pfq.begin()->pkt; + DPRINTF(HWPrefetch, "Prefetch queue full, removing oldest 0x%x\n", + old_pkt->getAddr()); + delete old_pkt->req; + delete old_pkt; + pfq.pop_front(); + } + + pfq.emplace_back(DeferredPacket(pf_time, prefetch)); + + return pfq.empty() ? MaxTick : pfq.front().tick; + } + // Verify this access type is observed by prefetcher if (observeAccess(pkt)) { Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize - 1); @@ -142,7 +173,6 @@ pfq.pop_front(); } - Tick pf_time = curTick() + clockPeriod() * latency; DPRINTF(HWPrefetch, "Prefetch queued. " "addr:%#x tick:%lld.\n", pf_addr, pf_time); diff --git a/src/mem/packet.hh b/src/mem/packet.hh --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -90,6 +90,7 @@ CleanEvict, SoftPFReq, HardPFReq, + HardPFExReq, SoftPFResp, HardPFResp, WriteLineReq, @@ -125,8 +126,9 @@ // Fake simulator-only commands PrintReq, // Print state matching address FlushReq, //request for a cache flush - InvalidateReq, // request for address to be invalidated - InvalidateResp, + InvalidationReq, // request for address to be invalidated from lsq + InvalidationResp, + StoreAccess, //exclusive permission NUM_MEM_CMDS }; @@ -153,6 +155,7 @@ IsFlush, //!< Flush the address from caches IsAcquire, //!< Acquire operation IsRelease, //!< Release operation + IsStoreAccess, //!< exclusive permission NUM_COMMAND_ATTRIBUTES }; @@ -211,6 +214,7 @@ bool isFlush() const { return testCmdAttrib(IsFlush); } bool isAcquire() const { return testCmdAttrib(IsAcquire); } bool isRelease() const { return testCmdAttrib(IsRelease); } + bool isStoreAccess() const { return testCmdAttrib(IsStoreAccess); } const Command responseCommand() const @@ -502,6 +506,7 @@ bool isFlush() const { return cmd.isFlush(); } bool isAcquire() const { return cmd.isAcquire(); } bool isRelease() const { return cmd.isRelease(); } + bool isStoreAccess() const { return cmd.isStoreAccess(); } // Snoop flags void assertMemInhibit() @@ -726,7 +731,10 @@ // needed (CleanEvict and Writeback), since the snoop packet // re-uses the same request. if (req && isRequest() && !needsResponse() && - !isExpressSnoop()) { + !isExpressSnoop() && !cmd.isHWPrefetch()) { + // If the request is a HWExPrefetch it doesn't need a response + // because it already deletes the request when getting the + // response in handleResponse() delete req; } deleteData(); diff --git a/src/mem/packet.cc b/src/mem/packet.cc --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -95,6 +95,9 @@ /* HardPFReq */ { SET4(IsRead, IsRequest, IsHWPrefetch, NeedsResponse), HardPFResp, "HardPFReq" }, + /* HardPFExReq */ + { SET4(IsRead, IsRequest, IsHWPrefetch, NeedsExclusive), + InvalidCmd, "HardPFExReq" }, /* SoftPFResp */ { SET4(IsRead, IsResponse, IsSWPrefetch, HasData), InvalidCmd, "SoftPFResp" }, @@ -187,10 +190,13 @@ { SET3(IsRequest, IsFlush, NeedsExclusive), InvalidCmd, "FlushReq" }, /* Invalidation Request */ { SET4(IsInvalidate, IsRequest, NeedsExclusive, NeedsResponse), - InvalidateResp, "InvalidateReq" }, + InvalidationResp, "InvalidationReq" }, /* Invalidation Response */ { SET3(IsInvalidate, IsResponse, NeedsExclusive), - InvalidCmd, "InvalidateResp" } + InvalidCmd, "InvalidationResp" }, + /* StoreAccess Request */ + { SET3(NeedsExclusive, IsRequest, IsStoreAccess), InvalidCmd, + "StoreAccessReq" }, }; bool diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm +++ b/src/mem/protocol/RubySlicc_Exports.sm @@ -140,6 +140,7 @@ Release, desc="Release operation"; Acquire, desc="Acquire opertion"; AcquireRelease, desc="Acquire and Release opertion"; + Store_Access, desc="Store Permission Prefetch"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -169,6 +169,7 @@ protected: void trySendRetries(); + void retry_blocked_port(PacketPtr pkt); void ruby_hit_callback(PacketPtr pkt); void testDrainComplete(); void ruby_eviction_callback(Addr address); diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -354,6 +354,24 @@ } void +RubyPort::retry_blocked_port(PacketPtr pkt) +{ + DPRINTF(RubyPort, "Potential blocked port for request: %s 0x%x\n", + pkt->cmdString(), + pkt->getAddr()); + + // Retrieve the request port from the sender State + RubyPort::SenderState *senderState = + safe_cast(pkt->popSenderState()); + MemSlavePort *port = senderState->port; + assert(port != NULL); + delete senderState; + + if (!onRetryList(port)) { + addToRetryList(port); + } +} +void RubyPort::ruby_hit_callback(PacketPtr pkt) { DPRINTF(RubyPort, "Hit callback for %s 0x%x\n", pkt->cmdString(), @@ -531,7 +549,7 @@ Request::funcMasterId); // Use a single packet to signal all snooping ports of the invalidation. // This assumes that snooping ports do NOT modify the packet/request - Packet pkt(&request, MemCmd::InvalidateReq); + Packet pkt(&request, MemCmd::InvalidationReq); for (CpuPortIter p = slave_ports.begin(); p != slave_ports.end(); ++p) { // check if the connected master port is snooping if ((*p)->isSnooping()) { diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -338,7 +338,8 @@ while (!i->second->empty()) { request = i->second->front(); if (ruby_request) { - assert((request->m_type == RubyRequestType_ST) || + assert((request->m_type == RubyRequestType_Store_Access) || + (request->m_type == RubyRequestType_ST) || (request->m_type == RubyRequestType_ATOMIC) || (request->m_type == RubyRequestType_RMW_Read) || (request->m_type == RubyRequestType_RMW_Write) || @@ -481,7 +482,7 @@ if (RubySystem::getWarmupEnabled()) { data.setData(pkt->getConstPtr(), getOffset(request_address), pkt->getSize()); - } else if (!pkt->isFlush()) { + } else if (!pkt->isFlush() && !pkt->isStoreAccess()) { if ((type == RubyRequestType_LD) || (type == RubyRequestType_IFETCH) || (type == RubyRequestType_RMW_Read) || @@ -522,7 +523,13 @@ delete pkt; rs->m_cache_recorder->enqueueNextFlushRequest(); } else { - ruby_hit_callback(pkt); + if (type != RubyRequestType_Store_Access) { + ruby_hit_callback(pkt); + } else { + // A storeAccess can block the CPU from issuing stores, so call + // retry_blocked_port so the CPU can reissue blocked stores. + retry_blocked_port(pkt); + } } } @@ -541,8 +548,12 @@ RubyRequestType primary_type = RubyRequestType_NULL; RubyRequestType secondary_type = RubyRequestType_NULL; - - if (pkt->isLLSC()) { + if (pkt->isStoreAccess()) { + DPRINTF(RubySequencer, "Issuing Store Access\n"); + primary_type = RubyRequestType_Store_Access; + secondary_type = RubyRequestType_ST; + } + else if (pkt->isLLSC()) { // // Alpha LL/SC instructions need to be handled carefully by the cache // coherence protocol to ensure they follow the proper semantics. In @@ -633,12 +644,18 @@ pc = pkt->req->getPC(); } + // Certain requests will not have allocated data + uint8_t* data; + if (pkt->isFlush() || pkt->isStoreAccess()) { + data = nullptr; + } else { + data = pkt->getPtr(); + } + // check if the packet has data as for example prefetch and flush // requests do not std::shared_ptr msg = - std::make_shared(clockEdge(), pkt->getAddr(), - pkt->isFlush() ? - nullptr : pkt->getPtr(), + std::make_shared(clockEdge(), pkt->getAddr(), data, pkt->getSize(), pc, secondary_type, RubyAccessMode_Supervisor, pkt, PrefetchBit_No, proc_id);