# Node ID d8ed395159dc893a45acfb74b518377bd8421aa3
# Parent  d010e6a8e783e0ccead48292b9759eb700e66a85

diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -248,6 +248,9 @@
     /** Attempts to send a store to the cache. */
     bool sendStore(PacketPtr data_pkt);
 
+    /** Attempts to send a store exclusive prefetch to the cache. */
+    bool sendStoreAccess(PacketPtr data_pkt);
+
     /** Increments the given store index (circular queue). */
     inline void incrStIdx(int &store_idx) const;
     /** Decrements the given store index (circular queue). */
@@ -509,6 +512,12 @@
     /** Number of times the LSQ is blocked due to the cache. */
     Stats::Scalar lsqCacheBlocked;
 
+    /** Number of times a StoreAccess was sent successfully. */
+    Stats::Scalar lsqStoreAccessNonBlocked;
+
+    /** Number of times a StoreAccess could not be sent. */
+    Stats::Scalar lsqStoreAccessBlocked;
+
   public:
     /** Executes the load at the given index. */
     Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
@@ -893,15 +902,81 @@
     assert(size <= sizeof(storeQueue[store_idx].data) ||
            (req->getFlags() & Request::CACHE_BLOCK_ZERO));
 
+    bool split = false;
     // Split stores can only occur in ISAs with unaligned memory accesses. If
     // a store request has been split, sreqLow and sreqHigh will be non-null.
     if (TheISA::HasUnalignedMemAcc && sreqLow) {
         storeQueue[store_idx].isSplit = true;
+        split = true;
     }
 
     if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO))
         memcpy(storeQueue[store_idx].data, data, size);
 
+    // Issue a store access request
+    if (usedPorts < cachePorts && !hasPendingPkt &&
+        size != 0 && !storeQueue[store_idx].inst->isDataPrefetch() &&
+        !req->isMmappedIpr() && !req->isUncacheable() &&
+        cpu->system->isMemAddr(req->getPaddr())) {
+
+        ++usedPorts;
+        DynInstPtr inst = storeQueue[store_idx].inst;
+        MemCmd command = MemCmd::StoreAccess;
+
+        PacketPtr data_pkt;
+        PacketPtr snd_data_pkt = NULL;
+
+        LSQSenderState *state = new LSQSenderState;
+        state->isLoad = false;
+        state->idx = store_idx;
+
+        if (!split) {
+            // Build a single data packet if the store isn't split.
+            data_pkt = new Packet(req, command);
+            data_pkt->senderState = state;
+        } else {
+            // Create two packets if the store is split in two.
+            data_pkt = new Packet(sreqLow, command);
+            snd_data_pkt = new Packet(sreqHigh, command);
+
+            data_pkt->senderState = state;
+            snd_data_pkt->senderState = state;
+
+            state->isSplit = true;
+            state->outstanding = 2;
+
+            req = sreqLow;
+        }
+
+        DPRINTF(LSQUnit, "D-Cache: store exclusive prefetch idx:%i PC:%s "
+                "to Addr:%#x, [sn:%lli]\n", store_idx, inst->pcState(),
+                req->getPaddr(), inst->seqNum);
+
+        if (!sendStoreAccess(data_pkt)) {
+            DPRINTF(LSQUnit, "D-Cache became blocked when writing store "
+                    "permission prefetch [sn:%lli], will NOT retry later\n",
+                    inst->seqNum);
+            ++lsqStoreAccessBlocked;
+        } else {
+            // Sent the first packet successfully. If the store is split,
+            // try to send the second packet too.
+            ++lsqStoreAccessNonBlocked;
+            if (split) {
+                assert(snd_data_pkt);
+                //
+                // Ensure there are enough ports to use.
+                //
+                if (usedPorts < cachePorts) {
+                    ++usedPorts;
+                    if (!sendStoreAccess(snd_data_pkt))
+                        ++lsqStoreAccessBlocked;
+                    else
+                        ++lsqStoreAccessNonBlocked;
+                }
+            }
+        }
+    }
+
     // This function only writes the data to the store queue, so no fault
     // can happen here.
     return NoFault;
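The issue gate at the top of the new block packs several conditions into one test: a free cache port, no packet already pending, a store that actually carries data, not itself a software prefetch, and an ordinary cacheable memory target. A standalone sketch of the same predicate, with hypothetical simplified types rather than the real gem5 classes:

    // Sketch only -- ToyReq stands in for gem5's Request.
    #include <cstddef>

    struct ToyReq {
        bool mmappedIpr;     // memory-mapped IPR access
        bool uncacheable;    // uncacheable address
        bool validMemAddr;   // stands in for cpu->system->isMemAddr(paddr)
    };

    // Mirror of the condition guarding the StoreAccess issue above.
    bool shouldIssueStoreAccess(int usedPorts, int cachePorts,
                                bool hasPendingPkt, std::size_t size,
                                bool isDataPrefetch, const ToyReq &req)
    {
        return usedPorts < cachePorts && !hasPendingPkt && size != 0 &&
               !isDataPrefetch && !req.mmappedIpr && !req.uncacheable &&
               req.validMemAddr;
    }

If the gate fails, no StoreAccess is issued and the store simply waits for its normal writeback; the prefetch is purely opportunistic.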
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -94,6 +94,8 @@
 void
 LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
 {
+    DPRINTF(Activity, "CompleteDataAccess for address: 0x%x; storeAccess: %s\n",
+            pkt->getAddr(), pkt->isStoreAccess() ? "yes" : "no");
     LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
     DynInstPtr inst = state->inst;
 
     DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum);
@@ -255,7 +257,18 @@
 
     lsqCacheBlocked
         .name(name() + ".cacheBlocked")
-        .desc("Number of times an access to memory failed due to the cache being blocked");
+        .desc("Number of times an access to memory failed "
+              "due to the cache being blocked");
+
+    lsqStoreAccessNonBlocked
+        .name(name() + ".storeAccessNonBlocked")
+        .desc("Number of times a StoreAccess to memory "
+              "was sent successfully");
+
+    lsqStoreAccessBlocked
+        .name(name() + ".storeAccessBlocked")
+        .desc("Number of times a StoreAccess to memory failed "
+              "due to the cache being blocked");
 }
 
 template <class Impl>
@@ -1223,6 +1236,17 @@
     return true;
 }
 
+// Exclusive permission prefetch for a store
+template <class Impl>
+bool
+LSQUnit<Impl>::sendStoreAccess(PacketPtr data_pkt)
+{
+    if (!dcachePort->sendTimingReq(data_pkt)) {
+        return false;
+    }
+    return true;
+}
+
 template <class Impl>
 void
 LSQUnit<Impl>::recvRetry()

diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -379,7 +379,7 @@
         bytesRead[pkt->req->masterId()] += pkt->getSize();
         if (pkt->req->isInstFetch())
             bytesInstRead[pkt->req->masterId()] += pkt->getSize();
-    } else if (pkt->isInvalidate()) {
+    } else if (pkt->isInvalidate() || pkt->isStoreAccess()) {
         // no need to do anything
         // this clause is intentionally before the write clause: the only
         // transaction that is both a write and an invalidate is
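The two new counters partition every StoreAccess attempt: each call to sendStoreAccess() bumps exactly one of them, and a blocked attempt is dropped rather than retried. A minimal sketch of that accounting pattern (hypothetical names, plain integers instead of Stats::Scalar):

    #include <cstdint>
    #include <functional>

    // Toy counters standing in for lsqStoreAccessNonBlocked/Blocked.
    struct StoreAccessStats {
        uint64_t nonBlocked = 0;  // port accepted the packet
        uint64_t blocked = 0;     // port refused; the prefetch is dropped
    };

    // Wrap a send attempt so each outcome lands in exactly one counter,
    // mirroring the sendStoreAccess() call sites in write().
    bool trySendStoreAccess(const std::function<bool()> &sendTimingReq,
                            StoreAccessStats &stats)
    {
        if (sendTimingReq()) {
            ++stats.nonBlocked;
            return true;
        }
        ++stats.blocked;
        return false;
    }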
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -333,6 +333,10 @@
         return false;
     }
 
+    if (pkt->isStoreAccess()) {
+        DPRINTF(Cache, "StoreAccess request: %x\n", pkt->getAddr());
+    }
+
     int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1;
     // Here lat is the value passed as parameter to accessBlock() function
     // that can modify its value.
@@ -372,6 +376,10 @@
         DPRINTF(Cache, "%s new state is %s\n", __func__, blk->print());
         incHitCount(pkt);
         return true;
+    } else if (pkt->isStoreAccess() && blk != NULL && blk->isWritable()) {
+        // found a block and it's already in exclusive state
+        incHitCount(pkt);
+        return true;
     } else if ((blk != NULL) && (pkt->needsExclusive() ?
                blk->isWritable() : blk->isReadable())) {
@@ -392,6 +400,9 @@
         return true;
     }
 
+    if (pkt->isStoreAccess()) {
+        DPRINTF(Cache, "StoreAccess miss in cache\n");
+    }
     return false;
 }
@@ -581,6 +592,10 @@
         assert(!pkt->req->isUncacheable());
 
         // hit (for all other request types)
+        if (pkt->isStoreAccess()) {
+            // StoreAccess hit in cache with exclusive permission, do nothing
+            return true;
+        }
 
         if (prefetcher && (prefetchOnAccess ||
                            (blk && blk->wasPrefetched()))) {
             if (blk)
@@ -593,6 +608,8 @@
 
         if (needsResponse) {
             pkt->makeTimingResponse();
+            DPRINTF(Cache, "Schedule timing response from cache, "
+                    "latency: %d, clock: %lld\n", lat, clockEdge(lat));
             // @todo: Make someone pay for this
             pkt->headerDelay = pkt->payloadDelay = 0;
@@ -610,6 +627,23 @@
         }
     } else {
         // miss
+        // StoreAccess prefetch
+        if (pkt->isStoreAccess()) {
+            DPRINTF(Cache, "StoreAccess missed in cache, or the block "
+                    "lacks exclusive permission\n");
+            if (prefetcher) {
+                DPRINTF(Cache, "Prefetcher enabled, calculating "
+                        "next_pf_time\n");
+                next_pf_time = prefetcher->notify(pkt);
+            }
+            if (next_pf_time != 0) {
+                DPRINTF(Cache, "Sending prefetch request for StoreAccess\n");
+                requestMemSideBus(Request_PF,
+                    std::max(prefetcher->nextPrefetchReadyTime(),
+                             next_pf_time));
+            }
+            return true;
+        }
 
         Addr blk_addr = blockAlign(pkt->getAddr());
@@ -801,8 +834,6 @@
         return NULL;
     }
 
-    assert(cpu_pkt->needsResponse());
-
     MemCmd cmd;
     // @TODO make useUpgrades a parameter.
     // Note that ownership protocols require upgrade, otherwise a

diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -215,7 +215,7 @@
     assert(targets.isReset());
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem-side request), so set source according to request here
-    Target::Source source = (target->cmd == MemCmd::HardPFReq) ?
+    Target::Source source = (target->cmd.isHWPrefetch()) ?
         Target::FromPrefetcher : Target::FromCPU;
     targets.add(target, when_ready, _order, source, true);
     assert(deferredTargets.isReset());
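Taken together, the access()/recvTimingReq() changes give a StoreAccess three possible fates: it hits a writable block and is finished, it hits a read-only block or misses and is handed to the prefetcher as a HardPFExReq, and in no case does it allocate an MSHR of its own. A compact sketch of that classification, using a hypothetical simplified block state instead of gem5's CacheBlk:

    // Sketch only -- not the real gem5 coherence states.
    enum class BlkState { Invalid, Shared, Exclusive };

    enum class StoreAccessOutcome {
        HitWritable,   // block already writable: nothing to do
        NeedPrefetch   // miss or read-only hit: queue a HardPFExReq
    };

    StoreAccessOutcome classifyStoreAccess(BlkState blk)
    {
        if (blk == BlkState::Exclusive)
            return StoreAccessOutcome::HitWritable;
        // Shared or Invalid: exclusive permission has to be fetched, so
        // the request is converted into a prefetch-exclusive.
        return StoreAccessOutcome::NeedPrefetch;
    }

The mshr.cc change is what lets the converted HardPFExReq be tagged Target::FromPrefetcher: the test is widened from the single HardPFReq command to any command carrying the IsHWPrefetch attribute.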
" "addr:%#x tick:%lld.\n", pf_addr, pf_time); diff --git a/src/mem/packet.hh b/src/mem/packet.hh --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -89,6 +89,7 @@ Writeback, SoftPFReq, HardPFReq, + HardPFExReq, SoftPFResp, HardPFResp, WriteInvalidateReq, @@ -120,6 +121,7 @@ PrintReq, // Print state matching address FlushReq, //request for a cache flush InvalidationReq, // request for address to be invalidated from lsq + StoreAccess, //exclusive permission NUM_MEM_CMDS }; @@ -144,6 +146,7 @@ IsError, //!< Error response IsPrint, //!< Print state matching address (for debugging) IsFlush, //!< Flush the address from caches + IsStoreAccess, //!< exclusive permission NUM_COMMAND_ATTRIBUTES }; @@ -202,6 +205,7 @@ bool isError() const { return testCmdAttrib(IsError); } bool isPrint() const { return testCmdAttrib(IsPrint); } bool isFlush() const { return testCmdAttrib(IsFlush); } + bool isStoreAccess() const { return testCmdAttrib(IsStoreAccess); } const Command responseCommand() const @@ -486,6 +490,7 @@ bool isError() const { return cmd.isError(); } bool isPrint() const { return cmd.isPrint(); } bool isFlush() const { return cmd.isFlush(); } + bool isStoreAccess() const { return cmd.isStoreAccess(); } // Snoop flags void assertMemInhibit() @@ -554,8 +559,8 @@ } /** - * When ruby is in use, Ruby will monitor the cache line and thus M5 - * phys memory should treat LL ops as normal reads. + * When ruby is in use, Ruby will monitor the cache line and thus M5 + * phys memory should treat LL ops as normal reads. */ void convertLlToRead() @@ -695,8 +700,12 @@ // If this is a request packet for which there's no response, // delete the request object here, since the requester will // never get the chance. - if (req && isRequest() && !needsResponse()) + if (req && isRequest() && !needsResponse() + && !cmd.isHWPrefetch()) { + // If HWExPrefetch, it doesn't need a response, hence already + // deletes req when getting the response in handleResponse function delete req; + } deleteData(); } diff --git a/src/mem/packet.cc b/src/mem/packet.cc --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -92,6 +92,9 @@ /* HardPFReq */ { SET4(IsRead, IsRequest, IsHWPrefetch, NeedsResponse), HardPFResp, "HardPFReq" }, + /* HardPFExReq */ + { SET4(IsRead, IsRequest, IsHWPrefetch, NeedsExclusive), + InvalidCmd, "HardPFExReq" }, /* SoftPFResp */ { SET4(IsRead, IsResponse, IsSWPrefetch, HasData), InvalidCmd, "SoftPFResp" }, @@ -170,6 +173,9 @@ /* Invalidation Request */ { SET3(NeedsExclusive, IsInvalidate, IsRequest), InvalidCmd, "InvalidationReq" }, + /* StoreAccess Request */ + { SET3(NeedsExclusive, IsRequest, IsStoreAccess), InvalidCmd, + "StoreAccessReq" }, }; bool diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm +++ b/src/mem/protocol/RubySlicc_Exports.sm @@ -135,6 +135,7 @@ COMMIT, desc="Commit version"; NULL, desc="Invalid request type"; FLUSH, desc="Flush request type"; + Store_Access, desc="Store Permission Prefetch"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -170,6 +170,7 @@ protected: void trySendRetries(); + void retry_blocked_port(PacketPtr pkt); void ruby_hit_callback(PacketPtr pkt); void testDrainComplete(); void ruby_eviction_callback(const Address& address); diff --git a/src/mem/ruby/system/RubyPort.cc 
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -352,6 +352,24 @@
 }
 
 void
+RubyPort::retry_blocked_port(PacketPtr pkt)
+{
+    DPRINTF(RubyPort, "Potential blocked port for request: %s 0x%x\n",
+            pkt->cmdString(), pkt->getAddr());
+
+    // Retrieve the request port from the sender state
+    RubyPort::SenderState *senderState =
+        safe_cast<RubyPort::SenderState *>(pkt->popSenderState());
+    MemSlavePort *port = senderState->port;
+    assert(port != NULL);
+    delete senderState;
+
+    if (!onRetryList(port)) {
+        addToRetryList(port);
+    }
+}
+
+void
 RubyPort::ruby_hit_callback(PacketPtr pkt)
 {
     DPRINTF(RubyPort, "Hit callback for %s 0x%x\n", pkt->cmdString(),

diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -389,7 +389,8 @@
     while (!i->second->empty()) {
         request = i->second->front();
         if(ruby_request) {
-            assert((request->m_type == RubyRequestType_ST) ||
+            assert((request->m_type == RubyRequestType_Store_Access) ||
+                   (request->m_type == RubyRequestType_ST) ||
                    (request->m_type == RubyRequestType_ATOMIC) ||
                    (request->m_type == RubyRequestType_RMW_Read) ||
                    (request->m_type == RubyRequestType_RMW_Write) ||
@@ -538,7 +539,7 @@
     if (g_system_ptr->m_warmup_enabled) {
         data.setData(pkt->getConstPtr<uint8_t>(),
                      request_address.getOffset(), pkt->getSize());
-    } else if (!pkt->isFlush()) {
+    } else if (!pkt->isFlush() && !pkt->isStoreAccess()) {
         if ((type == RubyRequestType_LD) ||
             (type == RubyRequestType_IFETCH) ||
             (type == RubyRequestType_RMW_Read) ||
@@ -578,7 +579,13 @@
         delete pkt;
         g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
     } else {
-        ruby_hit_callback(pkt);
+        if (type != RubyRequestType_Store_Access) {
+            ruby_hit_callback(pkt);
+        } else {
+            // A StoreAccess can block the CPU from issuing stores, so call
+            // retry_blocked_port so the CPU can reissue blocked stores.
+            retry_blocked_port(pkt);
+        }
     }
 }
@@ -597,8 +604,11 @@
     RubyRequestType primary_type = RubyRequestType_NULL;
     RubyRequestType secondary_type = RubyRequestType_NULL;
 
-
-    if (pkt->isLLSC()) {
+    if (pkt->isStoreAccess()) {
+        DPRINTF(RubySequencer, "Issuing Store Access\n");
+        primary_type = RubyRequestType_Store_Access;
+        secondary_type = RubyRequestType_ST;
+    } else if (pkt->isLLSC()) {
         //
         // Alpha LL/SC instructions need to be handled carefully by the cache
         // coherence protocol to ensure they follow the proper semantics. In
@@ -691,12 +702,18 @@
         pc = pkt->req->getPC();
     }
 
+    // Certain requests, such as flushes and StoreAccesses, carry no data.
+    uint8_t *data;
+    if (pkt->isFlush() || pkt->isStoreAccess()) {
+        data = nullptr;
+    } else {
+        data = pkt->getPtr<uint8_t>();
+    }
+
     // check if the packet has data as for example prefetch and flush
     // requests do not
     std::shared_ptr<RubyRequest> msg =
-        std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(),
-                                      pkt->isFlush() ?
-                                      nullptr : pkt->getPtr<uint8_t>(),
+        std::make_shared<RubyRequest>(clockEdge(), pkt->getAddr(), data,
                                       pkt->getSize(), pc, secondary_type,
                                       RubyAccessMode_Supervisor, pkt,
                                       PrefetchBit_No, proc_id);
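On the Ruby side, the sequencer issues a StoreAccess under its own primary type but completes it with store semantics, and its completion wakes any CPU port the outstanding prefetch may have stalled. A condensed sketch of the type selection in issueRequest(), with hypothetical stand-ins for the packet flags:

    #include <utility>

    // Toy mirror of the (primary, secondary) selection above.
    enum class RubyReqType { LD, ST, Store_Access };

    struct PacketBits { bool storeAccess; bool write; };

    // The primary type drives the protocol transition; the secondary type
    // tells the sequencer how to treat the completion.
    std::pair<RubyReqType, RubyReqType> classify(const PacketBits &pkt)
    {
        if (pkt.storeAccess)
            return {RubyReqType::Store_Access, RubyReqType::ST};
        if (pkt.write)
            return {RubyReqType::ST, RubyReqType::ST};
        return {RubyReqType::LD, RubyReqType::LD};
    }

Because a StoreAccess carries no data and expects no response, the hit path must not call ruby_hit_callback(); retry_blocked_port() only unblocks the issuing port so that stalled stores can reissue.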