# Node ID 4da865dce0dae6c70c7bed8f115280e779d1c654
# Parent f377969aa1b20cd4724021b6bb003c10eb8ac9d0
#
# NOTE(review): line breaks, indentation, blank context lines and template
# arguments were restored from a whitespace-collapsed copy of this patch;
# check every hunk against the parent revision before importing.
# NOTE(review): no path visible in this patch frees the StoreAccess packets
# or the LSQSenderState allocated in LSQUnit::write().  ~Packet() would also
# delete the store's own Request (StoreAccess has no NeedsResponse), so the
# fix needs an explicit ownership rule -- TODO.
# NOTE(review): the whitespace-only hunk for src/mem/coherent_bus.cc was
# dropped; it added a blank line and nothing else.
diff --git a/src/cpu/o3/lsq_unit.hh b/src/cpu/o3/lsq_unit.hh
--- a/src/cpu/o3/lsq_unit.hh
+++ b/src/cpu/o3/lsq_unit.hh
@@ -265,6 +265,9 @@
     /** Attempts to send a store to the cache. */
     bool sendStore(PacketPtr data_pkt);
 
+    /** Attempts to send a store exclusive prefetch to the cache. */
+    bool sendStoreAccess(PacketPtr data_pkt);
+
     /** Increments the given store index (circular queue). */
     inline void incrStIdx(int &store_idx) const;
     /** Decrements the given store index (circular queue). */
@@ -532,6 +535,12 @@
     /** Number of times the LSQ is blocked due to the cache. */
     Stats::Scalar lsqCacheBlocked;
 
+    /** Number of times sendStoreAccess() succeeded. */
+    Stats::Scalar lsqStoreAccessNonBlocked;
+
+    /** Number of times sendStoreAccess() failed. */
+    Stats::Scalar lsqStoreAccessBlocked;
+
   public:
     /** Executes the load at the given index. */
     Fault read(Request *req, Request *sreqLow, Request *sreqHigh,
@@ -935,13 +944,89 @@
 
     // Split stores can only occur in ISAs with unaligned memory accesses.  If
     // a store request has been split, sreqLow and sreqHigh will be non-null.
+    bool split = false;
     if (TheISA::HasUnalignedMemAcc && sreqLow) {
         storeQueue[store_idx].isSplit = true;
+        split = true;
     }
 
     if (!(req->getFlags() & Request::CACHE_BLOCK_ZERO))
         memcpy(storeQueue[store_idx].data, data, size);
 
+    //
+    // Store access request: a store exclusive prefetch sent to the D-cache.
+    //
+    if (usedPorts < cachePorts && !hasPendingPkt &&
+        !lsq->cacheBlocked() &&
+        size != 0 && !storeQueue[store_idx].inst->isDataPrefetch() &&
+        !req->isMmappedIpr() && !req->isUncacheable()) {
+
+        ++usedPorts;
+        DynInstPtr inst = storeQueue[store_idx].inst;
+        MemCmd command = MemCmd::StoreAccess;
+
+        PacketPtr data_pkt;
+        PacketPtr snd_data_pkt = NULL;
+
+        LSQSenderState *state = new LSQSenderState;
+        state->isLoad = false;
+        state->idx = store_idx;
+
+        if (!split) {
+            //
+            // Build a single data packet if the store isn't split.
+            //
+            data_pkt = new Packet(req, command);
+            data_pkt->senderState = state;
+        } else {
+            //
+            // Create two packets if the store is split in two.
+            //
+            data_pkt = new Packet(sreqLow, command);
+            snd_data_pkt = new Packet(sreqHigh, command);
+
+            data_pkt->senderState = state;
+            snd_data_pkt->senderState = state;
+
+            state->isSplit = true;
+            state->outstanding = 2;
+
+            req = sreqLow;
+        }
+
+        DPRINTF(LSQUnit, "D-Cache: store exclusive prefetch idx:%i PC:%s "
+                "to Addr:%#x, [sn:%lli]\n",
+                store_idx, inst->pcState(),
+                req->getPaddr(),
+                inst->seqNum);
+
+        if (!sendStoreAccess(data_pkt)) {
+            DPRINTF(LSQUnit, "D-Cache became blocked when writing store "
+                    "prefetch [sn:%lli], will NOT retry later\n",
+                    inst->seqNum);
+            ++lsqStoreAccessBlocked;
+        } else {
+            //
+            // The first packet went through.
+            // If split, try to send the second packet too.
+            //
+            ++lsqStoreAccessNonBlocked;
+            if (split) {
+                assert(snd_data_pkt);
+                //
+                // Needs its own port; with none left it is silently dropped.
+                //
+                if (usedPorts < cachePorts) {
+                    ++usedPorts;
+                    if (!sendStoreAccess(snd_data_pkt))
+                        ++lsqStoreAccessBlocked;
+                    else
+                        ++lsqStoreAccessNonBlocked;
+                }
+            }
+        }
+    }
+
     // This function only writes the data to the store queue, so no fault
     // can happen here.
     return NoFault;
diff --git a/src/cpu/o3/lsq_unit_impl.hh b/src/cpu/o3/lsq_unit_impl.hh
--- a/src/cpu/o3/lsq_unit_impl.hh
+++ b/src/cpu/o3/lsq_unit_impl.hh
@@ -94,6 +94,8 @@
 void
 LSQUnit<Impl>::completeDataAccess(PacketPtr pkt)
 {
+    DPRINTF(Activity, "CompleteDataAccess for address: 0x%x; storeAccess: "
+            "%s\n", pkt->getAddr(), pkt->isStoreAccess() ? "yes" : "no");
     LSQSenderState *state = dynamic_cast<LSQSenderState *>(pkt->senderState);
     DynInstPtr inst = state->inst;
     DPRINTF(IEW, "Writeback event [sn:%lli].\n", inst->seqNum);
@@ -259,7 +261,18 @@
 
     lsqCacheBlocked
         .name(name() + ".cacheBlocked")
-        .desc("Number of times an access to memory failed due to the cache being blocked");
+        .desc("Number of times an access to memory failed "
+              "due to the cache being blocked");
+
+    lsqStoreAccessNonBlocked
+        .name(name() + ".storeAccessNonBlocked")
+        .desc("Number of times a StoreAccess to memory "
+              "sent successfully");
+
+    lsqStoreAccessBlocked
+        .name(name() + ".storeAccessBlocked")
+        .desc("Number of times a StoreAccess to memory failed "
+              "due to cache being blocked");
 }
 
 template <class Impl>
@@ -1227,6 +1240,17 @@
     return true;
 }
 
+// Attempts to send an exclusive-permission prefetch (StoreAccess) to the
+// D-cache; returns false when the port refuses the packet.
+template <class Impl>
+bool
+LSQUnit<Impl>::sendStoreAccess(PacketPtr data_pkt)
+{
+    // A refused StoreAccess is not retried; the caller only records the
+    // failure in its statistics.
+    return dcachePort->sendTimingReq(data_pkt);
+}
+
 template <class Impl>
 void
 LSQUnit<Impl>::recvRetry()
diff --git a/src/mem/abstract_mem.cc b/src/mem/abstract_mem.cc
--- a/src/mem/abstract_mem.cc
+++ b/src/mem/abstract_mem.cc
@@ -385,9 +385,9 @@
             numWrites[pkt->req->masterId()]++;
             bytesWritten[pkt->req->masterId()] += pkt->getSize();
         }
-    } else if (pkt->isInvalidate()) {
+    } else if (pkt->isInvalidate() || pkt->isStoreAccess()) {
         // no need to do anything
     } else {
         panic("unimplemented");
     }
 
diff --git a/src/mem/cache/cache_impl.hh b/src/mem/cache/cache_impl.hh
--- a/src/mem/cache/cache_impl.hh
+++ b/src/mem/cache/cache_impl.hh
@@ -309,7 +309,13 @@
             blk ? "hit" : "miss", blk ? blk->print() : "");
 
     if (blk != NULL) {
-
+        //
+        // A StoreAccess that finds the block already writable only counts
+        // as a hit; there is nothing else to do for it.
+        if (pkt->isStoreAccess() && blk->isWritable()) {
+            incHitCount(pkt);
+            return true;
+        }
         if (pkt->needsExclusive() ? blk->isWritable() : blk->isReadable()) {
             // OK to satisfy access
             incHitCount(pkt);
@@ -510,6 +516,10 @@
     bool needsResponse = pkt->needsResponse();
 
     if (satisfied) {
+        if (pkt->isStoreAccess()) {
+            // StoreAccess hit in cache with exclusive permission, do nothing
+            return true;
+        }
         if (prefetcher && (prefetchOnAccess || (blk && blk->wasPrefetched()))) {
             if (blk)
                 blk->status &= ~BlkHWPrefetched;
@@ -518,6 +528,8 @@
 
         if (needsResponse) {
             pkt->makeTimingResponse();
+            DPRINTF(Cache, "Schedule timing response from cache, "
+                    "latency: %d, clock: %lld\n", lat, clockEdge(lat));
             // @todo: Make someone pay for this
             pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
             cpuSidePort->schedTimingResp(pkt, clockEdge(lat));
@@ -529,6 +541,20 @@
         }
     } else {
         // miss
+        // StoreAccess prefetch: hand it to the prefetcher and stop here
+        if (pkt->isStoreAccess()) {
+            DPRINTF(Cache, "StoreAccess missed in cache or with no "
+                    "exclusive permission\n");
+            if (prefetcher) {
+                DPRINTF(Cache, "Prefetcher enabled, calculating next_pf_time\n");
+                next_pf_time = prefetcher->notify(pkt, time);
+            }
+            if (next_pf_time != 0) {
+                DPRINTF(Cache, "Sending prefetch request for StoreAccess\n");
+                requestMemSideBus(Request_PF, std::max(time, next_pf_time));
+            }
+            return true;
+        }
 
         // @todo: Make someone pay for this
         pkt->busFirstWordDelay = pkt->busLastWordDelay = 0;
@@ -548,6 +574,8 @@
                 mshr->threadNum = -1;
             }
             mshr->allocateTarget(pkt, time, order++);
+            DPRINTF(Cache, "Allocate a target in mshr queue for address: 0x%x, "
+                    "current size: %d\n", pkt->getAddr(), mshr->getNumTargets());
             if (mshr->getNumTargets() == numTarget) {
                 noTargetMSHR = mshr;
                 setBlocked(Blocked_NoTargets);
@@ -641,8 +669,8 @@
         return NULL;
     }
 
-    assert(cpu_pkt->needsResponse());
+    assert(cpu_pkt->needsResponse() || cpu_pkt->cmd.isHWPrefetch());
 
     MemCmd cmd;
     // @TODO make useUpgrades a parameter.
     // Note that ownership protocols require upgrade, otherwise a
@@ -947,6 +975,7 @@
 
         switch (target->source) {
           case MSHR::Target::FromCPU:
+            DPRINTF(Cache, "HandleResponse fromCPU\n");
             Tick completion_time;
             if (is_fill) {
                 satisfyCpuSideRequest(target->pkt, blk,
@@ -1012,8 +1041,9 @@
             break;
 
           case MSHR::Target::FromPrefetcher:
-            assert(target->pkt->cmd == MemCmd::HardPFReq);
+            DPRINTF(Cache, "HandleResponse fromPrefetcher\n");
+            assert(target->pkt->cmd.isHWPrefetch());
             if (blk)
                 blk->status |= BlkHWPrefetched;
             delete target->pkt->req;
             delete target->pkt;
@@ -1679,6 +1709,7 @@
     assert(!miss_mshr && !write_mshr);
     if (prefetcher && !mshrQueue.isFull()) {
         // If we have a miss queue slot, we can try a prefetch
+        DPRINTF(Cache, "Trying prefetch queue\n");
         PacketPtr pkt = prefetcher->getPacket();
         if (pkt) {
             Addr pf_addr = blockAlign(pkt->getAddr());
diff --git a/src/mem/cache/mshr.cc b/src/mem/cache/mshr.cc
--- a/src/mem/cache/mshr.cc
+++ b/src/mem/cache/mshr.cc
@@ -218,7 +218,7 @@
     assert(targets.isReset());
     // Don't know of a case where we would allocate a new MSHR for a
     // snoop (mem-side request), so set source according to request here
-    Target::Source source = (target->cmd == MemCmd::HardPFReq) ?
+    Target::Source source = target->cmd.isHWPrefetch() ?
         Target::FromPrefetcher : Target::FromCPU;
     targets.add(target, whenReady, _order, source, true);
     assert(deferredTargets.isReset());
diff --git a/src/mem/cache/prefetch/base.cc b/src/mem/cache/prefetch/base.cc
--- a/src/mem/cache/prefetch/base.cc
+++ b/src/mem/cache/prefetch/base.cc
@@ -187,6 +187,37 @@
 Tick
 BasePrefetcher::notify(PacketPtr &pkt, Tick tick)
 {
+    if (pkt->isStoreAccess()) {
+        // A StoreAccess bypasses the regular logic below: it is converted
+        // into an exclusive hardware prefetch (HardPFExReq) for its block.
+        Addr blk_addr = pkt->getAddr() & ~(Addr)(blkSize-1);
+        Request *prefetchReq = new Request(blk_addr, blkSize, 0, masterId);
+        PacketPtr prefetch = new Packet(prefetchReq, MemCmd::HardPFExReq);
+
+        DPRINTF(HWPrefetch, "Add StoreAccess request "
+                "with blk_addr: 0x%x to pf queue\n", blk_addr);
+
+        prefetch->allocate();
+        prefetch->req->setThreadContext(pkt->req->contextId(),
+                                        pkt->req->threadId());
+
+        // We just remove the head if we are full
+        if (pf.size() == size) {
+            pfRemovedFull++;
+            PacketPtr old_pkt = pf.begin()->pkt;
+            DPRINTF(HWPrefetch, "Prefetch queue full, removing oldest 0x%x\n",
+                    old_pkt->getAddr());
+            delete old_pkt->req;
+            delete old_pkt;
+            pf.pop_front();
+        }
+
+        pf.push_back(DeferredPacket(tick, prefetch));
+
+        // The queue cannot be empty here, so report its head's ready time.
+        return pf.front().tick;
+    }
+
     // Don't consult the prefetcher if any of the following conditons are true
     // 1) The request is uncacheable
     // 2) The request is a fetch, but we are only prefeching data
@@ -287,6 +318,8 @@
                 pf.pop_front();
             }
 
+            DPRINTF(HWPrefetch, "Add non StoreAccess request with blk_addr: "
+                    "0x%x to pf queue\n", *addrIter);
             pf.push_back(DeferredPacket(tick + clockPeriod() * *delayIter,
                                         prefetch));
         }
diff --git a/src/mem/packet.hh b/src/mem/packet.hh
--- a/src/mem/packet.hh
+++ b/src/mem/packet.hh
@@ -89,6 +89,7 @@
         Writeback,
         SoftPFReq,
         HardPFReq,
+        HardPFExReq,
         SoftPFResp,
         HardPFResp,
         // WriteInvalidateReq transactions used to be generated by the
@@ -128,6 +129,7 @@
         PrintReq,       // Print state matching address
         FlushReq,      //request for a cache flush
         InvalidationReq,   // request for address to be invalidated from lsq
+        StoreAccess,    // request for exclusive permission ahead of a store
         NUM_MEM_CMDS
     };
 
@@ -152,6 +154,7 @@
         IsError,        //!< Error response
         IsPrint,        //!< Print state matching address (for debugging)
         IsFlush,        //!< Flush the address from caches
+        IsStoreAccess,  //!< exclusive permission
         NUM_COMMAND_ATTRIBUTES
     };
 
@@ -199,7 +202,9 @@
     bool isError() const { return testCmdAttrib(IsError); }
     bool isPrint() const { return testCmdAttrib(IsPrint); }
     bool isFlush() const { return testCmdAttrib(IsFlush); }
+    bool isStoreAccess() const { return testCmdAttrib(IsStoreAccess); }
+    bool isHWPrefetch() const { return testCmdAttrib(IsHWPrefetch); }
 
     const Command
     responseCommand() const
     {
@@ -509,6 +514,7 @@
     bool isError() const { return cmd.isError(); }
     bool isPrint() const { return cmd.isPrint(); }
     bool isFlush() const { return cmd.isFlush(); }
+    bool isStoreAccess() const { return cmd.isStoreAccess(); }
 
     // Snoop flags
     void assertMemInhibit() { flags.set(MEM_INHIBIT); }
@@ -677,8 +683,14 @@
         // If this is a request packet for which there's no response,
         // delete the request object here, since the requester will
         // never get the chance.
-        if (req && isRequest() && !needsResponse())
+        if (req && isRequest() && !needsResponse()
+            && !cmd.isHWPrefetch()) {
+            //
+            // Hardware prefetches are excluded: HardPFExReq needs no
+            // response, and its owners delete the request themselves.
+            //
             delete req;
+        }
         deleteData();
     }
 
diff --git a/src/mem/packet.cc b/src/mem/packet.cc
--- a/src/mem/packet.cc
+++ b/src/mem/packet.cc
@@ -92,6 +92,9 @@
     /* HardPFReq */
     { SET4(IsRead, IsRequest, IsHWPrefetch, NeedsResponse),
             HardPFResp, "HardPFReq" },
+    /* HardPFExReq */
+    { SET4(IsRead, IsRequest, IsHWPrefetch, NeedsExclusive),
+            InvalidCmd, "HardPFExReq" },
     /* SoftPFResp */
     { SET4(IsRead, IsResponse, IsSWPrefetch, HasData),
             InvalidCmd, "SoftPFResp" },
@@ -169,6 +172,9 @@
     /* Invalidation Request */
     { SET3(NeedsExclusive, IsInvalidate, IsRequest),
       InvalidCmd, "InvalidationReq" },
+    /* StoreAccess Request */
+    { SET3(NeedsExclusive, IsRequest, IsStoreAccess), InvalidCmd,
+      "StoreAccessReq" },
 };
 
 bool
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -135,6 +135,7 @@
   COMMIT, desc="Commit version";
   NULL, desc="Invalid request type";
   FLUSH, desc="Flush request type";
+  Store_Access, desc="Store Permission Prefetch";
 }
 
 enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") {
diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh
--- a/src/mem/ruby/system/RubyPort.hh
+++ b/src/mem/ruby/system/RubyPort.hh
@@ -165,6 +165,7 @@
 
   protected:
     void ruby_hit_callback(PacketPtr pkt);
+    void rubyDoRetries();
     void testDrainComplete();
     void ruby_eviction_callback(const Address& address);
 
diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc
--- a/src/mem/ruby/system/RubyPort.cc
+++ b/src/mem/ruby/system/RubyPort.cc
@@ -347,6 +347,14 @@
 
     slave_ports[pkt->getSrc()]->hitCallback(pkt);
 
+    rubyDoRetries();
+
+    testDrainComplete();
+}
+
+void
+RubyPort::rubyDoRetries()
+{
     //
     // If we had to stall the MemSlavePorts, wake them up because the sequencer
     // likely has free resources now.
@@ -370,8 +378,6 @@
             (*i)->sendRetry();
         }
     }
-
-    testDrainComplete();
 }
 
 void
diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc
--- a/src/mem/ruby/system/Sequencer.cc
+++ b/src/mem/ruby/system/Sequencer.cc
@@ -358,18 +358,21 @@
 
     assert(i != m_RequestTable.end());
     SequencerRequest* request;
-    request = i->second->front();
-    assert((request->m_type == RubyRequestType_ST) ||
-           (request->m_type == RubyRequestType_ATOMIC) ||
-           (request->m_type == RubyRequestType_RMW_Read) ||
-           (request->m_type == RubyRequestType_RMW_Write) ||
-           (request->m_type == RubyRequestType_Load_Linked) ||
-           (request->m_type == RubyRequestType_Store_Conditional) ||
-           (request->m_type == RubyRequestType_Locked_RMW_Read) ||
-           (request->m_type == RubyRequestType_Locked_RMW_Write) ||
-           (request->m_type == RubyRequestType_FLUSH));
-
+    bool first_request = true;
     while (!i->second->empty()) {
+        request = i->second->front();
+        if (first_request) {
+            assert((request->m_type == RubyRequestType_Store_Access) ||
+                   (request->m_type == RubyRequestType_ST) ||
+                   (request->m_type == RubyRequestType_ATOMIC) ||
+                   (request->m_type == RubyRequestType_RMW_Read) ||
+                   (request->m_type == RubyRequestType_RMW_Write) ||
+                   (request->m_type == RubyRequestType_Load_Linked) ||
+                   (request->m_type == RubyRequestType_Store_Conditional) ||
+                   (request->m_type == RubyRequestType_Locked_RMW_Read) ||
+                   (request->m_type == RubyRequestType_Locked_RMW_Write) ||
+                   (request->m_type == RubyRequestType_FLUSH));
+        }
 
         // handle write request
         if ((request->m_type != RubyRequestType_LD) &&
@@ -381,7 +384,7 @@
             // Not valid for Network_test protocl
             //
             bool success = true;
-            if(!m_usingNetworkTester)
+            if (!m_usingNetworkTester)
                 success = handleLlsc(address, request);
             if (request->m_type == RubyRequestType_Locked_RMW_Read) {
                 m_controller->blockOnQueue(address, m_mandatory_q_ptr);
@@ -402,9 +405,7 @@
 
         i->second->pop_front();
         markRemoved();
-
-        // Process the next entry in the list
-        request = i->second->front();
+        first_request = false;
     }
 
     // free all outstanding requests corresponding to this address
@@ -430,11 +431,13 @@
 
     assert(i != m_RequestTable.end());
     SequencerRequest* request;
-    request = i->second->front();
-    assert((request->m_type == RubyRequestType_LD) ||
-           (request->m_type == RubyRequestType_IFETCH));
-
+    bool first_request = true;
     while (!i->second->empty()) {
+        request = i->second->front();
+        if (first_request) {
+            assert((request->m_type == RubyRequestType_LD) ||
+                   (request->m_type == RubyRequestType_IFETCH));
+        }
 
         if ((request->m_type != RubyRequestType_LD) &&
             (request->m_type != RubyRequestType_IFETCH)) {
@@ -447,9 +450,7 @@
                          firstResponseTime);
         i->second->pop_front();
         markRemoved();
-
-        // Process the next entry in the list
-        request = i->second->front();
+        first_request = false;
     }
 
     // free all outstanding requests corresponding to this address
@@ -538,7 +539,15 @@
         delete pkt;
         g_system_ptr->m_cache_recorder->enqueueNextFlushRequest();
     } else {
-        ruby_hit_callback(pkt);
+        if (type != RubyRequestType_Store_Access) {
+            ruby_hit_callback(pkt);
+        } else {
+            // Never reaches the CPU; mirror ruby_hit_callback()'s retry/drain.
+            rubyDoRetries();
+            DPRINTF(RubySequencer,
+                    "StoreAccess's response; skip forwarding to CPU\n");
+            testDrainComplete();
+        }
     }
 }
 
@@ -557,8 +566,12 @@
 
     RubyRequestType primary_type = RubyRequestType_NULL;
     RubyRequestType secondary_type = RubyRequestType_NULL;
 
-    if (pkt->isLLSC()) {
+    if (pkt->isStoreAccess()) {
+        DPRINTF(RubySequencer, "Issuing Store Access\n");
+        primary_type = RubyRequestType_Store_Access;
+        secondary_type = RubyRequestType_ST;
+    } else if (pkt->isLLSC()) {
         //
         // Alpha LL/SC instructions need to be handled carefully by the cache
         // coherence protocol to ensure they follow the proper semantics. In