diff -r 10647f5d0f7f -r 8492ca5ea301 src/mem/dram_ctrl.cc --- a/src/mem/dram_ctrl.cc Thu Feb 04 16:57:59 2016 -0600 +++ b/src/mem/dram_ctrl.cc Mon Feb 08 13:55:00 2016 -0600 @@ -58,9 +58,10 @@ DRAMCtrl::DRAMCtrl(const DRAMCtrlParams* p) : AbstractMemory(p), port(name() + ".port", *this), isTimingMode(false), + requestPortBlocked(false), responsePortBlocked(false), retryRdReq(false), retryWrReq(false), busState(READ), - nextReqEvent(this), respondEvent(this), + nextReqEvent(this), respondEvent(this), backendEvent(this), deviceSize(p->device_size), deviceBusWidth(p->device_bus_width), burstLength(p->burst_length), deviceRowBufferSize(p->device_rowbuffer_size), @@ -75,6 +76,8 @@ banksPerRank(p->banks_per_rank), channels(p->channels), rowsPerBank(0), readBufferSize(p->read_buffer_size), writeBufferSize(p->write_buffer_size), + totalBuffers(p->read_buffer_size + p->write_buffer_size + + p->response_buffer_size), /* one budget for all queues */ writeHighThreshold(writeBufferSize * p->write_high_thresh_perc / 100.0), writeLowThreshold(writeBufferSize * p->write_low_thresh_perc / 100.0), minWritesPerSwitch(p->min_writes_per_switch), @@ -286,6 +289,17 @@ } bool +DRAMCtrl::buffersFull(unsigned int needed_entries) const +{ + unsigned int currently_used = readQueue.size() + writeQueue.size() + + respQueue.size() + backendQueue.size(); + DPRINTF(DRAM, "Total buffer limit %d, current size %d, entries needed %d\n", + totalBuffers, currently_used, needed_entries); + + return (currently_used + needed_entries) > totalBuffers; /* exact fit OK */ +} + +bool DRAMCtrl::readQueueFull(unsigned int neededEntries) const { DPRINTF(DRAM, "Read queue limit %d, current size %d, entries needed %d\n", @@ -596,13 +610,6 @@ return true; } - // Calc avg gap between requests - if (prevArrival != 0) { - totGap += curTick() - prevArrival; - } - prevArrival = curTick(); - - // Find out how many dram packets a pkt translates to // If the burst size is equal or larger than the pkt size, then a pkt // translates to only one dram packet. 
Otherwise, a pkt translates to @@ -611,6 +618,20 @@ unsigned offset = pkt->getAddr() & (burstSize - 1); unsigned int dram_pkt_count = divCeil(offset + size, burstSize); + // Check if there is enough total buffering to accept this packet + if (buffersFull(dram_pkt_count)) { + DPRINTF(DRAM, " Controller buffers full. Blocking requests...\n"); + requestPortBlocked = true; + numBufsFullRetry++; + return false; + } + + // Calc avg gap between requests + if (prevArrival != 0) { + totGap += curTick() - prevArrival; + } + prevArrival = curTick(); + // check local buffers and do not accept if full if (pkt->isRead()) { assert(size != 0); @@ -648,6 +669,19 @@ } void +DRAMCtrl::recvRespRetry() +{ + assert(responsePortBlocked); + responsePortBlocked = false; + + assert(!backendEvent.scheduled()); + assert(!backendQueue.empty()); + assert(backendQueue.top()->readyTime <= curTick()); + // Backend queue is operated at the memory controller's frequency + schedule(backendEvent, nextCycle()); +} + +void DRAMCtrl::processRespondEvent() { DPRINTF(DRAM, @@ -838,7 +872,7 @@ void DRAMCtrl::accessAndRespond(PacketPtr pkt, Tick static_latency) { - DPRINTF(DRAM, "Responding to Address %lld.. ",pkt->getAddr()); + DPRINTF(DRAM, "Scheduling response for %lld...\n", pkt->getAddr()); bool needsResponse = pkt->needsResponse(); // do the actual memory access which also turns the packet into a @@ -853,14 +887,20 @@ // with headerDelay that takes into account the delay provided by // the xbar and also the payloadDelay that takes into account the // number of data beats. 
- Tick response_time = curTick() + static_latency + pkt->headerDelay + - pkt->payloadDelay; + // NOTE: Unlike other queues, the backend queue and port are operated + // at the frequency of the memory controller + Tick response_time = clockEdge(ticksToCycles(static_latency + + pkt->headerDelay + + pkt->payloadDelay)); // Here we reset the timing of the packet before sending it out. pkt->headerDelay = pkt->payloadDelay = 0; - // queue the packet in the response queue to be sent out after + // queue the packet in the back-end queue to be sent out after // the static latency has passed - port.schedTimingResp(pkt, response_time, true); + backendQueue.push(new BufferEntryBase(pkt, response_time)); + if (!backendEvent.scheduled() && !responsePortBlocked) { + schedule(backendEvent, response_time); + } } else { // @todo the packet is going to be deleted, and the DRAMPacket // is still having a pointer to it @@ -873,6 +914,44 @@ } void +DRAMCtrl::serviceBackendQueue() +{ + assert(!responsePortBlocked); + + BufferEntryBase *queue_head = backendQueue.top(); + PacketPtr pkt = queue_head->pkt; + Addr target_addr = pkt->getAddr(); + const std::string cmd = pkt->cmdString(); /* pkt may be freed once sent */ + if (!port.sendTimingResp(pkt)) { + DPRINTF(DRAM, "Response port blocked\n"); + responsePortBlocked = true; + } else { + DPRINTF(DRAM, "Sent response packet %s for 0x%0x\n", cmd, target_addr); + backendQueue.pop(); + delete queue_head; + + if (backendQueue.size() > 0) { + queue_head = backendQueue.top(); + Tick next_ready = std::max(queue_head->readyTime, nextCycle()); + assert(!backendEvent.scheduled() && !responsePortBlocked); + schedule(backendEvent, next_ready); + } + + if (!respQueue.empty() && !respondEvent.scheduled()) { + DRAMPacket *resp_queue_head = respQueue.front(); + Tick next_ready = std::max(resp_queue_head->readyTime, nextCycle()); + schedule(respondEvent, next_ready); + } + + if (requestPortBlocked) { + DPRINTF(DRAM, " Unblocking requests\n"); + requestPortBlocked = false; + port.sendRetryReq(); + } + } +} + +void 
DRAMCtrl::activateBank(Rank& rank_ref, Bank& bank_ref, Tick act_tick, uint32_t row) { @@ -1975,6 +2054,10 @@ avgMemAccLat = totMemAccLat / (readBursts - servicedByWrQ); + numBufsFullRetry + .name(name() + ".numBufsFullRetry") + .desc("Number of times all buffers full causing retry"); + numRdRetry .name(name() + ".numRdRetry") .desc("Number of times read queue was full causing retry"); @@ -2197,8 +2280,7 @@ } DRAMCtrl::MemoryPort::MemoryPort(const std::string& name, DRAMCtrl& _memory) - : QueuedSlavePort(name, &_memory, queue), queue(_memory, *this), - memory(_memory) + : SlavePort(name, &_memory), memory(_memory) { } AddrRangeList @@ -2214,12 +2296,10 @@ { pkt->pushLabel(memory.name()); - if (!queue.checkFunctional(pkt)) { - // Default implementation of SimpleTimingPort::recvFunctional() - // calls recvAtomic() and throws away the latency; we can save a - // little here by just not calculating the latency. - memory.recvFunctional(pkt); - } + // Access the memory directly rather than via recvAtomic(), whose latency + // a functional access would discard anyway. + // NOTE(review): responses waiting in backendQueue are not checked here. 
+ memory.recvFunctional(pkt); pkt->popLabel(); } @@ -2237,6 +2317,12 @@ return memory.recvTimingReq(pkt); } +void +DRAMCtrl::MemoryPort::recvRespRetry() +{ + memory.recvRespRetry(); +} + DRAMCtrl* DRAMCtrlParams::create() { diff -r 10647f5d0f7f -r 8492ca5ea301 src/mem/DRAMCtrl.py --- a/src/mem/DRAMCtrl.py Thu Feb 04 16:57:59 2016 -0600 +++ b/src/mem/DRAMCtrl.py Mon Feb 08 13:55:00 2016 -0600 @@ -82,6 +82,8 @@ # the cacheline size or request/packet size write_buffer_size = Param.Unsigned(64, "Number of write queue entries") read_buffer_size = Param.Unsigned(32, "Number of read queue entries") + response_buffer_size = Param.Unsigned(32, "Number of response queue " + "entries") # threshold in percent for when to forcefully trigger writes and # start emptying the write buffer diff -r 10647f5d0f7f -r 8492ca5ea301 src/mem/dram_ctrl.hh --- a/src/mem/dram_ctrl.hh Thu Feb 04 16:57:59 2016 -0600 +++ b/src/mem/dram_ctrl.hh Mon Feb 08 13:55:00 2016 -0600 @@ -52,6 +52,7 @@ #define __MEM_DRAM_CTRL_HH__ #include <deque> +#include <queue> #include <string> #include <unordered_set> @@ -93,10 +94,9 @@ - // For now, make use of a queued slave port to avoid dealing with - // flow control for the responses being sent back - class MemoryPort : public QueuedSlavePort + // Plain slave port: responses are sent from the backend queue and the + // controller handles response flow control itself (recvRespRetry) + class MemoryPort : public SlavePort { - RespPacketQueue queue; DRAMCtrl& memory; public: @@ -111,6 +111,8 @@ bool recvTimingReq(PacketPtr); + void recvRespRetry(); + virtual AddrRangeList getAddrRanges() const; }; @@ -127,7 +129,17 @@ bool isTimingMode; /** - * Remember if we have to retry a request when available. + * Bools to track whether the port is blocked from receiving requests + * because total buffering is full (and thus, needs to send a retry to the + * requester when buffer space is available again), and to track when the + * response port gets blocked. + */ + bool requestPortBlocked; + bool responsePortBlocked; + + /** + * Remember the type of request we have to retry when read or write buffer + * space is available, respectively. 
*/ bool retryRdReq; bool retryWrReq; @@ -408,10 +420,12 @@ }; /** - * A DRAM packet stores packets along with the timestamp of when - * the packet entered the queue, and also the decoded address. + * The base class for buffer entries in the controller. These track + * the request packet and timestamp of when a packet entered a queue. + * BufferEntryBase is used to wrap packets queued in the backend queue, + * since they only need the packet pointer and timing info. */ - class DRAMPacket { + class BufferEntryBase { public: @@ -424,6 +438,25 @@ /** This comes from the outside world */ const PacketPtr pkt; + BufferEntryBase(PacketPtr _pkt, Tick ready_time) + : entryTime(curTick()), readyTime(ready_time), pkt(_pkt) + { } + + bool operator<(const BufferEntryBase& pkt_wrap) const + { return readyTime < pkt_wrap.readyTime; } + }; + + /** + * A DRAM packet stores packets along with the timestamp of when + * the packet entered the queue, so it inherits these from + * BufferEntryBase, and adds decoded DRAM access information like + * the type and size of the access, and rank/bank/row information. These + * are queued in the read, write, and resp queues. 
+ */ + class DRAMPacket : public BufferEntryBase { + + public: + const bool isRead; /** Will be populated by address decoder */ @@ -463,10 +496,10 @@ DRAMPacket(PacketPtr _pkt, bool is_read, uint8_t _rank, uint8_t _bank, uint32_t _row, uint16_t bank_id, Addr _addr, unsigned int _size, Bank& bank_ref, Rank& rank_ref) - : entryTime(curTick()), readyTime(curTick()), - pkt(_pkt), isRead(is_read), rank(_rank), bank(_bank), row(_row), - bankId(bank_id), addr(_addr), size(_size), burstHelper(NULL), - bankRef(bank_ref), rankRef(rank_ref) + : BufferEntryBase(_pkt, curTick()), isRead(is_read), rank(_rank), + bank(_bank), row(_row), bankId(bank_id), addr(_addr), + size(_size), burstHelper(NULL), bankRef(bank_ref), + rankRef(rank_ref) { } }; @@ -484,6 +517,14 @@ EventWrapper<DRAMCtrl, &DRAMCtrl::processRespondEvent> respondEvent; /** + * Check if all buffers are full + * + * @param pktCount The number of buffer entries needed + * @return true if buffers would overfill, false otherwise + */ + bool buffersFull(unsigned int pktCount) const; + + /** * Check if the read queue has room for more entries * * @param pktCount The number of entries needed in the read queue @@ -552,6 +593,15 @@ void accessAndRespond(PacketPtr pkt, Tick static_latency); /** + * When the backend queue has response packets to be sent back to + * requesters, the backendEvent is scheduled and calls + * serviceBackendQueue(). If the response port unblocks backend queue + * space, then this function wakes up the appropriate waiting queues. + */ + void serviceBackendQueue(); + EventWrapper<DRAMCtrl, &DRAMCtrl::serviceBackendQueue> backendEvent; + + /** * Address decoder to figure out physical mapping onto ranks, * banks, and rows. This function is called multiple times on the same * system packet if the pakcet is larger than burst of the memory. The @@ -671,6 +721,27 @@ std::deque<DRAMPacket*> respQueue; /** + * A comparator to sort BufferEntryBase types based on their ready times. + * This is currently used to sort the backend queue. 
+ */ + struct rqCompare { + bool operator() (BufferEntryBase *lhs, BufferEntryBase *rhs) const + { + return lhs->readyTime > rhs->readyTime; + } + }; + + /** + * The backend queue is a priority queue that sorts response packets on + * when they become available to send through the port (e.g. read + * packets traverse the read queue, access memory, and incur backend + * latency, while writes only incur frontend latency, so they might be + * sent before reads started at the same time). + */ + std::priority_queue<BufferEntryBase*, std::vector<BufferEntryBase*>, + rqCompare > backendQueue; + + /** * Vector of ranks */ std::vector<Rank*> ranks; @@ -698,6 +769,7 @@ uint32_t rowsPerBank; const uint32_t readBufferSize; const uint32_t writeBufferSize; + const uint32_t totalBuffers; const uint32_t writeHighThreshold; const uint32_t writeLowThreshold; const uint32_t minWritesPerSwitch; @@ -785,6 +857,7 @@ Stats::Scalar neitherReadNorWrite; Stats::Vector perBankRdBursts; Stats::Vector perBankWrBursts; + Stats::Scalar numBufsFullRetry; Stats::Scalar numRdRetry; Stats::Scalar numWrRetry; Stats::Scalar totGap; @@ -886,6 +959,7 @@ Tick recvAtomic(PacketPtr pkt); void recvFunctional(PacketPtr pkt); bool recvTimingReq(PacketPtr pkt); + void recvRespRetry(); };