diff -r 2492d7ccda7e -r 0e146004005e configs/common/CacheConfig.py --- a/configs/common/CacheConfig.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/CacheConfig.py Wed Jul 31 23:50:10 2013 -0700 @@ -69,7 +69,13 @@ # bytes (256 bits). system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain, size=options.l2_size, - assoc=options.l2_assoc) + assoc=options.l2_assoc, + read_latency=options.l2_read_lat, + write_latency=options.l2_write_lat, + enable_bank_model=options.l2_enable_bank, + num_banks=options.l2_num_banks, + bank_intlv_high_bit=options.l2_intlv_bit) + system.tol2bus = CoherentBus(clk_domain = system.cpu_clk_domain, width = 32) @@ -79,9 +85,19 @@ for i in xrange(options.num_cpus): if options.caches: icache = icache_class(size=options.l1i_size, - assoc=options.l1i_assoc) + assoc=options.l1i_assoc, + read_latency=options.l1_read_lat, + write_latency=options.l1_write_lat, + enable_bank_model=options.l1_enable_bank, + num_banks=options.l1_num_banks, + bank_intlv_high_bit=options.l1_intlv_bit) dcache = dcache_class(size=options.l1d_size, - assoc=options.l1d_assoc) + assoc=options.l1d_assoc, + read_latency=options.l1_read_lat, + write_latency=options.l1_write_lat, + enable_bank_model=options.l1_enable_bank, + num_banks=options.l1_num_banks, + bank_intlv_high_bit=options.l1_intlv_bit) # When connecting the caches, the clock is also inherited # from the CPU in question diff -r 2492d7ccda7e -r 0e146004005e configs/common/Caches.py --- a/configs/common/Caches.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/Caches.py Wed Jul 31 23:50:10 2013 -0700 @@ -48,24 +48,30 @@ class L1Cache(BaseCache): assoc = 2 - hit_latency = 2 + read_latency = 2 + write_latency = 2 response_latency = 2 + enable_bank_model = False mshrs = 4 tgts_per_mshr = 20 is_top_level = True class L2Cache(BaseCache): assoc = 8 - hit_latency = 20 + read_latency = 20 + write_latency = 20 response_latency = 20 + enable_bank_model = False mshrs = 20 tgts_per_mshr = 12 write_buffers = 8 class 
IOCache(BaseCache): assoc = 8 - hit_latency = 50 + read_latency = 50 + write_latency = 50 response_latency = 50 + enable_bank_model = False mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -74,8 +80,10 @@ class PageTableWalkerCache(BaseCache): assoc = 2 - hit_latency = 2 + read_latency = 2 + write_latency = 2 response_latency = 2 + enable_bank_model = False mshrs = 10 size = '1kB' tgts_per_mshr = 12 diff -r 2492d7ccda7e -r 0e146004005e configs/common/O3_ARM_v7a.py --- a/configs/common/O3_ARM_v7a.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/O3_ARM_v7a.py Wed Jul 31 23:50:10 2013 -0700 @@ -147,7 +147,7 @@ # Instruction Cache class O3_ARM_v7a_ICache(BaseCache): - hit_latency = 1 + read_latency = 1 response_latency = 1 mshrs = 2 tgts_per_mshr = 8 @@ -157,7 +157,7 @@ # Data Cache class O3_ARM_v7a_DCache(BaseCache): - hit_latency = 2 + read_latency = 2 response_latency = 2 mshrs = 6 tgts_per_mshr = 8 @@ -169,7 +169,7 @@ # TLB Cache # Use a cache as a L2 TLB class O3_ARM_v7aWalkCache(BaseCache): - hit_latency = 4 + read_latency = 4 response_latency = 4 mshrs = 6 tgts_per_mshr = 8 @@ -181,7 +181,7 @@ # L2 Cache class O3_ARM_v7aL2(BaseCache): - hit_latency = 12 + read_latency = 12 response_latency = 12 mshrs = 16 tgts_per_mshr = 8 diff -r 2492d7ccda7e -r 0e146004005e configs/common/Options.py --- a/configs/common/Options.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/Options.py Wed Jul 31 23:50:10 2013 -0700 @@ -103,6 +103,36 @@ parser.add_option("--l1i_assoc", type="int", default=2) parser.add_option("--l2_assoc", type="int", default=8) parser.add_option("--l3_assoc", type="int", default=16) + parser.add_option("--l1-read-lat", type="int", default="2", + help="L1 read latency (cycles).") + parser.add_option("--l2-read-lat", type="int", default="10", + help="L2 read latency (cycles).") + parser.add_option("--l3-read-lat", type="int", default="40", + help="L3 read latency (cycles).") + parser.add_option("--l1-write-lat", type="int", default="2", + help="L1 
write latency (cycles).") + parser.add_option("--l2-write-lat", type="int", default="10", + help="L2 write latency (cycles).") + parser.add_option("--l3-write-lat", type="int", default="40", + help="L3 write latency (cycles).") + parser.add_option("--l1-enable-bank", action="store_true", + help="Enable L1 bank model") + parser.add_option("--l2-enable-bank", action="store_true", + help="Enable L2 bank model") + parser.add_option("--l3-enable-bank", action="store_true", + help="Enable L3 bank model") + parser.add_option("--l1-num-banks", type="int", default="1", + help="L1 bank count.") + parser.add_option("--l2-num-banks", type="int", default="1", + help="L2 bank count.") + parser.add_option("--l3-num-banks", type="int", default="1", + help="L3 bank count.") + parser.add_option("--l1-intlv-bit", type="int", default="0", + help="L1 bank interleave highest bit.") + parser.add_option("--l2-intlv-bit", type="int", default="0", + help="L2 bank interleave highest bit.") + parser.add_option("--l3-intlv-bit", type="int", default="0", + help="L3 bank interleave highest bit.") parser.add_option("--cacheline_size", type="int", default=64) # Enable Ruby diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/BaseCache.py --- a/src/mem/cache/BaseCache.py Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/BaseCache.py Wed Jul 31 23:50:10 2013 -0700 @@ -48,9 +48,15 @@ type = 'BaseCache' cxx_header = "mem/cache/base.hh" assoc = Param.Int("associativity") - hit_latency = Param.Cycles("The hit latency for this cache") + read_latency = Param.Cycles("The read latency for this cache (cycles)") + write_latency = Param.Cycles("The write latency for this cache (cycles)") response_latency = Param.Cycles( - "Additional cache latency for the return path to core on a miss"); + "Additional cache latency for the return path to core on a miss") + enable_bank_model = Param.Bool("knob to control if the bank model is used") + num_banks = Param.Int(1, "Number of cache data array banks") + 
bank_intlv_high_bit = Param.Int(0, + "Cache data array bank interleave highest bit " + "(0=automatically aligned to cache line granularity)") max_miss_count = Param.Counter(0, "number of misses to handle before calling exit") mshrs = Param.Int("number of MSHRs (max outstanding requests)") diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/SConscript --- a/src/mem/cache/SConscript Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/SConscript Wed Jul 31 23:50:10 2013 -0700 @@ -43,6 +43,7 @@ DebugFlag('Cache') DebugFlag('CachePort') +DebugFlag('CacheBank') DebugFlag('CacheRepl') DebugFlag('CacheTags') DebugFlag('HWPrefetch') diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/base.hh --- a/src/mem/cache/base.hh Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/base.hh Wed Jul 31 23:50:10 2013 -0700 @@ -61,6 +61,7 @@ #include "base/types.hh" #include "debug/Cache.hh" #include "debug/CachePort.hh" +#include "debug/CacheBank.hh" #include "mem/cache/mshr_queue.hh" #include "mem/mem_object.hh" #include "mem/packet.hh" @@ -180,9 +181,51 @@ bool mustSendRetry; + EventWrapper sendRetryEvent; + + }; + + /** + * Cache data array bank. + * Only models bank access contention, does not hold actual data + */ + class CacheBank + { + private: + /** Descriptive name (for DPRINTF output) */ + std::string bankName; - EventWrapper sendRetryEvent; + bool inService; + + Tick nextIdleTick; + + public: + + /** Mark this cache bank in-service until finishTick */ + void markInService(Tick finishTick); + + /** Mark this cache bank idle */ + void clearInService(); + + /** Extend this cache bank's in-service time by extraTick */ + void extendService(Tick extraTick); + + CacheBank(const std::string &_name) : + bankName(_name), + inService(false), + nextIdleTick(0) + {} + + /** Check if the service is done at current tick */ + bool serviceDone() const; + + bool isBusy() const { return inService; } + + Tick finishTick() const { return nextIdleTick; } + + /** Return bank name (for DPRINTF). 
*/ + const std::string name() const { return bankName; } }; @@ -191,6 +234,9 @@ protected: + /** Data array banks */ + std::vector bank; + /** Miss status registers */ MSHRQueue mshrQueue; @@ -246,9 +292,14 @@ const unsigned blkSize; /** - * The latency of a hit in this device. + * The latency of a read in this device. */ - const Cycles hitLatency; + const Cycles readLatency; + + /** + * The latency of a write in this device. + */ + const Cycles writeLatency; /** * The latency of sending reponse to its upper level cache/core on a @@ -258,6 +309,36 @@ */ const Cycles responseLatency; + /** + * The knob to turn on/off cache data array bank model + */ + const bool enableBankModel; + + /** + * The number of cache data array banks. + */ + const unsigned numBanks; + + /** + * The number of cache data array bank interleave bits + */ + const unsigned bankIntlvBits; + + /** + * Cache data array bank interleave high bit + */ + const unsigned bankIntlvHighBit; + + /** + * Cache data array bank interleave low bit + */ + const unsigned bankIntlvLowBit; + + /** + * Cache data array bank interleave mask + */ + const Addr bankIntlvMask; + /** The number of targets for each MSHR. */ const int numTarget; @@ -442,7 +523,8 @@ public: typedef BaseCacheParams Params; BaseCache(const Params *p); - ~BaseCache() {} + /** Non-default destructor is needed to deallocate memory. 
*/ + virtual ~BaseCache(); virtual void init(); @@ -461,6 +543,14 @@ return blkSize; } + /** + * Return bank ID according to interleave bits + */ + unsigned + getBankId(Addr addr) const + { + return (addr & bankIntlvMask) >> bankIntlvLowBit; + } Addr blockAlign(Addr addr) const { return (addr & ~(Addr(blkSize - 1))); } diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/base.cc --- a/src/mem/cache/base.cc Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/base.cc Wed Jul 31 23:50:10 2013 -0700 @@ -68,12 +68,21 @@ BaseCache::BaseCache(const Params *p) : MemObject(p), + bank(p->num_banks), mshrQueue("MSHRs", p->mshrs, 4, MSHRQueue_MSHRs), writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, MSHRQueue_WriteBuffer), blkSize(p->system->cacheLineSize()), - hitLatency(p->hit_latency), + readLatency(p->read_latency), + writeLatency(p->write_latency), responseLatency(p->response_latency), + enableBankModel(p->enable_bank_model), + numBanks(p->num_banks), + bankIntlvBits(ceilLog2(p->num_banks)), + bankIntlvHighBit(p->bank_intlv_high_bit ? 
p->bank_intlv_high_bit : + ceilLog2(blkSize) + bankIntlvBits - 1), + bankIntlvLowBit(bankIntlvHighBit + 1 - bankIntlvBits), + bankIntlvMask(((ULL(1) << bankIntlvBits) - 1) << bankIntlvLowBit), numTarget(p->tgts_per_mshr), forwardSnoops(p->forward_snoops), isTopLevel(p->is_top_level), @@ -83,6 +92,57 @@ addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()), system(p->system) { + if (ULL(1) << bankIntlvBits != numBanks) + fatal("%s number of banks is not a power of 2", name()); + + uint64_t granularity = ULL(1) << bankIntlvLowBit; + if (granularity < blkSize) + fatal("%s bank interleave granuarity (%ld) smaller than line size " + " (%ld)", name(), granularity, blkSize); + + for (unsigned i = 0; i < bank.size(); ++i) { + bank[i] = new CacheBank(csprintf("%s.bank%d", p->name, i)); + } +} + +BaseCache::~BaseCache() +{ + for (unsigned i = 0; i < bank.size(); ++i) + delete bank[i]; +} + +void +BaseCache::CacheBank::markInService(Tick finishTick) +{ + assert(!inService); + nextIdleTick = finishTick; + DPRINTF(CacheBank, "In service until Tick %ld\n", + nextIdleTick); + inService = true; +} + +void +BaseCache::CacheBank::clearInService() +{ + assert(inService); + DPRINTF(CacheBank, "Service done, become idle\n"); + inService = false; +} + +void +BaseCache::CacheBank::extendService(Tick extraTick) +{ + assert(inService); + assert(nextIdleTick > curTick()); + nextIdleTick += extraTick; + DPRINTF(CacheBank, "Extend service to Tick %ld\n", + nextIdleTick); +} + +bool +BaseCache::CacheBank::serviceDone() const +{ + return inService && nextIdleTick <= curTick(); } void diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/cache_impl.hh --- a/src/mem/cache/cache_impl.hh Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/cache_impl.hh Wed Jul 31 23:50:10 2013 -0700 @@ -56,6 +56,7 @@ #include "debug/Cache.hh" #include "debug/CachePort.hh" #include "debug/CacheTags.hh" +#include "debug/CacheBank.hh" #include "mem/cache/prefetch/base.hh" #include "mem/cache/blk.hh" #include 
"mem/cache/cache.hh" @@ -293,12 +294,21 @@ if (pkt->req->isUncacheable()) { uncacheableFlush(pkt); blk = NULL; - lat = hitLatency; + lat = readLatency; return false; } int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; blk = tags->accessBlock(pkt->getAddr(), lat, id); + // Update latency decided by if it's read or write + // @todo: We now consider the tag lookup and data array operation are in + // parallel. However, a more accurate model should assume they are + // in series. + if (pkt->isRead()) { + if (readLatency > lat) lat = readLatency; + } else if (pkt->isWrite()) { + if (writeLatency > lat) lat = writeLatency; + } DPRINTF(Cache, "%s%s %x %s %s\n", pkt->cmdString(), pkt->req->isInstFetch() ? " (ifetch)" : "", @@ -374,7 +384,7 @@ { DPRINTF(Cache, "%s for %s address %x size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - Tick time = clockEdge(hitLatency); + Tick time = clockEdge(readLatency); assert(pkt->isResponse()); @@ -416,8 +426,8 @@ delete pendingDelete[x]; pendingDelete.clear(); - // we charge hitLatency for doing just about anything here - Tick time = clockEdge(hitLatency); + // we charge readLatency for doing just about anything here + Tick time = clockEdge(readLatency); assert(pkt->isRequest()); @@ -471,7 +481,7 @@ return true; } - Cycles lat = hitLatency; + Cycles lat = readLatency; BlkType *blk = NULL; PacketList writebacks; @@ -511,6 +521,7 @@ next_pf_time = prefetcher->notify(pkt, time); } + unsigned bank_id = getBankId(pkt->getAddr()); if (needsResponse) { pkt->makeTimingResponse(); // @todo: Make someone pay for this @@ -522,6 +533,10 @@ /// cache is still relying on it pendingDelete.push_back(pkt); } + // Mark the corresponding bank in service + if (enableBankModel) { + bank[bank_id]->markInService(clockEdge(lat)); + } } else { // miss @@ -655,7 +670,7 @@ Tick Cache::recvAtomic(PacketPtr pkt) { - Cycles lat = hitLatency; + Cycles lat = readLatency; // @TODO: make this a parameter bool last_level_cache = 
false; @@ -856,7 +871,7 @@ { assert(pkt->isResponse()); - Tick time = clockEdge(hitLatency); + Tick time = clockEdge(readLatency); MSHR *mshr = dynamic_cast(pkt->senderState); bool is_error = pkt->isError(); @@ -908,6 +923,16 @@ blk = handleFill(pkt, blk, writebacks); assert(blk != NULL); + + if (enableBankModel) { + // mark the corresponding bank in service + unsigned bank_id = getBankId(pkt->getAddr()); + if (bank[bank_id]->isBusy()) { + bank[bank_id]->extendService(writeLatency * clockPeriod()); + } else { + bank[bank_id]->markInService(clockEdge(writeLatency)); + } + } } // First offset for critical word first calculations @@ -1308,7 +1333,7 @@ } DPRINTF(Cache, "%s created response: %s address %x size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - memSidePort->schedTimingSnoopResp(pkt, clockEdge(hitLatency)); + memSidePort->schedTimingSnoopResp(pkt, clockEdge(readLatency)); } template @@ -1531,7 +1556,7 @@ BlkType *blk = tags->findBlock(pkt->getAddr()); handleSnoop(pkt, blk, false, false, false); - return hitLatency * clockPeriod(); + return readLatency * clockPeriod(); } @@ -1768,11 +1793,37 @@ bool Cache::CpuSidePort::recvTimingReq(PacketPtr pkt) { + // unmark bank in service + // NOTE: Ideally, the bank status should be updated immediately after the + // nextIdleTick expires, but we will need to create new events to do that. + // Instead, we only check-and-unmark the inService bit before we really + // want to know the bank status. 
+ // @todo: we need to replace the bank_busy mark/unmark code into an + // event-driven style + if (cache->enableBankModel) + for (auto b = cache->bank.begin(); b != cache->bank.end(); ++b) + if ((*b)->serviceDone()) + (*b)->clearInService(); + + unsigned bank_id = cache->getBankId(pkt->getAddr()); + bool bank_busy = cache->enableBankModel && cache->bank[bank_id]->isBusy(); // always let inhibited requests through even if blocked - if (!pkt->memInhibitAsserted() && blocked) { + if (!pkt->memInhibitAsserted() && (blocked || bank_busy)) { assert(!cache->system->bypassCaches()); - DPRINTF(Cache,"Scheduling a retry while blocked\n"); - mustSendRetry = true; + DPRINTF(Cache, "Scheduling a retry while blocked\n"); + if (blocked) { + // not because of bank is busy + // the cache port is blocked (e.g. no MSHR) + // wait until the cache is unblocked and then send a retry + mustSendRetry = true; + } else { + DPRINTF(CachePort, "Cache port %s denying new requests because the" + " accessing bank is busy\n", name()); + // because of bank is busy + // precisely know which tick the service will finish + assert(!sendRetryEvent.scheduled()); + owner.schedule(sendRetryEvent, cache->bank[bank_id]->finishTick()); + } return false; } @@ -1813,6 +1864,16 @@ bool Cache::MemSidePort::recvTimingResp(PacketPtr pkt) { + // unmark bank in service + // NOTE: Ideally, the bank status should be updated immediately after the + // nextIdleTick expires, but we will need to create new events to do that. + // Instead, we only check-and-unmark the inService bit before we really + // want to know the bank status. 
+ if (cache->enableBankModel) + for (auto b = cache->bank.begin(); b != cache->bank.end(); ++b) + if ((*b)->serviceDone()) + (*b)->clearInService(); + cache->recvTimingResp(pkt); return true; } diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/tags/Tags.py --- a/src/mem/cache/tags/Tags.py Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/tags/Tags.py Wed Jul 31 23:50:10 2013 -0700 @@ -50,7 +50,7 @@ block_size = Param.Int(Parent.cache_line_size, "block size in bytes") # Get the hit latency from the parent (cache) - hit_latency = Param.Cycles(Parent.hit_latency, + hit_latency = Param.Cycles(Parent.read_latency, "The hit latency for this cache") class LRU(BaseTags):