diff -r 2492d7ccda7e -r 0e146004005e configs/common/CacheConfig.py --- a/configs/common/CacheConfig.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/CacheConfig.py Wed Jul 31 23:50:10 2013 -0700 @@ -69,7 +69,13 @@ # bytes (256 bits). system.l2 = l2_cache_class(clk_domain=system.cpu_clk_domain, size=options.l2_size, - assoc=options.l2_assoc) + assoc=options.l2_assoc, + read_latency=options.l2_read_lat, + write_latency=options.l2_write_lat, + enable_bank_model=options.l2_enable_bank, + num_banks=options.l2_num_banks, + bank_intlv_high_bit=options.l2_intlv_bit) + system.tol2bus = CoherentBus(clk_domain = system.cpu_clk_domain, width = 32) @@ -79,9 +85,19 @@ for i in xrange(options.num_cpus): if options.caches: icache = icache_class(size=options.l1i_size, - assoc=options.l1i_assoc) + assoc=options.l1i_assoc, + read_latency=options.l1_read_lat, + write_latency=options.l1_write_lat, + enable_bank_model=options.l1_enable_bank, + num_banks=options.l1_num_banks, + bank_intlv_high_bit=options.l1_intlv_bit) dcache = dcache_class(size=options.l1d_size, - assoc=options.l1d_assoc) + assoc=options.l1d_assoc, + read_latency=options.l1_read_lat, + write_latency=options.l1_write_lat, + enable_bank_model=options.l1_enable_bank, + num_banks=options.l1_num_banks, + bank_intlv_high_bit=options.l1_intlv_bit) # When connecting the caches, the clock is also inherited # from the CPU in question diff -r 2492d7ccda7e -r 0e146004005e configs/common/Caches.py --- a/configs/common/Caches.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/Caches.py Wed Jul 31 23:50:10 2013 -0700 @@ -48,24 +48,30 @@ class L1Cache(BaseCache): assoc = 2 - hit_latency = 2 + read_latency = 2 + write_latency = 2 response_latency = 2 + enable_bank_model = False mshrs = 4 tgts_per_mshr = 20 is_top_level = True class L2Cache(BaseCache): assoc = 8 - hit_latency = 20 + read_latency = 20 + write_latency = 20 response_latency = 20 + enable_bank_model = False mshrs = 20 tgts_per_mshr = 12 write_buffers = 8 class 
IOCache(BaseCache): assoc = 8 - hit_latency = 50 + read_latency = 50 + write_latency = 50 response_latency = 50 + enable_bank_model = False mshrs = 20 size = '1kB' tgts_per_mshr = 12 @@ -74,8 +80,10 @@ class PageTableWalkerCache(BaseCache): assoc = 2 - hit_latency = 2 + read_latency = 2 + write_latency = 2 response_latency = 2 + enable_bank_model = False mshrs = 10 size = '1kB' tgts_per_mshr = 12 diff -r 2492d7ccda7e -r 0e146004005e configs/common/O3_ARM_v7a.py --- a/configs/common/O3_ARM_v7a.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/O3_ARM_v7a.py Wed Jul 31 23:50:10 2013 -0700 @@ -147,7 +147,7 @@ # Instruction Cache class O3_ARM_v7a_ICache(BaseCache): - hit_latency = 1 + read_latency = 1 response_latency = 1 mshrs = 2 tgts_per_mshr = 8 @@ -157,7 +157,7 @@ # Data Cache class O3_ARM_v7a_DCache(BaseCache): - hit_latency = 2 + read_latency = 2 response_latency = 2 mshrs = 6 tgts_per_mshr = 8 @@ -169,7 +169,7 @@ # TLB Cache # Use a cache as a L2 TLB class O3_ARM_v7aWalkCache(BaseCache): - hit_latency = 4 + read_latency = 4 response_latency = 4 mshrs = 6 tgts_per_mshr = 8 @@ -181,7 +181,7 @@ # L2 Cache class O3_ARM_v7aL2(BaseCache): - hit_latency = 12 + read_latency = 12 response_latency = 12 mshrs = 16 tgts_per_mshr = 8 diff -r 2492d7ccda7e -r 0e146004005e configs/common/Options.py --- a/configs/common/Options.py Fri Jul 19 11:52:07 2013 +0200 +++ b/configs/common/Options.py Wed Jul 31 23:50:10 2013 -0700 @@ -103,6 +103,36 @@ parser.add_option("--l1i_assoc", type="int", default=2) parser.add_option("--l2_assoc", type="int", default=8) parser.add_option("--l3_assoc", type="int", default=16) + parser.add_option("--l1-read-lat", type="int", default="2", + help="L1 read latency (cycles).") + parser.add_option("--l2-read-lat", type="int", default="10", + help="L2 read latency (cycles).") + parser.add_option("--l3-read-lat", type="int", default="40", + help="L3 read latency (cycles).") + parser.add_option("--l1-write-lat", type="int", default="2", + help="L1 
write latency (cycles).") + parser.add_option("--l2-write-lat", type="int", default="10", + help="L2 write latency (cycles).") + parser.add_option("--l3-write-lat", type="int", default="40", + help="L3 write latency (cycles).") + parser.add_option("--l1-enable-bank", action="store_true", + help="Enable L1 bank model") + parser.add_option("--l2-enable-bank", action="store_true", + help="Enable L2 bank model") + parser.add_option("--l3-enable-bank", action="store_true", + help="Enable L3 bank model") + parser.add_option("--l1-num-banks", type="int", default="1", + help="L1 bank count.") + parser.add_option("--l2-num-banks", type="int", default="1", + help="L2 bank count.") + parser.add_option("--l3-num-banks", type="int", default="1", + help="L3 bank count.") + parser.add_option("--l1-intlv-bit", type="int", default="0", + help="L1 bank interleave highest bit.") + parser.add_option("--l2-intlv-bit", type="int", default="0", + help="L2 bank interleave highest bit.") + parser.add_option("--l3-intlv-bit", type="int", default="0", + help="L3 bank interleave highest bit.") parser.add_option("--cacheline_size", type="int", default=64) # Enable Ruby diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/BaseCache.py --- a/src/mem/cache/BaseCache.py Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/BaseCache.py Wed Jul 31 23:50:10 2013 -0700 @@ -48,9 +48,15 @@ type = 'BaseCache' cxx_header = "mem/cache/base.hh" assoc = Param.Int("associativity") - hit_latency = Param.Cycles("The hit latency for this cache") + read_latency = Param.Cycles("The read latency for this cache (cycles)") + write_latency = Param.Cycles("The write latency for this cache (cycles)") response_latency = Param.Cycles( - "Additional cache latency for the return path to core on a miss"); + "Additional cache latency for the return path to core on a miss") + enable_bank_model = Param.Bool("knob to control if the bank model is used") + num_banks = Param.Int(1, "Number of cache data array banks") + 
bank_intlv_high_bit = Param.Int(0, + "Cache data array bank interleave highest bit " + "(0=automatically aligned to cache line granularity)") max_miss_count = Param.Counter(0, "number of misses to handle before calling exit") mshrs = Param.Int("number of MSHRs (max outstanding requests)") diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/SConscript --- a/src/mem/cache/SConscript Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/SConscript Wed Jul 31 23:50:10 2013 -0700 @@ -43,6 +43,7 @@ DebugFlag('Cache') DebugFlag('CachePort') +DebugFlag('CacheBank') DebugFlag('CacheRepl') DebugFlag('CacheTags') DebugFlag('HWPrefetch') diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/base.hh --- a/src/mem/cache/base.hh Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/base.hh Wed Jul 31 23:50:10 2013 -0700 @@ -61,6 +61,7 @@ #include "base/types.hh" #include "debug/Cache.hh" #include "debug/CachePort.hh" +#include "debug/CacheBank.hh" #include "mem/cache/mshr_queue.hh" #include "mem/mem_object.hh" #include "mem/packet.hh" @@ -180,9 +181,51 @@ bool mustSendRetry; + EventWrapper sendRetryEvent; + + }; + + /** + * Cache data array bank. + * Only models bank access contention, does not hold actual data + */ + class CacheBank + { + private: + /** Descriptive name (for DPRINTF output) */ + std::string bankName; - EventWrapper sendRetryEvent; + bool inService; + + Tick nextIdleTick; + + public: + + /** Mark this cache bank in-service until finishTick */ + void markInService(Tick finishTick); + + /** Mark this cache bank idle */ + void clearInService(); + + /** Extend this cache bank's in-service time by extraTick */ + void extendService(Tick extraTick); + + CacheBank(const std::string &_name) : + bankName(_name), + inService(false), + nextIdleTick(0) + {} + + /** Check if the service is done at current tick */ + bool serviceDone() const; + + bool isBusy() const { return inService; } + + Tick finishTick() const { return nextIdleTick; } + + /** Return bank name (for DPRINTF). 
*/ + const std::string name() const { return bankName; } }; @@ -191,6 +234,9 @@ protected: + /** Data array banks */ + std::vector bank; + /** Miss status registers */ MSHRQueue mshrQueue; @@ -246,9 +292,14 @@ const unsigned blkSize; /** - * The latency of a hit in this device. + * The latency of a read in this device. */ - const Cycles hitLatency; + const Cycles readLatency; + + /** + * The latency of a write in this device. + */ + const Cycles writeLatency; /** * The latency of sending reponse to its upper level cache/core on a @@ -258,6 +309,36 @@ */ const Cycles responseLatency; + /** + * The knob to turn on/off cache data array bank model + */ + const bool enableBankModel; + + /** + * The number of cache data array banks. + */ + const unsigned numBanks; + + /** + * The number of cache data array bank interleave bits + */ + const unsigned bankIntlvBits; + + /** + * Cache data array bank interleave high bit + */ + const unsigned bankIntlvHighBit; + + /** + * Cache data array bank interleave low bit + */ + const unsigned bankIntlvLowBit; + + /** + * Cache data array bank interleave mask + */ + const Addr bankIntlvMask; + /** The number of targets for each MSHR. */ const int numTarget; @@ -442,7 +523,8 @@ public: typedef BaseCacheParams Params; BaseCache(const Params *p); - ~BaseCache() {} + /** Non-default destructor is needed to deallocate memory. 
*/ + virtual ~BaseCache(); virtual void init(); @@ -461,6 +543,14 @@ return blkSize; } + /** + * Return bank ID according to interleave bits + */ + unsigned + getBankId(Addr addr) const + { + return (addr & bankIntlvMask) >> bankIntlvLowBit; + } Addr blockAlign(Addr addr) const { return (addr & ~(Addr(blkSize - 1))); } diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/base.cc --- a/src/mem/cache/base.cc Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/base.cc Wed Jul 31 23:50:10 2013 -0700 @@ -68,12 +68,21 @@ BaseCache::BaseCache(const Params *p) : MemObject(p), + bank(p->num_banks), mshrQueue("MSHRs", p->mshrs, 4, MSHRQueue_MSHRs), writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, MSHRQueue_WriteBuffer), blkSize(p->system->cacheLineSize()), - hitLatency(p->hit_latency), + readLatency(p->read_latency), + writeLatency(p->write_latency), responseLatency(p->response_latency), + enableBankModel(p->enable_bank_model), + numBanks(p->num_banks), + bankIntlvBits(ceilLog2(p->num_banks)), + bankIntlvHighBit(p->bank_intlv_high_bit ? 
p->bank_intlv_high_bit : + ceilLog2(blkSize) + bankIntlvBits - 1), + bankIntlvLowBit(bankIntlvHighBit + 1 - bankIntlvBits), + bankIntlvMask(((ULL(1) << bankIntlvBits) - 1) << bankIntlvLowBit), numTarget(p->tgts_per_mshr), forwardSnoops(p->forward_snoops), isTopLevel(p->is_top_level), @@ -83,6 +92,57 @@ addrRanges(p->addr_ranges.begin(), p->addr_ranges.end()), system(p->system) { + if (ULL(1) << bankIntlvBits != numBanks) + fatal("%s number of banks is not a power of 2", name()); + + uint64_t granularity = ULL(1) << bankIntlvLowBit; + if (granularity < blkSize) + fatal("%s bank interleave granuarity (%ld) smaller than line size " + " (%ld)", name(), granularity, blkSize); + + for (unsigned i = 0; i < bank.size(); ++i) { + bank[i] = new CacheBank(csprintf("%s.bank%d", p->name, i)); + } +} + +BaseCache::~BaseCache() +{ + for (unsigned i = 0; i < bank.size(); ++i) + delete bank[i]; +} + +void +BaseCache::CacheBank::markInService(Tick finishTick) +{ + assert(!inService); + nextIdleTick = finishTick; + DPRINTF(CacheBank, "In service until Tick %ld\n", + nextIdleTick); + inService = true; +} + +void +BaseCache::CacheBank::clearInService() +{ + assert(inService); + DPRINTF(CacheBank, "Service done, become idle\n"); + inService = false; +} + +void +BaseCache::CacheBank::extendService(Tick extraTick) +{ + assert(inService); + assert(nextIdleTick > curTick()); + nextIdleTick += extraTick; + DPRINTF(CacheBank, "Extend service to Tick %ld\n", + nextIdleTick); +} + +bool +BaseCache::CacheBank::serviceDone() const +{ + return inService && nextIdleTick <= curTick(); } void diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/cache_impl.hh --- a/src/mem/cache/cache_impl.hh Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/cache_impl.hh Wed Jul 31 23:50:10 2013 -0700 @@ -56,6 +56,7 @@ #include "debug/Cache.hh" #include "debug/CachePort.hh" #include "debug/CacheTags.hh" +#include "debug/CacheBank.hh" #include "mem/cache/prefetch/base.hh" #include "mem/cache/blk.hh" #include 
"mem/cache/cache.hh" @@ -293,12 +294,21 @@ if (pkt->req->isUncacheable()) { uncacheableFlush(pkt); blk = NULL; - lat = hitLatency; + lat = readLatency; return false; } int id = pkt->req->hasContextId() ? pkt->req->contextId() : -1; blk = tags->accessBlock(pkt->getAddr(), lat, id); + // Update latency decided by if it's read or write + // @todo: We now consider the tag lookup and data array operation are in + // parallel. However, a more accurate model should assume they are + // in series. + if (pkt->isRead()) { + if (readLatency > lat) lat = readLatency; + } else if (pkt->isWrite()) { + if (writeLatency > lat) lat = writeLatency; + } DPRINTF(Cache, "%s%s %x %s %s\n", pkt->cmdString(), pkt->req->isInstFetch() ? " (ifetch)" : "", @@ -374,7 +384,7 @@ { DPRINTF(Cache, "%s for %s address %x size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - Tick time = clockEdge(hitLatency); + Tick time = clockEdge(readLatency); assert(pkt->isResponse()); @@ -416,8 +426,8 @@ delete pendingDelete[x]; pendingDelete.clear(); - // we charge hitLatency for doing just about anything here - Tick time = clockEdge(hitLatency); + // we charge readLatency for doing just about anything here + Tick time = clockEdge(readLatency); assert(pkt->isRequest()); @@ -471,7 +481,7 @@ return true; } - Cycles lat = hitLatency; + Cycles lat = readLatency; BlkType *blk = NULL; PacketList writebacks; @@ -511,6 +521,7 @@ next_pf_time = prefetcher->notify(pkt, time); } + unsigned bank_id = getBankId(pkt->getAddr()); if (needsResponse) { pkt->makeTimingResponse(); // @todo: Make someone pay for this @@ -522,6 +533,10 @@ /// cache is still relying on it pendingDelete.push_back(pkt); } + // Mark the corresponding bank in service + if (enableBankModel) { + bank[bank_id]->markInService(clockEdge(lat)); + } } else { // miss @@ -655,7 +670,7 @@ Tick Cache::recvAtomic(PacketPtr pkt) { - Cycles lat = hitLatency; + Cycles lat = readLatency; // @TODO: make this a parameter bool last_level_cache = 
false; @@ -856,7 +871,7 @@ { assert(pkt->isResponse()); - Tick time = clockEdge(hitLatency); + Tick time = clockEdge(readLatency); MSHR *mshr = dynamic_cast(pkt->senderState); bool is_error = pkt->isError(); @@ -908,6 +923,16 @@ blk = handleFill(pkt, blk, writebacks); assert(blk != NULL); + + if (enableBankModel) { + // mark the corresponding bank in service + unsigned bank_id = getBankId(pkt->getAddr()); + if (bank[bank_id]->isBusy()) { + bank[bank_id]->extendService(writeLatency * clockPeriod()); + } else { + bank[bank_id]->markInService(clockEdge(writeLatency)); + } + } } // First offset for critical word first calculations @@ -1308,7 +1333,7 @@ } DPRINTF(Cache, "%s created response: %s address %x size %d\n", __func__, pkt->cmdString(), pkt->getAddr(), pkt->getSize()); - memSidePort->schedTimingSnoopResp(pkt, clockEdge(hitLatency)); + memSidePort->schedTimingSnoopResp(pkt, clockEdge(readLatency)); } template @@ -1531,7 +1556,7 @@ BlkType *blk = tags->findBlock(pkt->getAddr()); handleSnoop(pkt, blk, false, false, false); - return hitLatency * clockPeriod(); + return readLatency * clockPeriod(); } @@ -1768,11 +1793,37 @@ bool Cache::CpuSidePort::recvTimingReq(PacketPtr pkt) { + // unmark bank in service + // NOTE: Ideally, the bank status should be updated immediately after the + // nextIdleTick expires, but we will need to create new events to do that. + // Instead, we only check-and-unmark the inService bit before we really + // want to know the bank status. 
+ // @todo: we need to replace the bank_busy mark/unmark code into an + // event-driven style + if (cache->enableBankModel) + for (auto b = cache->bank.begin(); b != cache->bank.end(); ++b) + if ((*b)->serviceDone()) + (*b)->clearInService(); + + unsigned bank_id = cache->getBankId(pkt->getAddr()); + bool bank_busy = cache->enableBankModel && cache->bank[bank_id]->isBusy(); // always let inhibited requests through even if blocked - if (!pkt->memInhibitAsserted() && blocked) { + if (!pkt->memInhibitAsserted() && (blocked || bank_busy)) { assert(!cache->system->bypassCaches()); - DPRINTF(Cache,"Scheduling a retry while blocked\n"); - mustSendRetry = true; + DPRINTF(Cache, "Scheduling a retry while blocked\n"); + if (blocked) { + // not because of bank is busy + // the cache port is blocked (e.g. no MSHR) + // wait until the cache is unblocked and then send a retry + mustSendRetry = true; + } else { + DPRINTF(CachePort, "Cache port %s denying new requests because the" + " accessing bank is busy\n", name()); + // because of bank is busy + // precisely know which tick the service will finish + assert(!sendRetryEvent.scheduled()); + owner.schedule(sendRetryEvent, cache->bank[bank_id]->finishTick()); + } return false; } @@ -1813,6 +1864,16 @@ bool Cache::MemSidePort::recvTimingResp(PacketPtr pkt) { + // unmark bank in service + // NOTE: Ideally, the bank status should be updated immediately after the + // nextIdleTick expires, but we will need to create new events to do that. + // Instead, we only check-and-unmark the inService bit before we really + // want to know the bank status. 
+ if (cache->enableBankModel) + for (auto b = cache->bank.begin(); b != cache->bank.end(); ++b) + if ((*b)->serviceDone()) + (*b)->clearInService(); + cache->recvTimingResp(pkt); return true; } diff -r 2492d7ccda7e -r 0e146004005e src/mem/cache/tags/Tags.py --- a/src/mem/cache/tags/Tags.py Fri Jul 19 11:52:07 2013 +0200 +++ b/src/mem/cache/tags/Tags.py Wed Jul 31 23:50:10 2013 -0700 @@ -50,7 +50,7 @@ block_size = Param.Int(Parent.cache_line_size, "block size in bytes") # Get the hit latency from the parent (cache) - hit_latency = Param.Cycles(Parent.hit_latency, + hit_latency = Param.Cycles(Parent.read_latency, "The hit latency for this cache") class LRU(BaseTags):