# HG changeset patch
# Parent 80e79ae636ca6b021cbf7aa985b5fd56cb5b2708
cache: Split the hit latency into tag lookup latency and data access latency

If the cache access mode is parallel (the "sequential_access" parameter is set to "False"), tags and data are accessed in parallel, so the hit latency is the maximum of the tag lookup latency and the data access latency. If the cache access mode is sequential (the "sequential_access" parameter is set to "True"), tags and data are accessed one after the other, so the hit latency is the sum of the tag lookup latency and the data access latency.

diff --git a/configs/common/Caches.py b/configs/common/Caches.py
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -48,7 +48,8 @@
 
 class L1Cache(Cache):
     assoc = 2
-    hit_latency = 2
+    tag_latency = 2
+    data_latency = 2
     response_latency = 2
     mshrs = 4
     tgts_per_mshr = 20
@@ -63,7 +64,8 @@
 
 class L2Cache(Cache):
     assoc = 8
-    hit_latency = 20
+    tag_latency = 20
+    data_latency = 20
     response_latency = 20
     mshrs = 20
     tgts_per_mshr = 12
@@ -71,7 +73,8 @@
 
 class IOCache(Cache):
     assoc = 8
-    hit_latency = 50
+    tag_latency = 50
+    data_latency = 50
     response_latency = 50
     mshrs = 20
     size = '1kB'
@@ -79,7 +82,8 @@
 
 class PageTableWalkerCache(Cache):
     assoc = 2
-    hit_latency = 2
+    tag_latency = 2
+    data_latency = 2
     response_latency = 2
     mshrs = 10
     size = '1kB'
diff --git a/src/mem/cache/Cache.py b/src/mem/cache/Cache.py
--- a/src/mem/cache/Cache.py
+++ b/src/mem/cache/Cache.py
@@ -53,7 +53,8 @@
     size = Param.MemorySize("Capacity")
     assoc = Param.Unsigned("Associativity")
 
-    hit_latency = Param.Cycles("Hit latency")
+    tag_latency = Param.Cycles("Tag lookup latency")
+    data_latency = Param.Cycles("Data access latency")
     response_latency = Param.Cycles("Latency for the return path on a miss");
 
     max_miss_count = Param.Counter(0,
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -265,6 +265,12 @@
     const Cycles lookupLatency;
 
     /**
+     * The data access latency of the cache. It is incurred when
+     * there is an access to the cache.
+     */
+    const Cycles dataLatency;
+
+    /**
      * This is the forward latency of the cache. It occurs when there
      * is a cache miss and a request is forwarded downstream, in
      * particular an outbound miss.
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -72,9 +72,10 @@
       mshrQueue("MSHRs", p->mshrs, 0, p->demand_mshr_reserve), // see below
       writeBuffer("write buffer", p->write_buffers, p->mshrs), // see below
       blkSize(blk_size),
-      lookupLatency(p->hit_latency),
-      forwardLatency(p->hit_latency),
-      fillLatency(p->response_latency),
+      lookupLatency(p->tag_latency),
+      dataLatency(p->data_latency),
+      forwardLatency(p->tag_latency),
+      fillLatency(p->data_latency),
       responseLatency(p->response_latency),
       numTarget(p->tgts_per_mshr),
       forwardSnoops(true),
diff --git a/src/mem/cache/tags/Tags.py b/src/mem/cache/tags/Tags.py
--- a/src/mem/cache/tags/Tags.py
+++ b/src/mem/cache/tags/Tags.py
@@ -49,9 +49,13 @@
     # Get the block size from the parent (system)
     block_size = Param.Int(Parent.cache_line_size, "block size in bytes")
 
-    # Get the hit latency from the parent (cache)
-    hit_latency = Param.Cycles(Parent.hit_latency,
-                               "The hit latency for this cache")
+    # Get the tag lookup latency from the parent (cache)
+    tag_latency = Param.Cycles(Parent.tag_latency,
+                               "The tag lookup latency for this cache")
+
+    # Get the data access latency from the parent (cache)
+    data_latency = Param.Cycles(Parent.data_latency,
+                               "The data access latency for this cache")
 
 class BaseSetAssoc(BaseTags):
     type = 'BaseSetAssoc'
@@ -75,3 +79,5 @@
     type = 'FALRU'
     cxx_class = 'FALRU'
     cxx_header = "mem/cache/tags/fa_lru.hh"
+    sequential_access = Param.Bool(Parent.sequential_access,
+        "Whether to access tags and data sequentially")
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -69,8 +69,10 @@
     const unsigned blkSize;
     /** The size of the cache. */
     const unsigned size;
-    /** The access latency of the cache. */
-    const Cycles accessLatency;
+    /** The tag lookup latency of the cache. */
+    const Cycles lookupLatency;
+    /** The data access latency of the cache. */
+    const Cycles dataLatency;
     /** Pointer to the parent cache. */
     BaseCache *cache;
 
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -56,7 +56,8 @@
 
 BaseTags::BaseTags(const Params *p)
     : ClockedObject(p), blkSize(p->block_size), size(p->size),
-      accessLatency(p->hit_latency), cache(nullptr), warmupBound(0),
+      lookupLatency(p->tag_latency), dataLatency(p->data_latency),
+      cache(nullptr), warmupBound(0),
       warmedUp(false), numBlocks(0)
 {
 }
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -208,24 +208,35 @@
         Addr tag = extractTag(addr);
         int set = extractSet(addr);
         BlkType *blk = sets[set].findBlk(tag, is_secure);
-        lat = accessLatency;;
 
         // Access all tags in parallel, hence one in each way.  The data side
         // either accesses all blocks in parallel, or one block sequentially on
         // a hit.  Sequential access with a miss doesn't access data.
         tagAccesses += allocAssoc;
         if (sequentialAccess) {
+
+            // If sequential access, sum tag lookup and data access latencies
+            lat = lookupLatency + dataLatency;
+
             if (blk != nullptr) {
                 dataAccesses += 1;
             }
         } else {
+
+            // If parallel access, take the max latency between tag lookup and data access
+            if (lookupLatency >= dataLatency ) {
+                lat = lookupLatency;
+            } else {
+                lat = dataLatency;
+            }
+
             dataAccesses += allocAssoc;
         }
 
         if (blk != nullptr) {
             if (blk->whenReady > curTick()
                 && cache->ticksToCycles(blk->whenReady - curTick())
-                > accessLatency) {
+                > lat) {
                 lat = cache->ticksToCycles(blk->whenReady - curTick());
             }
             blk->refCount += 1;
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -116,6 +116,9 @@
     /** The address hash table. */
     hash_t tagHash;
 
+    /** Whether tags and data are accessed sequentially. */
+    const bool sequentialAccess;
+
     /**
      * Find the cache block for the given address.
      * @param addr The address to find.
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -56,7 +56,7 @@
 using namespace std;
 
 FALRU::FALRU(const Params *p)
-    : BaseTags(p), cacheBoundaries(nullptr)
+    : BaseTags(p), cacheBoundaries(nullptr), sequentialAccess(p->sequential_access)
 {
     if (!isPowerOf2(blkSize))
         fatal("cache block size (in bytes) `%d' must be a power of two",
@@ -209,7 +209,19 @@
         *inCache = tmp_in_cache;
     }
 
-    lat = accessLatency;
+    if (sequentialAccess) {
+
+        // If sequential access, sum tag lookup and data access latencies
+        lat = lookupLatency + dataLatency;
+    } else {
+
+        // If parallel access, take the max latency between tag lookup and data access
+        if (lookupLatency >= dataLatency ) {
+            lat = lookupLatency;
+        } else {
+            lat = dataLatency;
+        }
+    }
     //assert(check());
     return blk;
 }