diff --git a/configs/common/Caches.py b/configs/common/Caches.py
--- a/configs/common/Caches.py
+++ b/configs/common/Caches.py
@@ -48,15 +48,18 @@
 
 class L1Cache(BaseCache):
     assoc = 2
-    hit_latency = 2
+    lookup_latency = 2
+    ram_latency = 2
     response_latency = 2
     mshrs = 4
     tgts_per_mshr = 20
     is_top_level = True
+    sequential_access = True
 
 class L2Cache(BaseCache):
     assoc = 8
-    hit_latency = 20
+    lookup_latency = 20
+    ram_latency = 20
     response_latency = 20
     mshrs = 20
     tgts_per_mshr = 12
@@ -64,7 +67,8 @@
 
 class IOCache(BaseCache):
     assoc = 8
-    hit_latency = 50
+    lookup_latency = 50
+    ram_latency = 50
     response_latency = 50
     mshrs = 20
     size = '1kB'
@@ -74,7 +78,8 @@
 
 class PageTableWalkerCache(BaseCache):
     assoc = 2
-    hit_latency = 2
+    lookup_latency = 2
+    ram_latency = 2
     response_latency = 2
     mshrs = 10
     size = '1kB'
diff --git a/src/mem/cache/BaseCache.py b/src/mem/cache/BaseCache.py
--- a/src/mem/cache/BaseCache.py
+++ b/src/mem/cache/BaseCache.py
@@ -51,7 +51,8 @@
     size = Param.MemorySize("Capacity")
     assoc = Param.Unsigned("Associativity")
 
-    hit_latency = Param.Cycles("Hit latency")
+    lookup_latency = Param.Cycles("Tag lookup latency")
+    ram_latency = Param.Cycles("RAM access latency")
     response_latency = Param.Cycles("Latency for the return path on a miss");
 
     max_miss_count = Param.Counter(0,
diff --git a/src/mem/cache/base.hh b/src/mem/cache/base.hh
--- a/src/mem/cache/base.hh
+++ b/src/mem/cache/base.hh
@@ -282,6 +282,12 @@
     const Cycles lookupLatency;
 
     /**
+     * The latency of RAM access of a cache. It occurs when there is
+     * an access to the cache.
+     */
+    const Cycles ramLatency;
+
+    /**
      * This is the forward latency of the cache. It occurs when there
      * is a cache miss and a request is forwarded downstream, in
      * particular an outbound miss.
diff --git a/src/mem/cache/base.cc b/src/mem/cache/base.cc
--- a/src/mem/cache/base.cc
+++ b/src/mem/cache/base.cc
@@ -72,9 +72,10 @@
       writeBuffer("write buffer", p->write_buffers, p->mshrs+1000, 0,
                   MSHRQueue_WriteBuffer),
       blkSize(p->system->cacheLineSize()),
-      lookupLatency(p->hit_latency),
-      forwardLatency(p->hit_latency),
-      fillLatency(p->response_latency),
+      lookupLatency(p->lookup_latency),
+      ramLatency(p->ram_latency),
+      forwardLatency(p->lookup_latency),
+      fillLatency(p->ram_latency),
       responseLatency(p->response_latency),
       numTarget(p->tgts_per_mshr),
       forwardSnoops(p->forward_snoops),
diff --git a/src/mem/cache/tags/Tags.py b/src/mem/cache/tags/Tags.py
--- a/src/mem/cache/tags/Tags.py
+++ b/src/mem/cache/tags/Tags.py
@@ -49,9 +49,13 @@
     # Get the block size from the parent (system)
     block_size = Param.Int(Parent.cache_line_size, "block size in bytes")
 
-    # Get the hit latency from the parent (cache)
-    hit_latency = Param.Cycles(Parent.hit_latency,
-                               "The hit latency for this cache")
+    # Get the tag lookup latency from the parent (cache)
+    lookup_latency = Param.Cycles(Parent.lookup_latency,
+                                  "The tag lookup latency for this cache")
+
+    # Get the RAM access latency from the parent (cache)
+    ram_latency = Param.Cycles(Parent.ram_latency,
+                               "The RAM access latency for this cache")
 
 class BaseSetAssoc(BaseTags):
     type = 'BaseSetAssoc'
@@ -75,3 +79,5 @@
     type = 'FALRU'
     cxx_class = 'FALRU'
     cxx_header = "mem/cache/tags/fa_lru.hh"
+    sequential_access = Param.Bool(Parent.sequential_access,
+        "Whether to access tags and data sequentially")
diff --git a/src/mem/cache/tags/base.hh b/src/mem/cache/tags/base.hh
--- a/src/mem/cache/tags/base.hh
+++ b/src/mem/cache/tags/base.hh
@@ -69,8 +69,10 @@
     const unsigned blkSize;
     /** The size of the cache. */
     const unsigned size;
-    /** The access latency of the cache. */
-    const Cycles accessLatency;
+    /** The tag lookup latency of the cache. */
+    const Cycles lookupLatency;
+    /** The RAM access latency of the cache. */
+    const Cycles ramLatency;
     /** Pointer to the parent cache. */
     BaseCache *cache;
 
diff --git a/src/mem/cache/tags/base.cc b/src/mem/cache/tags/base.cc
--- a/src/mem/cache/tags/base.cc
+++ b/src/mem/cache/tags/base.cc
@@ -55,7 +55,8 @@
 
 BaseTags::BaseTags(const Params *p)
     : ClockedObject(p), blkSize(p->block_size), size(p->size),
-      accessLatency(p->hit_latency), cache(nullptr), warmupBound(0),
+      lookupLatency(p->lookup_latency), ramLatency(p->ram_latency),
+      cache(nullptr), warmupBound(0),
       warmedUp(false), numBlocks(0)
 {
 }
diff --git a/src/mem/cache/tags/base_set_assoc.hh b/src/mem/cache/tags/base_set_assoc.hh
--- a/src/mem/cache/tags/base_set_assoc.hh
+++ b/src/mem/cache/tags/base_set_assoc.hh
@@ -178,24 +178,30 @@
         Addr tag = extractTag(addr);
         int set = extractSet(addr);
         BlkType *blk = sets[set].findBlk(tag, is_secure);
-        lat = accessLatency;;
 
         // Access all tags in parallel, hence one in each way. The data side
         // either accesses all blocks in parallel, or one block sequentially on
         // a hit. Sequential access with a miss doesn't access data.
         tagAccesses += assoc;
         if (sequentialAccess) {
+            // The data array is only read after a tag hit, so a miss
+            // pays for the tag lookup alone.
+            lat = lookupLatency;
             if (blk != NULL) {
+                lat = lookupLatency + ramLatency;
                 dataAccesses += 1;
             }
         } else {
+            // Tags and data are read in parallel, so the access takes as
+            // long as the slower of the two.
+            lat = (lookupLatency >= ramLatency) ? lookupLatency : ramLatency;
             dataAccesses += assoc;
         }
 
         if (blk != NULL) {
             if (blk->whenReady > curTick()
                 && cache->ticksToCycles(blk->whenReady - curTick())
-                > accessLatency) {
+                > lat) {
                 lat = cache->ticksToCycles(blk->whenReady - curTick());
             }
             blk->refCount += 1;
diff --git a/src/mem/cache/tags/fa_lru.hh b/src/mem/cache/tags/fa_lru.hh
--- a/src/mem/cache/tags/fa_lru.hh
+++ b/src/mem/cache/tags/fa_lru.hh
@@ -116,6 +116,9 @@
     /** The address hash table. */
     hash_t tagHash;
 
+    /** Whether tags and data are accessed sequentially. */
+    const bool sequentialAccess;
+
     /**
      * Find the cache block for the given address.
      * @param addr The address to find.
diff --git a/src/mem/cache/tags/fa_lru.cc b/src/mem/cache/tags/fa_lru.cc
--- a/src/mem/cache/tags/fa_lru.cc
+++ b/src/mem/cache/tags/fa_lru.cc
@@ -55,7 +55,8 @@
 using namespace std;
 
 FALRU::FALRU(const Params *p)
-    : BaseTags(p), cacheBoundaries(nullptr)
+    : BaseTags(p), cacheBoundaries(nullptr),
+      sequentialAccess(p->sequential_access)
 {
     if (!isPowerOf2(blkSize))
         fatal("cache block size (in bytes) `%d' must be a power of two",
@@ -206,7 +207,18 @@
         *inCache = tmp_in_cache;
     }
 
-    lat = accessLatency;
+    if (sequentialAccess) {
+        // The data array is only read after a tag hit, so a miss
+        // pays for the tag lookup alone.
+        lat = lookupLatency;
+        if (blk != nullptr) {
+            lat = lookupLatency + ramLatency;
+        }
+    } else {
+        // Tags and data are read in parallel, so the access takes as
+        // long as the slower of the two.
+        lat = (lookupLatency >= ramLatency) ? lookupLatency : ramLatency;
+    }
     //assert(check());
     return blk;
 }