diff --git a/src/mem/SConscript b/src/mem/SConscript
--- a/src/mem/SConscript
+++ b/src/mem/SConscript
@@ -86,6 +86,7 @@
 DebugFlag('RubySystem')
 DebugFlag('RubyTester')
 DebugFlag('RubyStats')
+DebugFlag('RubyResourceStalls')
 
 CompoundFlag('Ruby', [ 'RubyQueue', 'RubyNetwork', 'RubyTester',
     'RubyGenerated', 'RubySlicc', 'RubySystem', 'RubyCache',
diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm
--- a/src/mem/protocol/RubySlicc_Exports.sm
+++ b/src/mem/protocol/RubySlicc_Exports.sm
@@ -183,6 +183,11 @@
   TagArrayWrite, desc="Write access to the cache's tag array";
 }
 
+enumeration(CacheResourceType, desc="...", default="CacheResourceType_NULL") {
+  DataArray, desc="Access to the cache's data array";
+  TagArray, desc="Access to the cache's tag array";
+}
+
 enumeration(DirectoryStatisticType, desc="...", default="DirectoryStatisticType_NULL") {
   Default, desc="Replace this with statistics passed to the Directory Ruby object";
 }
diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm
--- a/src/mem/protocol/RubySlicc_Types.sm
+++ b/src/mem/protocol/RubySlicc_Types.sm
@@ -154,6 +154,7 @@
 
   void setMRU(Address);
   void recordStatistic(CacheStatisticType);
+  bool checkResourceAvailable(CacheResourceType, Address);
 }
 
 structure (WireBuffer, inport="yes", outport="yes", external = "yes") {
diff --git a/src/mem/ruby/system/BankedArray.hh b/src/mem/ruby/system/BankedArray.hh
--- /dev/null
+++ b/src/mem/ruby/system/BankedArray.hh
@@ -0,0 +1,53 @@
+
+#ifndef __MEM_RUBY_SYSTEM_BANKEDARRAY_HH__
+#define __MEM_RUBY_SYSTEM_BANKEDARRAY_HH__
+
+#include <vector>
+
+#include "mem/ruby/common/TypeDefines.hh"
+#include "sim/eventq.hh"
+
+/**
+ * Tracks which banks of a banked array are busy.  Each bank owns one
+ * TickEvent; a bank counts as busy for as long as its event is scheduled.
+ */
+class BankedArray : public EventManager
+{
+private:
+    unsigned int banks;
+    unsigned int accessLatency;
+    // Recorded at construction; mapIndexToBank() does not consult
+    // bankBits or startIndexBit.
+    unsigned int bankBits;
+    unsigned int startIndexBit;
+
+    // Marker event: process() is a no-op, only its scheduled() state and
+    // the recorded index / start tick are used.
+    class TickEvent : public Event
+    {
+    public:
+        TickEvent() : Event(), idx(0), startAccess(0) {}
+        void process() {}
+        Index idx;
+        Tick startAccess;
+    };
+    friend class TickEvent;
+
+    // If the tick event is scheduled then the bank is busy
+    // otherwise, schedule the event and wait for it to complete
+    std::vector<TickEvent> busyBanks;
+
+    // Returns the bank (idx % banks) that serves the given cache index.
+    unsigned int mapIndexToBank(Index idx);
+
+public:
+    BankedArray(unsigned int banks, unsigned int accessLatency,
+                unsigned int startIndexBit);
+
+    // Note: We try the access based on the cache index, not the address
+    // This is so we don't get aliasing on blocks being replaced
+    // Returns true when the access may proceed, false when the bank is busy.
+    bool tryAccess(Index idx);
+};
+
+#endif
diff --git a/src/mem/ruby/system/BankedArray.cc b/src/mem/ruby/system/BankedArray.cc
--- /dev/null
+++ b/src/mem/ruby/system/BankedArray.cc
@@ -0,0 +1,60 @@
+
+
+#include <cassert>
+
+#include "base/intmath.hh"
+#include "mem/ruby/common/TypeDefines.hh"
+#include "mem/ruby/system/BankedArray.hh"
+#include "sim/eventq.hh"
+
+// bankBits is derived only for a non-zero bank count and is 0 otherwise,
+// so it is never left uninitialized.
+BankedArray::BankedArray(unsigned int banks, unsigned int accessLatency,
+                         unsigned int startIndexBit)
+    : EventManager(&mainEventQueue),
+      banks(banks),
+      accessLatency(accessLatency),
+      bankBits(banks != 0 ? floorLog2(banks) : 0),
+      startIndexBit(startIndexBit)
+{
+    busyBanks.resize(banks);
+}
+
+// Try to start an access to the bank serving idx.  Returns false only when
+// that bank is busy and the request is not a same-tick repeat for the same idx.
+bool
+BankedArray::tryAccess(Index idx)
+{
+    // A zero latency, or an array with no banks, has nothing to contend for
+    // (and a zero bank count must not reach the modulo in mapIndexToBank).
+    if (accessLatency == 0 || banks == 0)
+        return true;
+
+    unsigned int bank = mapIndexToBank(idx);
+    assert(bank < banks);
+
+    if (busyBanks[bank].scheduled()) {
+        // We tried to allocate resources twice in the same cycle for the
+        // same index: only that repeat is let through while the bank is busy
+        return busyBanks[bank].startAccess == curTick() &&
+               busyBanks[bank].idx == idx;
+    }
+
+    busyBanks[bank].idx = idx;
+    busyBanks[bank].startAccess = curTick();
+
+    // subtract 1 so that next cycle the resource is available
+    schedule(busyBanks[bank], curTick()+accessLatency-1);
+
+    return true;
+}
+
+// Banks are interleaved on the cache index; tryAccess() guarantees banks != 0.
+unsigned int
+BankedArray::mapIndexToBank(Index idx)
+{
+    if (banks == 1) {
+        return 0;
+    }
+    return idx % banks;
+}
diff --git a/src/mem/ruby/system/Cache.py b/src/mem/ruby/system/Cache.py
--- a/src/mem/ruby/system/Cache.py
+++ b/src/mem/ruby/system/Cache.py
@@ -40,3 +40,9 @@
     replacement_policy = Param.String("PSEUDO_LRU", "");
     start_index_bit = Param.Int(6, "index start, default 6 for 64-byte line");
     is_icache = Param.Bool(False, "is instruction only cache");
+
+    dataArrayBanks = Param.Int(1, "Number of banks for the data array")
+    tagArrayBanks = Param.Int(1, "Number of banks for the tag array")
+    dataAccessLatency = Param.Int(1, "Gem5 cycles for the data array")
+    tagAccessLatency = Param.Int(1, "Gem5 cycles for the tag array")
+    resourceStalls = Param.Bool(True, "stall if there is a resource failure")
diff --git a/src/mem/ruby/system/CacheMemory.hh b/src/mem/ruby/system/CacheMemory.hh
--- a/src/mem/ruby/system/CacheMemory.hh
+++ b/src/mem/ruby/system/CacheMemory.hh
@@ -35,6 +35,7 @@
 
 #include "base/hashmap.hh"
 #include "base/statistics.hh"
+#include "mem/protocol/CacheResourceType.hh"
 #include "mem/protocol/CacheStatisticType.hh"
 #include "mem/protocol/GenericRequestType.hh"
 #include "mem/protocol/RubyRequest.hh"
@@ -43,6 +44,7 @@
 #include "mem/ruby/recorder/CacheRecorder.hh"
 #include "mem/ruby/slicc_interface/AbstractCacheEntry.hh"
 #include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh"
+#include "mem/ruby/system/BankedArray.hh"
 #include "mem/ruby/system/LRUPolicy.hh"
 #include "mem/ruby/system/PseudoLRUPolicy.hh"
 #include "params/RubyCache.hh"
@@ -125,6 +127,10 @@
     Stats::Scalar numTagArrayReads;
     Stats::Scalar numTagArrayWrites;
 
+    bool checkResourceAvailable(CacheResourceType res, Address addr);
+
+    Stats::Scalar numTagArrayStalls;
+    Stats::Scalar numDataArrayStalls;
   private:
     // convert a Address to its location in the cache
     Index addressToCacheSet(const Address& address) const;
@@ -155,12 +161,16 @@
 
     CacheProfiler* m_profiler_ptr;
 
+    BankedArray dataArray;
+    BankedArray tagArray;
+
     int m_cache_size;
     std::string m_policy;
     int m_cache_num_sets;
     int m_cache_num_set_bits;
     int m_cache_assoc;
     int m_start_index_bit;
+    bool m_resource_stalls;
 };
 
 #endif // __MEM_RUBY_SYSTEM_CACHEMEMORY_HH__
diff --git a/src/mem/ruby/system/CacheMemory.cc b/src/mem/ruby/system/CacheMemory.cc
--- a/src/mem/ruby/system/CacheMemory.cc
+++ b/src/mem/ruby/system/CacheMemory.cc
@@ -29,6 +29,7 @@
 #include "base/intmath.hh"
 #include "debug/RubyCache.hh"
 #include "debug/RubyCacheTrace.hh"
+#include "debug/RubyResourceStalls.hh"
 #include "debug/RubyStats.hh"
 #include "mem/protocol/AccessPermission.hh"
 #include "mem/ruby/system/CacheMemory.hh"
@@ -51,7 +52,9 @@
 }
 
 CacheMemory::CacheMemory(const Params *p)
-    : SimObject(p)
+    : SimObject(p),
+    dataArray(p->dataArrayBanks, p->dataAccessLatency, p->start_index_bit),
+    tagArray(p->tagArrayBanks, p->tagAccessLatency, p->start_index_bit)
 {
     m_cache_size = p->size;
     m_latency = p->latency;
@@ -60,6 +63,7 @@
     m_profiler_ptr = new CacheProfiler(name());
     m_start_index_bit = p->start_index_bit;
     m_is_instruction_only_cache = p->is_icache;
+    m_resource_stalls = p->resourceStalls;
 }
 
 void
@@ -523,4 +527,49 @@
         .name(name() + ".num_tag_array_writes")
         .desc("number of tag array writes")
         ;
+
+    numTagArrayStalls
+        .name(name() + ".num_tag_array_stalls")
+        .desc("number of stalls caused by tag array")
+        ;
+
+    numDataArrayStalls
+        .name(name() + ".num_data_array_stalls")
+        .desc("number of stalls caused by data array")
+        ;
 }
+
+// Reserve the tag or data array bank serving addr's cache set.  Returns
+// true if the access may proceed (always the case when resource stalls
+// are disabled); otherwise bumps the matching stall counter and returns
+// false.
+bool
+CacheMemory::checkResourceAvailable(CacheResourceType res, Address addr)
+{
+    if (!m_resource_stalls) {
+        return true;
+    }
+
+    if (res == CacheResourceType_TagArray) {
+        if (tagArray.tryAccess(addressToCacheSet(addr))) {
+            return true;
+        }
+        DPRINTF(RubyResourceStalls,
+                "Tag array stall on addr %s in set %d\n",
+                addr, addressToCacheSet(addr));
+        numTagArrayStalls++;
+        return false;
+    } else if (res == CacheResourceType_DataArray) {
+        if (dataArray.tryAccess(addressToCacheSet(addr))) {
+            return true;
+        }
+        DPRINTF(RubyResourceStalls,
+                "Data array stall on addr %s in set %d\n",
+                addr, addressToCacheSet(addr));
+        numDataArrayStalls++;
+        return false;
+    } else {
+        assert(false);
+        return true;
+    }
+}
diff --git a/src/mem/ruby/system/SConscript b/src/mem/ruby/system/SConscript
--- a/src/mem/ruby/system/SConscript
+++ b/src/mem/ruby/system/SConscript
@@ -55,3 +55,4 @@
 Source('Sequencer.cc')
 Source('System.cc')
 Source('TimerTable.cc')
+Source('BankedArray.cc')
diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py
--- a/src/mem/slicc/symbols/StateMachine.py
+++ b/src/mem/slicc/symbols/StateMachine.py
@@ -1238,6 +1238,14 @@
 ''' % (key.code, val)
                     case_sorter.append(val)
 
+                # Check all of the statistics for resource constraints
+                for statistic in statistics:
+                    val = '''
+if (!checkResourceAvailable(%s_Statistic_%s, addr)) {
+    return TransitionResult_ResourceStall;
+}
+''' % (self.ident, statistic.ident)
+                    case_sorter.append(val)
                 # Emit the code sequences in a sorted order. This makes the
                 # output deterministic (without this the output order can vary