diff --git a/src/mem/ruby/filters/BulkBloomFilter.hh b/src/mem/ruby/filters/BulkBloomFilter.hh --- a/src/mem/ruby/filters/BulkBloomFilter.hh +++ b/src/mem/ruby/filters/BulkBloomFilter.hh @@ -38,7 +38,7 @@ class BulkBloomFilter : public AbstractBloomFilter { public: - BulkBloomFilter(int size); + BulkBloomFilter(int size, uint32_t block_size_bytes); ~BulkBloomFilter(); void clear(); diff --git a/src/mem/ruby/filters/BulkBloomFilter.cc b/src/mem/ruby/filters/BulkBloomFilter.cc --- a/src/mem/ruby/filters/BulkBloomFilter.cc +++ b/src/mem/ruby/filters/BulkBloomFilter.cc @@ -35,7 +35,8 @@ using namespace std; -BulkBloomFilter::BulkBloomFilter(int size) +BulkBloomFilter::BulkBloomFilter(int size, uint32_t block_size_bytes) + : AbstractBloomFilter(block_size_bytes) { m_filter_size = size; m_filter_size_bits = floorLog2(m_filter_size); diff --git a/src/mem/ruby/filters/H3BloomFilter.hh b/src/mem/ruby/filters/H3BloomFilter.hh --- a/src/mem/ruby/filters/H3BloomFilter.hh +++ b/src/mem/ruby/filters/H3BloomFilter.hh @@ -38,7 +38,8 @@ class H3BloomFilter : public AbstractBloomFilter { public: - H3BloomFilter(int size, int hashes, bool parallel); + H3BloomFilter(int size, int hashes, bool parallel, + uint32_t block_size_bytes); ~H3BloomFilter(); void clear(); diff --git a/src/mem/ruby/filters/H3BloomFilter.cc b/src/mem/ruby/filters/H3BloomFilter.cc --- a/src/mem/ruby/filters/H3BloomFilter.cc +++ b/src/mem/ruby/filters/H3BloomFilter.cc @@ -353,7 +353,9 @@ 394261773, 848616745, 15446017, 517723271, }, }; -H3BloomFilter::H3BloomFilter(int size, int hashes, bool parallel) +H3BloomFilter::H3BloomFilter(int size, int hashes, bool parallel, + uint32_t block_size_bytes) + : AbstractBloomFilter(block_size_bytes) { //TODO: change this ugly init code... primes_list[0] = 9323; diff --git a/src/mem/ruby/filters/LSB_CountingBloomFilter.hh b/src/mem/ruby/filters/LSB_CountingBloomFilter.hh --- a/src/mem/ruby/filters/LSB_CountingBloomFilter.hh +++ b/src/mem/ruby/filters/LSB_CountingBloomFilter.hh @@ -38,7 +38,7 @@ class LSB_CountingBloomFilter : public AbstractBloomFilter { public: - LSB_CountingBloomFilter(int head, int tail); + LSB_CountingBloomFilter(int head, int tail, uint32_t block_size_bytes); ~LSB_CountingBloomFilter(); void clear(); diff --git a/src/mem/protocol/RubySlicc_Defines.sm b/src/mem/protocol/RubySlicc_Defines.sm --- a/src/mem/protocol/RubySlicc_Defines.sm +++ b/src/mem/protocol/RubySlicc_Defines.sm @@ -31,6 +31,8 @@ MachineID machineID; NodeID clusterID; Cycles recycle_latency; +uint32_t block_size_bits; +uint32_t block_size_bytes; // Functions implemented in the AbstractController class for // making timing access to the memory maintained by the diff --git a/src/mem/ruby/filters/AbstractBloomFilter.hh b/src/mem/ruby/filters/AbstractBloomFilter.hh --- a/src/mem/ruby/filters/AbstractBloomFilter.hh +++ b/src/mem/ruby/filters/AbstractBloomFilter.hh @@ -36,6 +36,7 @@ class AbstractBloomFilter { public: + AbstractBloomFilter(uint32_t block_size_bytes); virtual ~AbstractBloomFilter() {}; virtual void clear() = 0; virtual void increment(Addr addr) = 0; @@ -53,6 +54,10 @@ virtual int getIndex(Addr addr) = 0; virtual int readBit(const int index) = 0; virtual void writeBit(const int index, const int value) = 0; + + protected: + const uint32_t m_block_size_bytes; + const uint32_t m_block_size_bits; }; #endif // __MEM_RUBY_FILTERS_ABSTRACTBLOOMFILTER_HH__ diff --git a/src/mem/ruby/filters/AbstractBloomFilter.cc b/src/mem/ruby/filters/AbstractBloomFilter.cc new file mode 100644 --- /dev/null +++ b/src/mem/ruby/filters/AbstractBloomFilter.cc @@ -0,0 +1,9 @@ +#include "mem/ruby/filters/AbstractBloomFilter.hh" + +#include "base/intmath.hh" + +AbstractBloomFilter::AbstractBloomFilter(uint32_t block_size_bytes) + : m_block_size_bytes(block_size_bytes), + m_block_size_bits(floorLog2(m_block_size_bytes)) +{ +} diff --git a/src/mem/ruby/filters/BlockBloomFilter.hh b/src/mem/ruby/filters/BlockBloomFilter.hh --- a/src/mem/ruby/filters/BlockBloomFilter.hh +++ b/src/mem/ruby/filters/BlockBloomFilter.hh @@ -38,7 +38,7 @@ class BlockBloomFilter : public AbstractBloomFilter { public: - BlockBloomFilter(int size); + BlockBloomFilter(int size, uint32_t block_size_bytes); ~BlockBloomFilter(); void clear(); diff --git a/src/mem/ruby/filters/BlockBloomFilter.cc b/src/mem/ruby/filters/BlockBloomFilter.cc --- a/src/mem/ruby/filters/BlockBloomFilter.cc +++ b/src/mem/ruby/filters/BlockBloomFilter.cc @@ -33,7 +33,8 @@ using namespace std; -BlockBloomFilter::BlockBloomFilter(int size) +BlockBloomFilter::BlockBloomFilter(int size, uint32_t block_size_bytes) + : AbstractBloomFilter(block_size_bytes) { m_filter_size = size; m_filter_size_bits = floorLog2(m_filter_size); diff --git a/src/mem/ruby/filters/MultiGrainBloomFilter.hh b/src/mem/ruby/filters/MultiGrainBloomFilter.hh --- a/src/mem/ruby/filters/MultiGrainBloomFilter.hh +++ b/src/mem/ruby/filters/MultiGrainBloomFilter.hh @@ -38,7 +38,7 @@ class MultiGrainBloomFilter : public AbstractBloomFilter { public: - MultiGrainBloomFilter(int head, int tail); + MultiGrainBloomFilter(int head, int tail, uint32_t block_size_bytes); ~MultiGrainBloomFilter(); void clear(); diff --git a/src/mem/ruby/filters/MultiGrainBloomFilter.cc b/src/mem/ruby/filters/MultiGrainBloomFilter.cc --- a/src/mem/ruby/filters/MultiGrainBloomFilter.cc +++ b/src/mem/ruby/filters/MultiGrainBloomFilter.cc @@ -33,7 +33,9 @@ using namespace std; -MultiGrainBloomFilter::MultiGrainBloomFilter(int head, int tail) +MultiGrainBloomFilter::MultiGrainBloomFilter(int head, int tail, + uint32_t block_size_bytes) + : AbstractBloomFilter(block_size_bytes) { // head contains size of 1st bloom filter, tail contains size of // 2nd bloom filter diff --git a/src/mem/ruby/filters/NonCountingBloomFilter.hh b/src/mem/ruby/filters/NonCountingBloomFilter.hh --- a/src/mem/ruby/filters/NonCountingBloomFilter.hh +++ b/src/mem/ruby/filters/NonCountingBloomFilter.hh @@ -38,7 +38,7 @@ class NonCountingBloomFilter : public AbstractBloomFilter { public: - NonCountingBloomFilter(int head, int tail); + NonCountingBloomFilter(int head, int tail, uint32_t block_size_bytes); ~NonCountingBloomFilter(); void clear(); diff --git a/src/mem/ruby/filters/NonCountingBloomFilter.cc b/src/mem/ruby/filters/NonCountingBloomFilter.cc --- a/src/mem/ruby/filters/NonCountingBloomFilter.cc +++ b/src/mem/ruby/filters/NonCountingBloomFilter.cc @@ -33,7 +33,9 @@ using namespace std; -NonCountingBloomFilter::NonCountingBloomFilter(int head, int tail) +NonCountingBloomFilter::NonCountingBloomFilter(int head, int tail, + uint32_t block_size_bytes) + : AbstractBloomFilter(block_size_bytes) { // head contains filter size, tail contains bit offset from block number m_filter_size = head; diff --git a/src/mem/ruby/filters/SConscript b/src/mem/ruby/filters/SConscript --- a/src/mem/ruby/filters/SConscript +++ b/src/mem/ruby/filters/SConscript @@ -33,6 +33,7 @@ if env['PROTOCOL'] == 'None': Return() +Source('AbstractBloomFilter.cc') Source('BlockBloomFilter.cc') Source('BulkBloomFilter.cc') Source('H3BloomFilter.cc') diff --git a/src/mem/ruby/network/Network.py b/src/mem/ruby/network/Network.py --- a/src/mem/ruby/network/Network.py +++ b/src/mem/ruby/network/Network.py @@ -44,7 +44,9 @@ "assumes the protocol numbers vnets starting from 0. Therefore, " "the number of virtual networks should be one more than the " "highest numbered vnet in use.") - control_msg_size = Param.Int(8, "") + + block_size_bytes = Param.UInt32(64, "block size used for data messages.") + control_msg_size = Param.UInt32(8, "") ruby_system = Param.RubySystem("") routers = VectorParam.BasicRouter("Network routers") diff --git a/src/mem/ruby/filters/LSB_CountingBloomFilter.cc b/src/mem/ruby/filters/LSB_CountingBloomFilter.cc --- a/src/mem/ruby/filters/LSB_CountingBloomFilter.cc +++ b/src/mem/ruby/filters/LSB_CountingBloomFilter.cc @@ -32,7 +32,9 @@ using namespace std; -LSB_CountingBloomFilter::LSB_CountingBloomFilter(int head, int tail) +LSB_CountingBloomFilter::LSB_CountingBloomFilter(int head, int tail, + uint32_t block_size_bytes) + : AbstractBloomFilter(block_size_bytes) { m_filter_size = head; m_filter_size_bits = floorLog2(m_filter_size); diff --git a/src/mem/ruby/filters/MultiBitSelBloomFilter.hh b/src/mem/ruby/filters/MultiBitSelBloomFilter.hh --- a/src/mem/ruby/filters/MultiBitSelBloomFilter.hh +++ b/src/mem/ruby/filters/MultiBitSelBloomFilter.hh @@ -41,7 +41,7 @@ { public: MultiBitSelBloomFilter(int size, int hashes, int skip_bits, - bool parallel); + bool parallel, uint32_t block_size_bytes); ~MultiBitSelBloomFilter(); void clear(); diff --git a/src/mem/ruby/filters/MultiBitSelBloomFilter.cc b/src/mem/ruby/filters/MultiBitSelBloomFilter.cc --- a/src/mem/ruby/filters/MultiBitSelBloomFilter.cc +++ b/src/mem/ruby/filters/MultiBitSelBloomFilter.cc @@ -35,7 +35,9 @@ using namespace std; MultiBitSelBloomFilter::MultiBitSelBloomFilter(int size, int hashes, - int skip_bits, bool parallel) + int skip_bits, bool parallel, + uint32_t block_size_bytes) + : AbstractBloomFilter(block_size_bytes) { m_filter_size = size; m_num_hashes = hashes; diff --git a/src/cpu/testers/rubytest/Check.hh b/src/cpu/testers/rubytest/Check.hh --- a/src/cpu/testers/rubytest/Check.hh +++ b/src/cpu/testers/rubytest/Check.hh @@ -45,8 +45,8 @@ class Check { public: - Check(Addr address, Addr pc, int _num_writers, - int _num_readers, RubyTester* _tester); + Check(Addr address, Addr pc, uint32_t block_size_bits, + int _num_writers, int _num_readers, RubyTester* _tester); void initiate(); // Does Action or Check or nether void performCallback(NodeID proc, Addr address, DataBlock *data, @@ -73,6 +73,7 @@ NodeID m_initiatingNode; Addr m_address; Addr m_pc; + const uint32_t m_block_size_bits; RubyAccessMode m_access_mode; int m_num_writers; int m_num_readers; # Node ID 41c966b4e307e574e660b33e7987a61bb5beb78c # Parent 6fc675f956dcb68647abb7f69a6f3fe89fb1a318 diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -33,10 +33,10 @@ typedef RubyTester::SenderState SenderState; -Check::Check(Addr address, Addr pc, int _num_writers, int _num_readers, - RubyTester* _tester) - : m_num_writers(_num_writers), m_num_readers(_num_readers), - m_tester_ptr(_tester) +Check::Check(Addr address, Addr pc, uint32_t block_size_bits, + int _num_writers, int _num_readers, RubyTester* _tester) + : m_block_size_bits(block_size_bits), m_num_writers(_num_writers), + m_num_readers(_num_readers), m_tester_ptr(_tester) { m_status = TesterStatus_Idle; diff --git a/src/cpu/testers/rubytest/CheckTable.hh b/src/cpu/testers/rubytest/CheckTable.hh --- a/src/cpu/testers/rubytest/CheckTable.hh +++ b/src/cpu/testers/rubytest/CheckTable.hh @@ -42,7 +42,8 @@ class CheckTable { public: - CheckTable(int _num_writers, int _num_readers, RubyTester* _tester); + CheckTable(uint32_t block_size_bytes, int _num_writers, int _num_readers, + RubyTester* _tester); ~CheckTable(); Check* getRandomCheck(); @@ -65,6 +66,7 @@ std::vector m_check_vector; std::unordered_map m_lookup_map; + const uint32_t m_block_size_bits; int m_num_writers; int m_num_readers; RubyTester* m_tester_ptr; diff --git a/src/cpu/testers/rubytest/CheckTable.cc b/src/cpu/testers/rubytest/CheckTable.cc --- a/src/cpu/testers/rubytest/CheckTable.cc +++ b/src/cpu/testers/rubytest/CheckTable.cc @@ -33,8 +33,10 @@ #include "cpu/testers/rubytest/CheckTable.hh" #include "debug/RubyTest.hh" -CheckTable::CheckTable(int _num_writers, int _num_readers, RubyTester* _tester) - : m_num_writers(_num_writers), m_num_readers(_num_readers), +CheckTable::CheckTable(uint32_t block_size_bytes, int _num_writers, + int _num_readers, RubyTester* _tester) + : m_block_size_bits(floorLog2(block_size_bytes)), + m_num_writers(_num_writers), m_num_readers(_num_readers), m_tester_ptr(_tester) { Addr physical = 0; @@ -97,7 +99,8 @@ DPRINTF(RubyTest, "Adding check for address: %s\n", address); Check* check_ptr = new Check(address, 100 + m_check_vector.size(), - m_num_writers, m_num_readers, m_tester_ptr); + m_block_size_bits, m_num_writers, + m_num_readers, m_tester_ptr); for (int i = 0; i < CHECK_SIZE; i++) { // Insert it once per byte m_lookup_map[address + i] = check_ptr; diff --git a/src/cpu/testers/rubytest/RubyTester.hh b/src/cpu/testers/rubytest/RubyTester.hh --- a/src/cpu/testers/rubytest/RubyTester.hh +++ b/src/cpu/testers/rubytest/RubyTester.hh @@ -147,7 +147,8 @@ CheckTable* m_checkTable_ptr; std::vector m_last_progress_vector; - int m_num_cpus; + const int m_num_cpus; + const uint32_t m_block_size_bytes; uint64_t m_checks_completed; std::vector writePorts; std::vector readPorts; diff --git a/src/cpu/testers/rubytest/RubyTester.cc b/src/cpu/testers/rubytest/RubyTester.cc --- a/src/cpu/testers/rubytest/RubyTester.cc +++ b/src/cpu/testers/rubytest/RubyTester.cc @@ -51,7 +51,7 @@ : MemObject(p), checkStartEvent(this), _masterId(p->system->getMasterId(name())), m_checkTable_ptr(nullptr), - m_num_cpus(p->num_cpus), + m_num_cpus(p->num_cpus), m_block_size_bytes(p->block_size_bytes), m_checks_to_complete(p->checks_to_complete), m_deadlock_threshold(p->deadlock_threshold), m_num_writers(0), @@ -121,7 +121,8 @@ m_num_readers = readPorts.size(); assert(m_num_readers == m_num_cpus); - m_checkTable_ptr = new CheckTable(m_num_writers, m_num_readers, this); + m_checkTable_ptr = new CheckTable(m_block_size_bytes, m_num_writers, + m_num_readers, this); } BaseMasterPort & diff --git a/src/cpu/testers/rubytest/RubyTester.py b/src/cpu/testers/rubytest/RubyTester.py --- a/src/cpu/testers/rubytest/RubyTester.py +++ b/src/cpu/testers/rubytest/RubyTester.py @@ -34,6 +34,7 @@ type = 'RubyTester' cxx_header = "cpu/testers/rubytest/RubyTester.hh" num_cpus = Param.Int("number of cpus / RubyPorts") + block_size_bytes = Param.UInt32(64, "cache block size.") cpuInstDataPort = VectorMasterPort("cpu combo ports to inst & data caches") cpuInstPort = VectorMasterPort("cpu ports to only inst caches") cpuDataPort = VectorMasterPort("cpu ports to only data caches") diff --git a/src/gpu-compute/GPU.py b/src/gpu-compute/GPU.py --- a/src/gpu-compute/GPU.py +++ b/src/gpu-compute/GPU.py @@ -134,6 +134,8 @@ vector_register_file = VectorParam.VectorRegisterFile("Vector register "\ "file") + block_size_bytes = Param.UInt32(64, "cache block size") + class Shader(ClockedObject): type = 'Shader' cxx_class = 'Shader' @@ -150,7 +152,9 @@ timing = Param.Bool(False, 'timing memory accesses') cpu_pointer = Param.BaseCPU(NULL, "pointer to base CPU") - translation = Param.Bool(False, "address translation"); + translation = Param.Bool(False, "address translation") + + block_size_bytes = Param.UInt32(64, "cache block size") class ClDriver(EmulatedDriver): type = 'ClDriver' diff --git a/src/gpu-compute/fetch_unit.hh b/src/gpu-compute/fetch_unit.hh --- a/src/gpu-compute/fetch_unit.hh +++ b/src/gpu-compute/fetch_unit.hh @@ -84,6 +84,8 @@ // Pointer to list of waves dispatched on to this SIMD unit std::vector *waveList; + + const uint32_t m_block_size_bytes; }; #endif // __FETCH_UNIT_HH__ diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc --- a/src/gpu-compute/fetch_unit.cc +++ b/src/gpu-compute/fetch_unit.cc @@ -51,7 +51,8 @@ timingSim(true), computeUnit(nullptr), fetchScheduler(params), - waveList(nullptr) + waveList(nullptr), + m_block_size_bytes(params->block_size_bytes) { } diff --git a/src/gpu-compute/shader.hh b/src/gpu-compute/shader.hh --- a/src/gpu-compute/shader.hh +++ b/src/gpu-compute/shader.hh @@ -77,6 +77,8 @@ class Shader : public SimObject { protected: + const uint32_t m_block_size_bytes; + // Shader's clock period in terms of number of ticks of curTime, // aka global simulation clock Tick clock; diff --git a/src/gpu-compute/shader.cc b/src/gpu-compute/shader.cc --- a/src/gpu-compute/shader.cc +++ b/src/gpu-compute/shader.cc @@ -51,6 +51,7 @@ #include "sim/sim_exit.hh" Shader::Shader(const Params *p) : SimObject(p), + m_block_size_bytes(p->block_size_bytes), clock(p->clk_domain->clockPeriod()), cpuThread(nullptr), gpuTc(nullptr), cpuPointer(p->cpu_pointer), tickEvent(this), timingSim(p->timing), hsail_mode(SIMT), impl_kern_boundary_sync(p->impl_kern_boundary_sync), diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm --- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm @@ -225,7 +225,8 @@ } TBETable * TBEs, template="", constructor="m_number_of_TBEs"; - PerfectCacheMemory * localDirectory, template=""; + PerfectCacheMemory * localDirectory, template="", + constructor="m_block_size_bits"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -199,7 +199,7 @@ bool starving, default="false"; int l2_select_low_bit, default="RubySystem::getBlockSizeBits()"; - PersistentTable * persistentTable; + PersistentTable * persistentTable, constructor="m_block_size_bits"; TimerTable * useTimerTable; TimerTable * reissueTimerTable; diff --git a/src/mem/protocol/MOESI_CMP_token-L2cache.sm b/src/mem/protocol/MOESI_CMP_token-L2cache.sm --- a/src/mem/protocol/MOESI_CMP_token-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L2cache.sm @@ -146,8 +146,9 @@ int countReadStarvingForAddress(Addr); } - PersistentTable * persistentTable; - PerfectCacheMemory * localDirectory, template=""; + PersistentTable * persistentTable, constructor="m_block_size_bits"; + PerfectCacheMemory * localDirectory, template="", + constructor="m_block_size_bits"; Tick clockEdge(); void set_cache_entry(AbstractCacheEntry b); diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm --- a/src/mem/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/protocol/MOESI_CMP_token-dir.sm @@ -164,7 +164,7 @@ // ** OBJECTS ** - PersistentTable * persistentTable; + PersistentTable * persistentTable, constructor="m_block_size_bits"; TimerTable * reissueTimerTable; TBETable * TBEs, template="", constructor="m_number_of_TBEs"; diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -612,7 +612,8 @@ nullptr : pkt->getPtr(), pkt->getSize(), pc, secondary_type, RubyAccessMode_Supervisor, pkt, - PrefetchBit_No, proc_id, core_id); + m_block_size_bits, PrefetchBit_No, + proc_id, core_id); DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\n", curTick(), m_version, "Seq", "Begin", "", "", diff --git a/src/mem/ruby/system/Sequencer.py b/src/mem/ruby/system/Sequencer.py --- a/src/mem/ruby/system/Sequencer.py +++ b/src/mem/ruby/system/Sequencer.py @@ -48,6 +48,7 @@ no_retry_on_stall = Param.Bool(False, "") ruby_system = Param.RubySystem(Parent.any, "") system = Param.System(Parent.any, "system object") + block_size_bytes = Param.UInt32(64, "cache block size") support_data_reqs = Param.Bool(True, "data cache requests supported") support_inst_reqs = Param.Bool(True, "inst cache requests supported") is_cpu_sequencer = Param.Bool(True, "connected to a cpu") diff --git a/src/mem/ruby/system/VIPERCoalescer.cc b/src/mem/ruby/system/VIPERCoalescer.cc --- a/src/mem/ruby/system/VIPERCoalescer.cc +++ b/src/mem/ruby/system/VIPERCoalescer.cc @@ -215,7 +215,7 @@ std::shared_ptr msg = std::make_shared( clockEdge(), addr, (uint8_t*) 0, 0, 0, RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor, - nullptr); + nullptr, m_block_size_bits); assert(m_mandatory_q_ptr != NULL); m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); m_outstanding_inv++; @@ -242,7 +242,7 @@ std::shared_ptr msg = std::make_shared( clockEdge(), addr, (uint8_t*) 0, 0, 0, RubyRequestType_FLUSH, RubyAccessMode_Supervisor, - nullptr); + nullptr, m_block_size_bits); assert(m_mandatory_q_ptr != NULL); m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); m_outstanding_wb++; @@ -266,7 +266,7 @@ std::shared_ptr msg = std::make_shared( clockEdge(), addr, (uint8_t*) 0, 0, 0, RubyRequestType_REPLACEMENT, RubyAccessMode_Supervisor, - nullptr); + nullptr, m_block_size_bits); assert(m_mandatory_q_ptr != NULL); m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); m_outstanding_inv++; @@ -278,7 +278,7 @@ std::shared_ptr msg = std::make_shared( clockEdge(), addr, (uint8_t*) 0, 0, 0, RubyRequestType_FLUSH, RubyAccessMode_Supervisor, - nullptr); + nullptr, m_block_size_bits); assert(m_mandatory_q_ptr != NULL); m_mandatory_q_ptr->enqueue(msg, clockEdge(), m_data_cache_hit_latency); m_outstanding_wb++; diff --git a/src/mem/ruby/profiler/Profiler.hh b/src/mem/ruby/profiler/Profiler.hh --- a/src/mem/ruby/profiler/Profiler.hh +++ b/src/mem/ruby/profiler/Profiler.hh @@ -63,6 +63,8 @@ class Profiler { + friend class AddressProfiler; + public: Profiler(const RubySystemParams *params, RubySystem *rs); ~Profiler(); @@ -139,6 +141,7 @@ std::vector m_FirstResponseToCompletionDelayHistCoalsr; //added by SS + const uint32_t m_block_size_bytes; const bool m_hot_lines; const bool m_all_instructions; const uint32_t m_num_vnets; diff --git a/src/mem/ruby/profiler/Profiler.cc b/src/mem/ruby/profiler/Profiler.cc --- a/src/mem/ruby/profiler/Profiler.cc +++ b/src/mem/ruby/profiler/Profiler.cc @@ -63,8 +63,8 @@ using m5::stl_helpers::operator<<; Profiler::Profiler(const RubySystemParams *p, RubySystem *rs) - : m_ruby_system(rs), m_hot_lines(p->hot_lines), - m_all_instructions(p->all_instructions), + : m_ruby_system(rs), m_block_size_bytes(p->block_size_bytes), + m_hot_lines(p->hot_lines), m_all_instructions(p->all_instructions), m_num_vnets(p->number_of_virtual_networks) { m_address_profiler_ptr = new AddressProfiler(p->num_of_sequencers, this); diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -141,6 +141,9 @@ void wakeUpAllBuffers(Addr addr); void wakeUpAllBuffers(); + uint32_t getBlockSizeBytes() const { return m_block_size_bytes; } + uint32_t getBlockSizeBits() const { return m_block_size_bits; } + protected: const NodeID m_version; MachineID m_machineID; @@ -158,6 +161,10 @@ typedef std::map WaitingBufType; WaitingBufType m_waiting_buffers; + //! Block size used by this controller. + const uint32_t m_block_size_bytes; + const uint32_t m_block_size_bits; + unsigned int m_in_ports; unsigned int m_cur_in_port; const int m_number_of_TBEs; diff --git a/src/mem/ruby/slicc_interface/AbstractController.cc b/src/mem/ruby/slicc_interface/AbstractController.cc --- a/src/mem/ruby/slicc_interface/AbstractController.cc +++ b/src/mem/ruby/slicc_interface/AbstractController.cc @@ -39,11 +39,15 @@ : MemObject(p), Consumer(this), m_version(p->version), m_clusterID(p->cluster_id), m_masterId(p->system->getMasterId(name())), m_is_blocking(false), + m_block_size_bytes(p->block_size_bytes), + m_block_size_bits(floorLog2(m_block_size_bytes)), m_number_of_TBEs(p->number_of_TBEs), m_transitions_per_cycle(p->transitions_per_cycle), m_buffer_size(p->buffer_size), m_recycle_latency(p->recycle_latency), memoryPort(csprintf("%s.memory", name()), this, "") { + assert(isPowerOf2(m_block_size_bytes)); + if (m_version == 0) { // Combine the statistics from all controllers // of this particular type. diff --git a/src/mem/ruby/slicc_interface/Controller.py b/src/mem/ruby/slicc_interface/Controller.py --- a/src/mem/ruby/slicc_interface/Controller.py +++ b/src/mem/ruby/slicc_interface/Controller.py @@ -39,6 +39,8 @@ version = Param.Int("") cluster_id = Param.UInt32(0, "Id of this controller's cluster") + block_size_bytes = Param.UInt32(64, + "block size used for reading / writing from memory.") transitions_per_cycle = \ Param.Int(32, "no. of SLICC state machine transitions per cycle") buffer_size = Param.UInt32(0, "max buffer size 0 means infinite") diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -64,9 +64,9 @@ RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, - PacketPtr _pkt, PrefetchBit _pb = PrefetchBit_No, - ContextID _proc_id = 100, ContextID _core_id = 99, - HSAScope _scope = HSAScope_UNSPECIFIED, + PacketPtr _pkt, uint32_t block_size_bits, + PrefetchBit _pb = PrefetchBit_No, ContextID _proc_id = 100, + ContextID _core_id = 99, HSAScope _scope = HSAScope_UNSPECIFIED, HSASegment _segment = HSASegment_GLOBAL) : Message(curTime), m_PhysicalAddress(_paddr), @@ -85,11 +85,10 @@ } RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, - uint64_t _pc, RubyRequestType _type, - RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, - unsigned _proc_id, unsigned _core_id, - int _wm_size, std::vector & _wm_mask, - DataBlock & _Data, + uint64_t _pc, RubyRequestType _type, RubyAccessMode _access_mode, + PacketPtr _pkt, uint32_t block_size_bits, PrefetchBit _pb, + unsigned _proc_id, unsigned _core_id, int _wm_size, + std::vector & _wm_mask, DataBlock & _Data, HSAScope _scope = HSAScope_UNSPECIFIED, HSASegment _segment = HSASegment_GLOBAL) : Message(curTime), @@ -113,9 +112,9 @@ RubyRequest(Tick curTime, uint64_t _paddr, uint8_t* _data, int _len, uint64_t _pc, RubyRequestType _type, - RubyAccessMode _access_mode, PacketPtr _pkt, PrefetchBit _pb, - unsigned _proc_id, unsigned _core_id, - int _wm_size, std::vector & _wm_mask, + RubyAccessMode _access_mode, PacketPtr _pkt, + uint32_t block_size_bits, PrefetchBit _pb, unsigned _proc_id, + unsigned _core_id, int _wm_size, std::vector & _wm_mask, DataBlock & _Data, std::vector< std::pair > _atomicOps, HSAScope _scope = HSAScope_UNSPECIFIED, diff --git a/src/mem/ruby/structures/CacheMemory.hh b/src/mem/ruby/structures/CacheMemory.hh --- a/src/mem/ruby/structures/CacheMemory.hh +++ b/src/mem/ruby/structures/CacheMemory.hh @@ -185,6 +185,10 @@ int m_start_index_bit; bool m_resource_stalls; int m_block_size; + + //! Size of the cache blocks + const uint32_t m_block_size_bytes; + const uint32_t m_block_size_bits; }; std::ostream& operator<<(std::ostream& out, const CacheMemory& obj); diff --git a/src/mem/ruby/structures/CacheMemory.cc b/src/mem/ruby/structures/CacheMemory.cc --- a/src/mem/ruby/structures/CacheMemory.cc +++ b/src/mem/ruby/structures/CacheMemory.cc @@ -58,7 +58,9 @@ dataArray(p->dataArrayBanks, p->dataAccessLatency, p->start_index_bit, p->ruby_system), tagArray(p->tagArrayBanks, p->tagAccessLatency, - p->start_index_bit, p->ruby_system) + p->start_index_bit, p->ruby_system), + m_block_size_bytes(p->block_size_bytes), + m_block_size_bits(floorLog2(m_block_size_bytes)) { m_cache_size = p->size; m_cache_assoc = p->assoc; diff --git a/src/mem/ruby/structures/DirectoryMemory.hh b/src/mem/ruby/structures/DirectoryMemory.hh --- a/src/mem/ruby/structures/DirectoryMemory.hh +++ b/src/mem/ruby/structures/DirectoryMemory.hh @@ -65,14 +65,16 @@ DirectoryMemory& operator=(const DirectoryMemory& obj); private: - const std::string m_name; + const int m_version; AbstractEntry **m_entries; - // int m_size; // # of memory module blocks this directory is - // responsible for - uint64_t m_size_bytes; + + //! Block size used for maintaining ownership info. + const uint32_t m_block_size_bytes; + const uint32_t m_block_size_bits; + + const uint64_t m_size_bytes; uint64_t m_size_bits; uint64_t m_num_entries; - int m_version; static int m_num_directories; static int m_num_directories_bits; diff --git a/src/mem/ruby/structures/DirectoryMemory.cc b/src/mem/ruby/structures/DirectoryMemory.cc --- a/src/mem/ruby/structures/DirectoryMemory.cc +++ b/src/mem/ruby/structures/DirectoryMemory.cc @@ -40,10 +40,11 @@ int DirectoryMemory::m_numa_high_bit = 0; DirectoryMemory::DirectoryMemory(const Params *p) - : SimObject(p) + : SimObject(p), m_version(p->version), + m_block_size_bytes(p->block_size_bytes), + m_block_size_bits(floorLog2(m_block_size_bytes)), + m_size_bytes(p->size) { - m_version = p->version; - m_size_bytes = p->size; m_size_bits = floorLog2(m_size_bytes); m_num_entries = 0; m_numa_high_bit = p->numa_high_bit; diff --git a/src/mem/ruby/structures/DirectoryMemory.py b/src/mem/ruby/structures/DirectoryMemory.py --- a/src/mem/ruby/structures/DirectoryMemory.py +++ b/src/mem/ruby/structures/DirectoryMemory.py @@ -36,6 +36,8 @@ cxx_class = 'DirectoryMemory' cxx_header = "mem/ruby/structures/DirectoryMemory.hh" version = Param.Int(0, "") + block_size_bytes = Param.UInt32(64, + "block size used for ownership info.") size = Param.MemorySize("1GB", "capacity in bytes") # the default value of the numa high bit is specified in the command line # option and must be passed into the directory memory sim object diff --git a/src/mem/ruby/structures/PerfectCacheMemory.hh b/src/mem/ruby/structures/PerfectCacheMemory.hh --- a/src/mem/ruby/structures/PerfectCacheMemory.hh +++ b/src/mem/ruby/structures/PerfectCacheMemory.hh @@ -53,7 +53,7 @@ class PerfectCacheMemory { public: - PerfectCacheMemory(); + PerfectCacheMemory(uint32_t block_size_bits); // tests to see if an address is present in the cache bool isTagPresent(Addr address) const; @@ -88,6 +88,7 @@ PerfectCacheMemory& operator=(const PerfectCacheMemory& obj); // Data Members (m_prefix) + const uint32_t m_block_size_bits; std::unordered_map > m_map; }; @@ -102,7 +103,8 @@ template inline -PerfectCacheMemory::PerfectCacheMemory() +PerfectCacheMemory::PerfectCacheMemory(uint32_t block_size_bits) + : m_block_size_bits(block_size_bits) { } diff --git a/src/mem/ruby/structures/PersistentTable.hh b/src/mem/ruby/structures/PersistentTable.hh --- a/src/mem/ruby/structures/PersistentTable.hh +++ b/src/mem/ruby/structures/PersistentTable.hh @@ -52,7 +52,7 @@ { public: // Constructors - PersistentTable(); + PersistentTable(uint32_t block_size_bits); // Destructor ~PersistentTable(); @@ -72,6 +72,9 @@ void print(std::ostream& out) const; private: + // Size of each cache block in bits + const uint32_t m_block_size_bits; + // Private copy constructor and assignment operator PersistentTable(const PersistentTable& obj); PersistentTable& operator=(const PersistentTable& obj); diff --git a/src/mem/ruby/structures/PersistentTable.cc b/src/mem/ruby/structures/PersistentTable.cc --- a/src/mem/ruby/structures/PersistentTable.cc +++ b/src/mem/ruby/structures/PersistentTable.cc @@ -31,7 +31,8 @@ using namespace std; -PersistentTable::PersistentTable() +PersistentTable::PersistentTable(uint32_t block_size_bits) + : m_block_size_bits(block_size_bits) { } diff --git a/src/mem/ruby/structures/Prefetcher.hh b/src/mem/ruby/structures/Prefetcher.hh --- a/src/mem/ruby/structures/Prefetcher.hh +++ b/src/mem/ruby/structures/Prefetcher.hh @@ -192,6 +192,9 @@ AbstractController *m_controller; const Addr m_page_shift; + //! Cache block size + const uint32_t m_block_size_bytes; + const uint32_t m_block_size_bits; //! Count of accesses to the prefetcher Stats::Scalar numMissObserved; diff --git a/src/mem/ruby/structures/Prefetcher.cc b/src/mem/ruby/structures/Prefetcher.cc --- a/src/mem/ruby/structures/Prefetcher.cc +++ b/src/mem/ruby/structures/Prefetcher.cc @@ -46,7 +46,9 @@ m_negative_filter(p->unit_filter, 0), m_nonunit_filter(p->nonunit_filter, 0), m_prefetch_cross_pages(p->cross_page), - m_page_shift(p->sys->getPageShift()) + m_page_shift(p->sys->getPageShift()), + m_block_size_bytes(p->block_size_bytes), + m_block_size_bits(floorLog2(p->block_size_bytes)) { assert(m_num_streams > 0); assert(m_num_startup_pfs <= MAX_PF_INFLIGHT); diff --git a/src/mem/ruby/structures/RubyCache.py b/src/mem/ruby/structures/RubyCache.py --- a/src/mem/ruby/structures/RubyCache.py +++ b/src/mem/ruby/structures/RubyCache.py @@ -40,6 +40,8 @@ assoc = Param.Int(""); replacement_policy = Param.ReplacementPolicy(PseudoLRUReplacementPolicy(), "") + block_size_bytes = Param.UInt32(64, "cache block size.") + start_index_bit = Param.Int(6, "index start, default 6 for 64-byte line"); is_icache = Param.Bool(False, "is instruction only cache"); block_size = Param.MemorySize("0B", "block size in bytes. 0 means default RubyBlockSize") diff --git a/src/mem/ruby/structures/RubyPrefetcher.py b/src/mem/ruby/structures/RubyPrefetcher.py --- a/src/mem/ruby/structures/RubyPrefetcher.py +++ b/src/mem/ruby/structures/RubyPrefetcher.py @@ -48,3 +48,4 @@ cross_page = Param.Bool(False, """True if prefetched address can be on a page different from the observed address""") sys = Param.System(Parent.any, "System this prefetcher belongs to") + block_size_bytes = Param.UInt32(64, "cache block size.") diff --git a/src/mem/ruby/system/CacheRecorder.hh b/src/mem/ruby/system/CacheRecorder.hh --- a/src/mem/ruby/system/CacheRecorder.hh +++ b/src/mem/ruby/system/CacheRecorder.hh @@ -66,13 +66,13 @@ class CacheRecorder { public: - CacheRecorder(); + CacheRecorder(uint32_t block_size_bytes); ~CacheRecorder(); CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& SequencerMap, - uint64_t block_size_bytes); + uint32_t block_size_bytes); void addRecord(int cntrl, Addr data_addr, Addr pc_addr, RubyRequestType type, Tick time, DataBlock& data); @@ -109,7 +109,7 @@ uint64_t m_bytes_read; uint64_t m_records_read; uint64_t m_records_flushed; - uint64_t m_block_size_bytes; + const uint32_t m_block_size_bytes; }; inline bool diff --git a/src/mem/ruby/system/CacheRecorder.cc b/src/mem/ruby/system/CacheRecorder.cc --- a/src/mem/ruby/system/CacheRecorder.cc +++ b/src/mem/ruby/system/CacheRecorder.cc @@ -42,17 +42,17 @@ << m_type << ", Time: " << m_time << "]"; } -CacheRecorder::CacheRecorder() +CacheRecorder::CacheRecorder(uint32_t block_size_bytes) : m_uncompressed_trace(NULL), m_uncompressed_trace_size(0), - m_block_size_bytes(RubySystem::getBlockSizeBytes()) + m_block_size_bytes(block_size_bytes) { } CacheRecorder::CacheRecorder(uint8_t* uncompressed_trace, uint64_t uncompressed_trace_size, std::vector& seq_map, - uint64_t block_size_bytes) + uint32_t block_size_bytes) : m_uncompressed_trace(uncompressed_trace), m_uncompressed_trace_size(uncompressed_trace_size), m_seq_map(seq_map), m_bytes_read(0), m_records_read(0), diff --git a/src/mem/ruby/system/GPUCoalescer.cc b/src/mem/ruby/system/GPUCoalescer.cc --- a/src/mem/ruby/system/GPUCoalescer.cc +++ b/src/mem/ruby/system/GPUCoalescer.cc @@ -933,8 +933,8 @@ pkt->getPtr(), pkt->getSize(), pc, secondary_type, RubyAccessMode_Supervisor, pkt, - PrefetchBit_No, proc_id, 100, - blockSize, accessMask, + m_block_size_bits, PrefetchBit_No, proc_id, + 100, m_block_size_bytes, accessMask, dataBlock, atomicOps, accessScope, accessSegment); } else { @@ -942,8 +942,8 @@ pkt->getPtr(), pkt->getSize(), pc, secondary_type, RubyAccessMode_Supervisor, pkt, - PrefetchBit_No, proc_id, 100, - blockSize, accessMask, + m_block_size_bits, PrefetchBit_No, proc_id, + 100, m_block_size_bytes, accessMask, dataBlock, accessScope, accessSegment); } diff --git a/src/mem/ruby/system/RubyPort.hh b/src/mem/ruby/system/RubyPort.hh --- a/src/mem/ruby/system/RubyPort.hh +++ b/src/mem/ruby/system/RubyPort.hh @@ -194,6 +194,9 @@ std::vector slave_ports; + const uint32_t m_block_size_bytes; + const uint32_t m_block_size_bits; + private: bool onRetryList(MemSlavePort * port) { diff --git a/src/mem/ruby/system/RubyPort.cc b/src/mem/ruby/system/RubyPort.cc --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -54,6 +54,8 @@ : MemObject(p), m_ruby_system(p->ruby_system), m_version(p->version), m_controller(NULL), m_mandatory_q_ptr(NULL), m_usingRubyTester(p->using_ruby_tester), system(p->system), + m_block_size_bytes(p->block_size_bytes), + m_block_size_bits(floorLog2(m_block_size_bytes)), pioMasterPort(csprintf("%s.pio-master-port", name()), this), pioSlavePort(csprintf("%s.pio-slave-port", name()), this), memMasterPort(csprintf("%s.mem-master-port", name()), this), diff --git a/src/mem/ruby/system/RubySystem.hh b/src/mem/ruby/system/RubySystem.hh --- a/src/mem/ruby/system/RubySystem.hh +++ b/src/mem/ruby/system/RubySystem.hh @@ -64,6 +64,7 @@ }; friend class RubyEvent; + friend class AddressProfiler; typedef RubySystemParams Params; RubySystem(const Params *p);