diff --git a/src/mem/protocol/RubySlicc_Util.sm b/src/mem/protocol/RubySlicc_Util.sm --- a/src/mem/protocol/RubySlicc_Util.sm +++ b/src/mem/protocol/RubySlicc_Util.sm @@ -1,6 +1,5 @@ - /* - * Copyright (c) 1999-2005 Mark D. Hill and David A. Wood + * Copyright (c) 1999-2015 Mark D. Hill and David A. Wood * All rights reserved. * * Redistribution and use in source and binary forms, with or without diff --git a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh --- a/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh +++ b/src/mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh @@ -32,59 +32,6 @@ #include "mem/protocol/MachineType.hh" #include "mem/ruby/common/Address.hh" #include "mem/ruby/common/MachineID.hh" -#include "mem/ruby/common/NetDest.hh" -#include "mem/ruby/structures/DirectoryMemory.hh" - -// used to determine the home directory -// returns a value between 0 and total_directories_within_the_system -inline NodeID -map_Address_to_DirectoryNode(Addr addr) -{ - return DirectoryMemory::mapAddressToDirectoryVersion(addr); -} - -inline NodeID -map_Address_to_TCCdirNode(Addr addr) -{ - return DirectoryMemory::mapAddressToDirectoryVersion(addr); -} - -// used to determine the home directory -// returns a value between 0 and total_directories_within_the_system -inline MachineID -map_Address_to_Directory(Addr addr) -{ - MachineID mach = - {MachineType_Directory, map_Address_to_DirectoryNode(addr)}; - return mach; -} - -inline MachineID -map_Address_to_RegionDir(Addr addr) -{ - MachineID mach = {MachineType_RegionDir, - map_Address_to_DirectoryNode(addr)}; - return mach; -} - -inline MachineID -map_Address_to_TCCdir(Addr addr) -{ - MachineID mach = - {MachineType_TCCdir, map_Address_to_TCCdirNode(addr)}; - return mach; -} - -inline NetDest -broadcast(MachineType type) -{ - NetDest dest; - for (NodeID i = 0; i < MachineType_base_count(type); i++) { - MachineID mach = {type, i}; - dest.add(mach); - } - return dest; -} inline MachineID mapAddressToRange(Addr addr, MachineType type, int low_bit, @@ -111,12 +58,6 @@ return machID.type; } -inline int -machineCount(MachineType machType) -{ - return MachineType_base_count(machType); -} - inline MachineID createMachineID(MachineType type, NodeID id) { diff --git a/src/mem/ruby/structures/DirectoryMemory.hh b/src/mem/ruby/structures/DirectoryMemory.hh --- a/src/mem/ruby/structures/DirectoryMemory.hh +++ b/src/mem/ruby/structures/DirectoryMemory.hh @@ -46,13 +46,9 @@ ~DirectoryMemory(); void init(); + uint64_t getSize() const { return m_size_bytes; } - uint64_t mapAddressToLocalIdx(Addr address); - static uint64_t mapAddressToDirectoryVersion(Addr address); - - uint64_t getSize() { return m_size_bytes; } - - bool isPresent(Addr address); + bool isPresent(Addr address) const; AbstractEntry *lookup(Addr address); AbstractEntry *allocate(Addr address, AbstractEntry* new_entry); @@ -64,6 +60,9 @@ DirectoryMemory(const DirectoryMemory& obj); DirectoryMemory& operator=(const DirectoryMemory& obj); + uint64_t mapAddressToLocalIdx(Addr address) const; + uint64_t mapAddressToDirectoryVersion(Addr address) const; + private: const int m_version; AbstractEntry **m_entries; @@ -73,12 +72,12 @@ const uint32_t m_block_size_bits; const uint64_t m_size_bytes; - uint64_t m_size_bits; + const uint64_t m_size_bits; uint64_t m_num_entries; - static int m_num_directories; - static int m_num_directories_bits; - static int m_numa_high_bit; + const int m_num_directories; + const int m_num_directories_bits; + const int m_numa_high_bit; }; inline std::ostream& diff --git a/src/mem/ruby/structures/DirectoryMemory.cc b/src/mem/ruby/structures/DirectoryMemory.cc --- a/src/mem/ruby/structures/DirectoryMemory.cc +++ b/src/mem/ruby/structures/DirectoryMemory.cc @@ -29,25 +29,23 @@ #include "base/intmath.hh" #include "debug/RubyCache.hh" #include "debug/RubyStats.hh" -#include "mem/ruby/slicc_interface/RubySlicc_Util.hh" #include "mem/ruby/structures/DirectoryMemory.hh" #include "mem/ruby/system/RubySystem.hh" using namespace std; -int DirectoryMemory::m_num_directories = 0; -int DirectoryMemory::m_num_directories_bits = 0; -int DirectoryMemory::m_numa_high_bit = 0; - DirectoryMemory::DirectoryMemory(const Params *p) : SimObject(p), m_version(p->version), m_block_size_bytes(p->block_size_bytes), m_block_size_bits(floorLog2(m_block_size_bytes)), - m_size_bytes(p->size) + m_size_bytes(p->size), + m_size_bits(floorLog2(m_size_bytes)), + m_num_directories(p->num_directories), + m_num_directories_bits(floorLog2(m_num_directories)), + m_numa_high_bit(p->numa_high_bit) { - m_size_bits = floorLog2(m_size_bytes); m_num_entries = 0; - m_numa_high_bit = p->numa_high_bit; + assert(m_numa_high_bit != 0); } void @@ -57,14 +55,6 @@ m_entries = new AbstractEntry*[m_num_entries]; for (int i = 0; i < m_num_entries; i++) m_entries[i] = NULL; - - m_num_directories++; - m_num_directories_bits = ceilLog2(m_num_directories); - - if (m_numa_high_bit == 0) { - m_numa_high_bit = RubySystem::getMemorySizeBits() - 1; - } - assert(m_numa_high_bit != 0); } DirectoryMemory::~DirectoryMemory() @@ -79,7 +69,7 @@ } uint64_t -DirectoryMemory::mapAddressToDirectoryVersion(Addr address) +DirectoryMemory::mapAddressToDirectoryVersion(Addr address) const { if (m_num_directories_bits == 0) return 0; @@ -91,14 +81,14 @@ } bool -DirectoryMemory::isPresent(Addr address) +DirectoryMemory::isPresent(Addr address) const { bool ret = (mapAddressToDirectoryVersion(address) == m_version); return ret; } uint64_t -DirectoryMemory::mapAddressToLocalIdx(Addr address) +DirectoryMemory::mapAddressToLocalIdx(Addr address) const { uint64_t ret; if (m_num_directories_bits > 0) { diff --git a/src/mem/ruby/structures/DirectoryMemory.py b/src/mem/ruby/structures/DirectoryMemory.py --- a/src/mem/ruby/structures/DirectoryMemory.py +++ b/src/mem/ruby/structures/DirectoryMemory.py @@ -35,10 +35,11 @@ type = 'RubyDirectoryMemory' cxx_class = 'DirectoryMemory' cxx_header = "mem/ruby/structures/DirectoryMemory.hh" - version = Param.Int(0, "") + version = Param.Unsigned(0, "") block_size_bytes = Param.UInt32(64, "block size used for ownership info.") size = Param.MemorySize("1GB", "capacity in bytes") # the default value of the numa high bit is specified in the command line # option and must be passed into the directory memory sim object - numa_high_bit = Param.Int("numa high bit") + numa_high_bit = Param.Unsigned("numa high bit") + num_directories = Param.Unsigned("number of directories") diff --git a/src/mem/ruby/structures/Prefetcher.cc b/src/mem/ruby/structures/Prefetcher.cc --- a/src/mem/ruby/structures/Prefetcher.cc +++ b/src/mem/ruby/structures/Prefetcher.cc @@ -26,9 +26,10 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ +#include "mem/ruby/structures/Prefetcher.hh" + #include "debug/RubyPrefetcher.hh" #include "mem/ruby/slicc_interface/RubySlicc_ComponentMapping.hh" -#include "mem/ruby/structures/Prefetcher.hh" Prefetcher* PrefetcherParams::create() diff --git a/src/mem/protocol/MOESI_AMD_Base-dir.sm b/src/mem/protocol/MOESI_AMD_Base-dir.sm --- a/src/mem/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/protocol/MOESI_AMD_Base-dir.sm @@ -40,9 +40,12 @@ Cycles l3_hit_latency := 50; bool noTCCdir := "False"; bool CPUonly := "False"; - int TCC_select_num_bits; bool useL3OnWT := "False"; Cycles to_memory_controller_latency := 1; + int tcc_low_bit; + int tcc_num_bits; + int tccdir_low_bit; + int tccdir_num_bits; // From the Cores MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request"; @@ -159,8 +162,6 @@ TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; - Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -562,12 +563,15 @@ in_msg.NoWriteConflict) || CPUonly) { } else if (noTCCdir) { - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); } else { out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + tccdir_low_bit, + tccdir_num_bits)); } out_msg.Destination.remove(in_msg.Requestor); tbe.NumPendingAcks := out_msg.Destination.count(); @@ -599,12 +603,15 @@ } else { out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + tccdir_low_bit, + tccdir_num_bits)); tbe.NumPendingAcks := tbe.NumPendingAcks + 1; } if (noTCCdir && !CPUonly) { - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); } out_msg.Destination.remove(in_msg.Requestor); tbe.NumPendingAcks := out_msg.Destination.count(); @@ -633,14 +640,16 @@ // add relevant TCC node to the list. This replaces all TCPs and SQCs if (noTCCdir && !CPUonly) { - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); } else { if (!noTCCdir) { out_msg.Destination.add(mapAddressToRange(address, MachineType:TCCdir, - TCC_select_low_bit, - TCC_select_num_bits)); + tccdir_low_bit, + tccdir_num_bits)); } } out_msg.Destination.remove(in_msg.Requestor); diff --git a/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm --- a/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm +++ b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm @@ -51,10 +51,13 @@ Cycles l3_hit_latency := 50; bool noTCCdir := "False"; bool CAB_TCC := "False"; - int TCC_select_num_bits:=1; bool useL3OnWT := "False"; bool inclusiveDir := "True"; Cycles to_memory_controller_latency := 1; + int tcc_low_bit; + int tcc_num_bits:=1; + int tccdir_low_bit; + int tccdir_num_bits; // From the Cores MessageBuffer * requestFromCores, network="From", virtual_network="0", ordered="false", vnet_type="request"; @@ -191,7 +194,6 @@ TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -651,10 +653,15 @@ in_msg.NoWriteConflict) { // Don't Include TCCs unless there was write-CAB conflict in the TCC } else if(noTCCdir) { - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); } else { - out_msg.Destination.add(map_Address_to_TCCdir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); } } out_msg.Destination.remove(in_msg.Requestor); @@ -688,12 +695,17 @@ if (noTCCdir) { //Don't need to notify TCC about reads } else { - out_msg.Destination.add(map_Address_to_TCCdir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); tbe.NumPendingAcks := tbe.NumPendingAcks + 1; } if (noTCCdir && CAB_TCC) { - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); } } tbe.NumPendingAcks := out_msg.Destination.count(); @@ -726,12 +738,17 @@ if (noTCCdir) { //Don't need to notify TCC about reads } else { - out_msg.Destination.add(map_Address_to_TCCdir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); tbe.NumPendingAcks := tbe.NumPendingAcks + 1; } if (noTCCdir && CAB_TCC) { - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); } } out_msg.Destination.remove(in_msg.Requestor); @@ -764,10 +781,15 @@ // add relevant TCC node to the list. This replaces all TCPs and SQCs if(isGPUSharer(address)) { if (noTCCdir) { - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); } else { - out_msg.Destination.add(map_Address_to_TCCdir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); } } out_msg.Destination.remove(in_msg.Requestor); diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm --- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm @@ -27,9 +27,11 @@ */ machine(MachineType:L2Cache, "Token protocol") -: CacheMemory * L2cache; - Cycles response_latency := 2; - Cycles request_latency := 2; + : CacheMemory * L2cache; + Cycles response_latency := 2; + Cycles request_latency := 2; + int dir_select_num_bits; + int dir_select_low_bit; // L2 BANK QUEUES // From local bank of L2 cache TO the network @@ -731,7 +733,8 @@ out_msg.Type := CoherenceRequestType:GETS; out_msg.RequestorMachine := MachineType:L2Cache; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -745,7 +748,8 @@ out_msg.Type := CoherenceRequestType:GETX; out_msg.RequestorMachine := MachineType:L2Cache; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -758,7 +762,8 @@ out_msg.Type := CoherenceRequestType:PUTX; out_msg.RequestorMachine := MachineType:L2Cache; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -770,7 +775,8 @@ out_msg.Type := CoherenceRequestType:PUTO; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L2Cache; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -783,7 +789,8 @@ out_msg.Type := CoherenceRequestType:PUTO_SHARERS; out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:L2Cache; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -1160,7 +1167,8 @@ enqueue(responseNetwork_out, ResponseMsg, response_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCK; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; out_msg.MessageSize := MessageSizeType:Unblock_Control; @@ -1173,7 +1181,8 @@ enqueue(responseNetwork_out, ResponseMsg, response_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCK_EXCLUSIVE; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; out_msg.MessageSize := MessageSizeType:Unblock_Control; @@ -1456,7 +1465,8 @@ out_msg.addr := address; out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.Dirty := tbe.Dirty; if (tbe.Dirty) { out_msg.Type := CoherenceResponseType:WRITEBACK_DIRTY_DATA; @@ -1585,7 +1595,8 @@ enqueue(responseNetwork_out, ResponseMsg, response_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:DMA_ACK; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:L2Cache; out_msg.MessageSize := MessageSizeType:Unblock_Control; diff --git a/src/mem/protocol/MOESI_CMP_directory-dma.sm b/src/mem/protocol/MOESI_CMP_directory-dma.sm --- a/src/mem/protocol/MOESI_CMP_directory-dma.sm +++ b/src/mem/protocol/MOESI_CMP_directory-dma.sm @@ -31,6 +31,8 @@ : DMASequencer * dma_sequencer; Cycles request_latency := 14; Cycles response_latency := 14; + int dir_select_num_bits; + int dir_select_low_bit; MessageBuffer * responseFromDir, network="From", virtual_network="2", vnet_type="response"; @@ -159,7 +161,8 @@ out_msg.Type := CoherenceRequestType:DMA_READ; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.Requestor := machineID; out_msg.RequestorMachine := MachineType:DMA; out_msg.MessageSize := MessageSizeType:Writeback_Control; @@ -170,16 +173,17 @@ action(s_sendWriteRequest, "\s", desc="Send a DMA write request to memory") { peek(dmaRequestQueue_in, SequencerMsg) { enqueue(reqToDirectory_out, RequestMsg, request_latency) { - out_msg.addr := in_msg.PhysicalAddress; - out_msg.Type := CoherenceRequestType:DMA_WRITE; - out_msg.DataBlk := in_msg.DataBlk; - out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); - out_msg.Requestor := machineID; - out_msg.RequestorMachine := MachineType:DMA; - out_msg.MessageSize := MessageSizeType:Writeback_Control; - } + out_msg.addr := in_msg.PhysicalAddress; + out_msg.Type := CoherenceRequestType:DMA_WRITE; + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Len := in_msg.Len; + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:DMA; + out_msg.MessageSize := MessageSizeType:Writeback_Control; } + } } action(a_ackCallback, "a", desc="Notify dma controller that write request completed") { @@ -207,7 +211,8 @@ enqueue(respToDirectory_out, ResponseMsg, response_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCK_EXCLUSIVE; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.Sender := machineID; out_msg.SenderMachine := MachineType:DMA; out_msg.MessageSize := MessageSizeType:Writeback_Control; diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -35,7 +35,10 @@ : Sequencer * sequencer; CacheMemory * L1Icache; CacheMemory * L1Dcache; + int l2_select_low_bit; int l2_select_num_bits; + int dir_select_num_bits; + int dir_select_low_bit; int N_tokens; Cycles l1_request_latency := 2; @@ -197,7 +200,6 @@ TBETable * L1_TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; bool starving, default="false"; - int l2_select_low_bit, default="getBlockSizeBits()"; PersistentTable * persistentTable, constructor="m_block_size_bits"; TimerTable * useTimerTable; @@ -766,7 +768,8 @@ MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := tbe.Prefetch; out_msg.AccessMode := tbe.AccessMode; @@ -888,7 +891,8 @@ MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := tbe.Prefetch; out_msg.AccessMode := tbe.AccessMode; @@ -992,7 +996,8 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.Tokens := in_msg.Tokens; out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk := in_msg.DataBlk; @@ -1459,7 +1464,8 @@ MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Persistent_Control; } starving := false; diff --git a/src/mem/protocol/MOESI_CMP_token-L2cache.sm b/src/mem/protocol/MOESI_CMP_token-L2cache.sm --- a/src/mem/protocol/MOESI_CMP_token-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L2cache.sm @@ -32,6 +32,8 @@ Cycles l2_request_latency := 5; Cycles l2_response_latency := 5; bool filtering_enabled := "True"; + int dir_select_num_bits; + int dir_select_low_bit; // L2 BANK QUEUES // From local bank of L2 cache TO the network @@ -523,14 +525,13 @@ //out_msg.Destination.addNetDest(getAllPertinentL2Banks(address)); //out_msg.Destination.remove(map_L1CacheMachId_to_L2Cache(address, in_msg.Requestor)); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.AccessMode := in_msg.AccessMode; out_msg.Prefetch := in_msg.Prefetch; } //enqueue - // } // if - - //profile_filter_action(0); } // peek } //action @@ -542,7 +543,10 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); + out_msg.Tokens := in_msg.Tokens; out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk := in_msg.DataBlk; @@ -558,7 +562,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); + out_msg.Tokens := cache_entry.Tokens; out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.DataBlk.alloc(block_size_bytes); @@ -572,7 +579,8 @@ enqueue(responseNetwork_out, ResponseMsg, l2_response_latency) { out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.Tokens := cache_entry.Tokens; out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; @@ -792,7 +800,6 @@ action(j_forwardTransientRequestToLocalSharers, "j", desc="Forward external transient request to local sharers") { peek(requestNetwork_in, RequestMsg) { if (filtering_enabled && in_msg.RetryNum == 0 && sharersExist(in_msg.addr) == false) { - //profile_filter_action(1); DPRINTF(RubySlicc, "filtered message, Retry Num: %d\n", in_msg.RetryNum); } @@ -814,7 +821,6 @@ out_msg.AccessMode := in_msg.AccessMode; out_msg.Prefetch := in_msg.Prefetch; } - //profile_filter_action(0); } } } diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm --- a/src/mem/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/protocol/MOESI_CMP_token-dir.sm @@ -28,12 +28,15 @@ machine(MachineType:Directory, "Token protocol") : DirectoryMemory * directory; + int l2_select_low_bit; int l2_select_num_bits; Cycles directory_latency := 5; bool distributed_persistent := "True"; Cycles fixed_timeout_latency := 100; Cycles reissue_wakeup_latency := 10; Cycles to_memory_controller_latency := 1; + int dir_select_num_bits; + int dir_select_low_bit; // Message Queues from dir to other controllers / network MessageBuffer * dmaResponseFromDir, network="To", virtual_network="5", @@ -170,7 +173,6 @@ TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; bool starving, default="false"; - int l2_select_low_bit, default="getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -476,7 +478,10 @@ MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := PrefetchBit:No; out_msg.AccessMode := RubyAccessMode:Supervisor; @@ -544,7 +549,10 @@ MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Persistent_Control; out_msg.Prefetch := PrefetchBit:No; out_msg.AccessMode := RubyAccessMode:Supervisor; @@ -726,7 +734,9 @@ MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Persistent_Control; } starving := false; diff --git a/src/mem/protocol/MOESI_CMP_token-dma.sm b/src/mem/protocol/MOESI_CMP_token-dma.sm --- a/src/mem/protocol/MOESI_CMP_token-dma.sm +++ b/src/mem/protocol/MOESI_CMP_token-dma.sm @@ -28,16 +28,19 @@ machine(MachineType:DMA, "DMA Controller") - : DMASequencer * dma_sequencer; - Cycles request_latency := 6; + : DMASequencer * dma_sequencer; + Cycles request_latency := 6; + int dir_select_num_bits; + int dir_select_low_bit; - // Messsage Queues - MessageBuffer * responseFromDir, network="From", virtual_network="5", - vnet_type="response"; - MessageBuffer * reqToDirectory, network="To", virtual_network="0", - vnet_type="request"; - MessageBuffer * mandatoryQueue; + // Messsage Queues + MessageBuffer * responseFromDir, network="From", virtual_network="5", + vnet_type="response"; + MessageBuffer * reqToDirectory, network="To", virtual_network="0", + vnet_type="request"; + + MessageBuffer * mandatoryQueue; { state_declaration(State, desc="DMA states", default="DMA_State_READY") { READY, AccessPermission:Invalid, desc="Ready to accept a new request"; @@ -118,7 +121,9 @@ out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -133,7 +138,9 @@ out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -43,6 +43,9 @@ Cycles l2_cache_hit_latency := 10; bool no_mig_atomic := "True"; bool send_evictions; + int num_l1_machines; + int dir_select_num_bits; + int dir_select_low_bit; // NETWORK BUFFERS MessageBuffer * requestFromCache, network="To", virtual_network="2", @@ -389,7 +392,7 @@ } else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) { trigger(Event:Merged_GETS, in_msg.addr, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:GETS) { - if (machineCount(MachineType:L1Cache) > 1) { + if (num_l1_machines > 1) { if (is_valid(cache_entry)) { if (IsAtomicAccessed(cache_entry) && no_mig_atomic) { trigger(Event:Other_GETS_No_Mig, in_msg.addr, cache_entry, tbe); @@ -554,12 +557,13 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); // One from each other cache (n-1) plus the memory (+1) - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + tbe.NumPendingMsgs := num_l1_machines; } } @@ -569,30 +573,33 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); // One from each other cache (n-1) plus the memory (+1) - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + tbe.NumPendingMsgs := num_l1_machines; } } action(b_issueGETXIfMoreThanOne, "bo", desc="Issue GETX") { - if (machineCount(MachineType:L1Cache) > 1) { + if (num_l1_machines > 1) { enqueue(requestNetwork_out, RequestMsg, issue_latency) { assert(is_valid(tbe)); out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); } } // One from each other cache (n-1) plus the memory (+1) - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + tbe.NumPendingMsgs := num_l1_machines; } action(bf_issueGETF, "bf", desc="Issue GETF") { @@ -601,12 +608,13 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETF; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); // One from each other cache (n-1) plus the memory (+1) - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + tbe.NumPendingMsgs := num_l1_machines; } } @@ -621,7 +629,7 @@ out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; if (in_msg.DirectedProbe) { - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; } else { out_msg.Acks := 2; } @@ -644,7 +652,7 @@ out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (in_msg.DirectedProbe) { - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; } else { out_msg.Acks := 2; } @@ -661,7 +669,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:PUT; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -671,7 +680,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:PUTF; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -687,7 +697,7 @@ out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; if (in_msg.DirectedProbe) { - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; } else { out_msg.Acks := 2; } @@ -711,7 +721,7 @@ out_msg.Dirty := cache_entry.Dirty; DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); if (in_msg.DirectedProbe) { - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; } else { out_msg.Acks := 2; } @@ -735,7 +745,7 @@ out_msg.Dirty := tbe.Dirty; DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); if (in_msg.DirectedProbe) { - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; } else { out_msg.Acks := 2; } @@ -758,7 +768,7 @@ out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; out_msg.SilentAcks := in_msg.SilentAcks; out_msg.MessageSize := MessageSizeType:Response_Data; out_msg.InitialRequestTime := in_msg.InitialRequestTime; @@ -778,7 +788,7 @@ out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; out_msg.SilentAcks := in_msg.SilentAcks; out_msg.MessageSize := MessageSizeType:Response_Data; out_msg.InitialRequestTime := in_msg.InitialRequestTime; @@ -828,7 +838,8 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCK; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -839,7 +850,8 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:UNBLOCKM; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -852,7 +864,8 @@ out_msg.Type := CoherenceResponseType:UNBLOCKS; out_msg.Sender := machineID; out_msg.CurOwner := tbe.CurOwner; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -1073,7 +1086,7 @@ out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (in_msg.DirectedProbe) { - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; } else { out_msg.Acks := 2; } @@ -1098,7 +1111,7 @@ out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (in_msg.DirectedProbe) { - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; } else { out_msg.Acks := 2; } @@ -1121,7 +1134,7 @@ DPRINTF(RubySlicc, "%s\n", out_msg.Destination); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; - out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.Acks := num_l1_machines; out_msg.SilentAcks := in_msg.SilentAcks; out_msg.MessageSize := MessageSizeType:Response_Data; out_msg.InitialRequestTime := in_msg.InitialRequestTime; @@ -1135,7 +1148,8 @@ assert(is_valid(tbe)); out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.Dirty := tbe.Dirty; if (tbe.Dirty) { out_msg.Type := CoherenceResponseType:WB_DIRTY; @@ -1166,7 +1180,8 @@ assert(is_valid(tbe)); out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Dirty) { diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -40,6 +40,7 @@ Cycles to_memory_controller_latency := 1; bool probe_filter_enabled := "False"; bool full_bit_dir_enabled := "False"; + int num_l1_machines; MessageBuffer * forwardFromDir, network="To", virtual_network="3", vnet_type="forward"; @@ -516,7 +517,9 @@ action(oc_sendBlockAck, "oc", desc="Send block ack to the owner") { peek(requestQueue_in, RequestMsg) { - if (((probe_filter_enabled || full_bit_dir_enabled) && (in_msg.Requestor == cache_entry.Owner)) || machineCount(MachineType:L1Cache) == 1) { + if (((probe_filter_enabled || full_bit_dir_enabled) && + (in_msg.Requestor == cache_entry.Owner)) || + num_l1_machines == 1) { enqueue(forwardNetwork_out, RequestMsg, from_memory_controller_latency) { out_msg.addr := address; out_msg.Type := CoherenceRequestType:BLOCK_ACK; @@ -545,7 +548,7 @@ peek(requestQueue_in, RequestMsg) { set_cache_entry(probeFilter.allocate(address, new PfEntry(block_size_bytes))); cache_entry.Owner := in_msg.Requestor; - cache_entry.Sharers.setSize(machineCount(MachineType:L1Cache)); + cache_entry.Sharers.setSize(num_l1_machines); } } } @@ -587,7 +590,7 @@ // // One ack for each last-level cache // - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + tbe.NumPendingMsgs := num_l1_machines; // // Assume initially that the caches store a clean copy and that memory // will provide the data @@ -602,7 +605,7 @@ assert(is_valid(cache_entry)); tbe.NumPendingMsgs := cache_entry.Sharers.count(); } else { - tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); + tbe.NumPendingMsgs := num_l1_machines; } } @@ -629,10 +632,10 @@ fwd_set.remove(machineIDToNodeID(in_msg.Requestor)); if (fwd_set.count() > 0) { tbe.Acks := 1; - tbe.SilentAcks := machineCount(MachineType:L1Cache) - fwd_set.count(); + tbe.SilentAcks := num_l1_machines - fwd_set.count(); tbe.SilentAcks := tbe.SilentAcks - 1; } else { - tbe.Acks := machineCount(MachineType:L1Cache); + tbe.Acks := num_l1_machines; tbe.SilentAcks := 0; } } else { @@ -644,7 +647,7 @@ action(saa_setAcksToAllIfPF, "saa", desc="Non-forwarded request, set the ack amount to all") { assert(is_valid(tbe)); if (probe_filter_enabled || full_bit_dir_enabled) { - tbe.Acks := machineCount(MachineType:L1Cache); + tbe.Acks := num_l1_machines; tbe.SilentAcks := 0; } else { tbe.Acks := 1; @@ -861,7 +864,7 @@ action(fn_forwardRequestIfNecessary, "fn", desc="Forward requests if necessary") { assert(is_valid(tbe)); - if ((machineCount(MachineType:L1Cache) > 1) && (tbe.Acks <= 1)) { + if ((num_l1_machines > 1) && (tbe.Acks <= 1)) { if (full_bit_dir_enabled) { assert(is_valid(cache_entry)); peek(requestQueue_in, RequestMsg) { @@ -899,7 +902,7 @@ } action(ia_invalidateAllRequest, "ia", desc="invalidate all copies") { - if (machineCount(MachineType:L1Cache) > 1) { + if (num_l1_machines > 1) { if (full_bit_dir_enabled) { assert(cache_entry.Sharers.count() > 0); peek(requestQueue_in, RequestMsg) { @@ -924,7 +927,7 @@ } action(io_invalidateOwnerRequest, "io", desc="invalidate all copies") { - if (machineCount(MachineType:L1Cache) > 1) { + if (num_l1_machines > 1) { enqueue(forwardNetwork_out, RequestMsg, from_memory_controller_latency) { assert(is_valid(cache_entry)); out_msg.addr := address; @@ -938,7 +941,7 @@ } action(fb_forwardRequestBcast, "fb", desc="Forward requests to all nodes") { - if (machineCount(MachineType:L1Cache) > 1) { + if (num_l1_machines > 1) { peek(requestQueue_in, RequestMsg) { if (full_bit_dir_enabled) { fwd_set := cache_entry.Sharers; @@ -952,7 +955,7 @@ out_msg.MessageSize := MessageSizeType:Multicast_Control; out_msg.InitialRequestTime := in_msg.InitialRequestTime; out_msg.ForwardRequestTime := curCycle(); - out_msg.SilentAcks := machineCount(MachineType:L1Cache) - fwd_set.count(); + out_msg.SilentAcks := num_l1_machines - fwd_set.count(); out_msg.SilentAcks := out_msg.SilentAcks - 1; } } @@ -988,7 +991,7 @@ } action(fr_forwardMergeReadRequestsToOwner, "frr", desc="Forward coalesced read request to owner") { - assert(machineCount(MachineType:L1Cache) > 1); + assert(num_l1_machines > 1); // // Fixme! The unblock network should not stall on the forward network. Add a trigger queue to // decouple the two. @@ -1012,7 +1015,7 @@ } action(fc_forwardRequestConditionalOwner, "fc", desc="Forward request to one or more nodes") { - assert(machineCount(MachineType:L1Cache) > 1); + assert(num_l1_machines > 1); if (probe_filter_enabled || full_bit_dir_enabled) { peek(requestQueue_in, RequestMsg) { enqueue(forwardNetwork_out, RequestMsg, from_memory_controller_latency) { @@ -1043,8 +1046,9 @@ } } - action(nofc_forwardRequestConditionalOwner, "nofc", desc="Forward request to one or more nodes if the requestor is not the owner") { - if (machineCount(MachineType:L1Cache) > 1) { + action(nofc_forwardRequestConditionalOwner, "nofc", + desc="Forward request to one or more nodes if the requestor is not the owner") { + if (num_l1_machines > 1) { if (probe_filter_enabled || full_bit_dir_enabled) { peek(requestQueue_in, RequestMsg) { diff --git a/src/mem/protocol/MOESI_hammer-dma.sm b/src/mem/protocol/MOESI_hammer-dma.sm --- a/src/mem/protocol/MOESI_hammer-dma.sm +++ b/src/mem/protocol/MOESI_hammer-dma.sm @@ -30,6 +30,8 @@ machine(MachineType:DMA, "DMA Controller") : DMASequencer * dma_sequencer; Cycles request_latency := 6; + int dir_select_num_bits; + int dir_select_low_bit; MessageBuffer * responseFromDir, network="From", virtual_network="1", vnet_type="response"; @@ -115,7 +117,8 @@ out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -130,7 +133,8 @@ out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } diff --git a/src/mem/protocol/Network_test-cache.sm b/src/mem/protocol/Network_test-cache.sm --- a/src/mem/protocol/Network_test-cache.sm +++ b/src/mem/protocol/Network_test-cache.sm @@ -34,6 +34,8 @@ machine(MachineType:L1Cache, "Network_test L1 Cache") : Sequencer * sequencer; Cycles issue_latency := 2; + int dir_select_num_bits; + int dir_select_low_bit; // NETWORK BUFFERS MessageBuffer * requestFromCache, network="To", virtual_network="0", @@ -148,8 +150,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:MSG; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); - //out_msg.Destination := broadcast(MachineType:Directory); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -160,7 +162,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:MSG; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -171,7 +174,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:MSG; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Data; out_msg.DataBlk.alloc(block_size_bytes); } diff --git a/src/mem/protocol/RubySlicc_ComponentMapping.sm b/src/mem/protocol/RubySlicc_ComponentMapping.sm --- a/src/mem/protocol/RubySlicc_ComponentMapping.sm +++ b/src/mem/protocol/RubySlicc_ComponentMapping.sm @@ -29,19 +29,10 @@ // Mapping functions -int machineCount(MachineType machType); MachineID mapAddressToRange(Addr addr, MachineType type, int low, int high); MachineID mapAddressToRange(Addr addr, MachineType type, int low, int high, NodeID n); -NetDest broadcast(MachineType type); -MachineID map_Address_to_DMA(Addr addr); -MachineID map_Address_to_Directory(Addr addr); -MachineID map_Address_to_RegionDir(Addr addr); -NodeID map_Address_to_DirectoryNode(Addr addr); -MachineID map_Address_to_TCCdir(Addr addr); -NodeID map_Address_to_TCCdirNode(Addr addr); NodeID machineIDToNodeID(MachineID machID); -NodeID machineIDToVersion(MachineID machID); MachineType machineIDToMachineType(MachineID machID); MachineID createMachineID(MachineType t, NodeID i); diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm --- a/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm +++ b/src/mem/protocol/MOESI_AMD_Base-Region-CorePair.sm @@ -44,6 +44,8 @@ bool send_evictions := "False"; Cycles issue_latency := 5; Cycles l2_hit_latency := 18; + int dir_low_bit; + int dir_num_bits; // BEGIN Core Buffers @@ -1153,7 +1155,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:StaleNotif; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); DPRINTF(RubySlicc, "%s\n", out_msg); @@ -1236,7 +1241,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUData; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Shared) { @@ -1256,7 +1264,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -1272,7 +1284,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Ntsl := true; out_msg.Hit := false; @@ -1289,7 +1305,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); assert(addressInCore(address) || is_valid(tbe)); out_msg.Dirty := false; // only true if sending back data i think out_msg.Hit := true; @@ -1306,7 +1326,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); if (addressInCore(address)) { out_msg.Hit := true; } else { @@ -1327,7 +1351,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; assert(cache_entry.Dirty); out_msg.Dirty := true; @@ -1344,7 +1372,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; assert(cache_entry.Dirty); out_msg.Dirty := true; @@ -1362,7 +1394,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := tbe.DataBlk; assert(tbe.Dirty); out_msg.Dirty := true; @@ -1401,7 +1436,10 @@ enqueue(responseNetwork_out, ResponseMsg, issue_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUCancelWB; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); @@ -1416,7 +1454,10 @@ action(uu_sendUnblock, "uu", desc="state changed, unblock") { enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { out_msg.addr := address; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.wasValid := isValid(address); DPRINTF(RubySlicc, "%s\n", out_msg); diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm --- a/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm +++ b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm @@ -41,6 +41,11 @@ Cycles l3_hit_latency := 30; bool useL3OnWT := "False"; Cycles to_memory_controller_latency := 1; + int num_core_pair; + int num_tcp; + int num_sqc; + int region_low_bit; + int region_num_bits; // From the Cores MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request"; @@ -823,7 +828,10 @@ enqueue(responseNetwork_out, ResponseMsg, response_latency_regionDir) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:DirReadyAck; - out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + region_low_bit, + region_num_bits)); out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.DataBlk.alloc(block_size_bytes); @@ -916,11 +924,11 @@ out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket - tbe.NumPendingAcks := tbe.NumPendingAcks +machineCount(MachineType:CorePair) - 1; + tbe.NumPendingAcks := tbe.NumPendingAcks + num_core_pair - 1; out_msg.Destination.broadcast(MachineType:TCP); - tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP); + tbe.NumPendingAcks := tbe.NumPendingAcks + num_tcp; out_msg.Destination.broadcast(MachineType:SQC); - tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC); + tbe.NumPendingAcks := tbe.NumPendingAcks + num_sqc; out_msg.Destination.remove(in_msg.Requestor); DPRINTF(RubySlicc, "%s\n", (out_msg)); APPEND_TRANSITION_COMMENT(" sc: Acks remaining: "); @@ -938,11 +946,11 @@ out_msg.ReturnData := false; out_msg.MessageSize := MessageSizeType:Control; out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket - tbe.NumPendingAcks := tbe.NumPendingAcks +machineCount(MachineType:CorePair) - 1; + tbe.NumPendingAcks := tbe.NumPendingAcks + num_core_pair - 1; out_msg.Destination.broadcast(MachineType:TCP); - tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:TCP); + tbe.NumPendingAcks := tbe.NumPendingAcks + num_tcp; out_msg.Destination.broadcast(MachineType:SQC); - tbe.NumPendingAcks := tbe.NumPendingAcks + machineCount(MachineType:SQC); + tbe.NumPendingAcks := tbe.NumPendingAcks + num_sqc; out_msg.Destination.remove(in_msg.Requestor); APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); APPEND_TRANSITION_COMMENT(tbe.NumPendingAcks); @@ -1215,14 +1223,20 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + region_low_bit, + region_num_bits)); out_msg.Shared := in_msg.Shared; out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { out_msg.writeMask.clear(block_size_bytes); } - DPRINTF(RubySlicc, "out dest: %s\n", map_Address_to_RegionDir(address)); + DPRINTF(RubySlicc, "out dest: %s\n", mapAddressToRange(address, + MachineType:RegionDir, + region_low_bit, + region_num_bits)); } } } diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm --- a/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm +++ b/src/mem/protocol/MOESI_AMD_Base-RegionBuffer.sm @@ -1,3 +1,4 @@ + /* * Copyright (c) 2010-2015 Advanced Micro Devices, Inc. * All rights reserved. @@ -41,7 +42,12 @@ Cycles toRegionDirLatency := 5; // Latency for requests and acks to directory Cycles nextEvictLatency := 1; // latency added between each block while evicting region bool noTCCdir := "False"; - int TCC_select_num_bits := 1; + int tcc_low_bit; + int tcc_num_bits := 1; + int dir_low_bit; + int dir_num_bits; + int regiondir_low_bit; + int regiondir_num_bits; // From the Cores MessageBuffer * requestFromCore, network="From", virtual_network="0", vnet_type="request"; @@ -185,7 +191,6 @@ // Stores only region addresses TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -228,8 +233,10 @@ if (isOnCPU) { return createMachineID(MachineType:CorePair, intToID(0)); } else if (noTCCdir) { - return mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + return mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); } else { return createMachineID(MachineType:TCCdir, intToID(0)); } @@ -493,7 +500,10 @@ out_msg.Dirty := in_msg.Dirty; out_msg.Requestor := in_msg.Requestor; out_msg.WTRequestor := in_msg.WTRequestor; - out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Destination.add(mapAddressToRange(in_msg.addr, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Shared := in_msg.Shared; out_msg.MessageSize := in_msg.MessageSize; out_msg.Private := true; @@ -594,7 +604,10 @@ out_msg.WTRequestor := in_msg.WTRequestor; out_msg.InitialRequestTime := curCycle(); // will this always be ok? probably not for multisocket - out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -620,7 +633,10 @@ out_msg.WTRequestor := in_msg.WTRequestor; out_msg.InitialRequestTime := curCycle(); // will this always be ok? probably not for multisocket - out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -639,7 +655,11 @@ out_msg.addr := getRegionBase(address); // use the actual address so the demand request can be fulfilled out_msg.Type := CoherenceRequestType:CleanWbRequest; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -662,7 +682,10 @@ out_msg.WTRequestor := in_msg.WTRequestor; out_msg.InitialRequestTime := curCycle(); // will this always be ok? probably not for multisocket - out_msg.Destination.add(map_Address_to_RegionDir(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -682,7 +705,11 @@ out_msg.addr := getRegionBase(address); out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -695,7 +722,11 @@ out_msg.addr := tbe.DemandAddress; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := getPeer(machineID,address); - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; // only true if sending back data i think out_msg.Hit := false; out_msg.Ntsl := false; @@ -716,7 +747,11 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; out_msg.NotCached := true; - out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.Dirty := tbe.dirty; out_msg.DataBlk.alloc(block_size_bytes); @@ -728,7 +763,11 @@ out_msg.addr := getRegionBase(address); out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -740,7 +779,11 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; out_msg.NotCached := true; - out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -751,7 +794,11 @@ out_msg.addr := getRegionBase(address); out_msg.Type := CoherenceResponseType:PrivateAck; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -764,7 +811,11 @@ out_msg.addr := getRegionBase(address); out_msg.Type := CoherenceResponseType:RegionWbAck; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_RegionDir(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:RegionDir, + regiondir_low_bit, + regiondir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -788,7 +839,7 @@ out_msg.DemandRequest := true; } out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(getPeer(machineID,address)); + out_msg.Destination.add(getPeer(machineID, address)); DPRINTF(RubySlicc, "%s\n", out_msg); } APPEND_TRANSITION_COMMENT(" current "); @@ -815,7 +866,7 @@ out_msg.ReturnData := true; out_msg.DemandRequest := true; out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.add(getPeer(machineID,address)); + out_msg.Destination.add(getPeer(machineID, address)); DPRINTF(RubySlicc, "%s\n", out_msg); tbe.AllAcksReceived := false; } @@ -835,8 +886,12 @@ enqueue(responseNetwork_out, ResponseMsg, toDirLatency) { out_msg.addr := in_msg.DemandAddress; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes - out_msg.Sender := getPeer(machineID,address); - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + out_msg.Sender := getPeer(machineID, address); + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; // only true if sending back data i think out_msg.Hit := false; out_msg.Ntsl := false; diff --git a/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm --- a/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm +++ b/src/mem/protocol/MOESI_AMD_Base-RegionDir.sm @@ -43,7 +43,10 @@ bool sym_migrate := "False"; bool asym_migrate := "False"; bool noTCCdir := "False"; - int TCC_select_num_bits := 1; + int tcc_low_bit; + int tcc_num_bits := 1; + int dir_low_bit; + int dir_num_bits; // To the directory MessageBuffer * requestToDir, network="To", virtual_network="5", vnet_type="request"; @@ -159,7 +162,6 @@ // Stores only region addresses TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -183,8 +185,10 @@ return createMachineID(MachineType:CorePair, intToID(0)); } else if (machineIDToNodeID(rBuf) == gpuRegionBufferNum) { if (noTCCdir) { - return mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + return mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); } else { return createMachineID(MachineType:TCCdir, intToID(0)); } @@ -479,7 +483,10 @@ out_msg.Dirty := in_msg.Dirty; out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); out_msg.WTRequestor := in_msg.WTRequestor; - out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Destination.add(mapAddressToRange(in_msg.addr, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Shared := in_msg.Shared; out_msg.MessageSize := in_msg.MessageSize; out_msg.Private := in_msg.Private; @@ -508,7 +515,10 @@ out_msg.Dirty := in_msg.Dirty; out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); out_msg.WTRequestor := in_msg.WTRequestor; - out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Destination.add(mapAddressToRange(in_msg.addr, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Shared := in_msg.Shared; out_msg.MessageSize := in_msg.MessageSize; out_msg.Private := in_msg.Private; @@ -538,7 +548,10 @@ out_msg.Dirty := in_msg.Dirty; out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); out_msg.WTRequestor := in_msg.WTRequestor; - out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Destination.add(mapAddressToRange(in_msg.addr, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Shared := in_msg.Shared; out_msg.MessageSize := in_msg.MessageSize; out_msg.Private := in_msg.Private; @@ -571,7 +584,10 @@ out_msg.Dirty := in_msg.Dirty; out_msg.Requestor := getCoreMachine(in_msg.Requestor,address); out_msg.WTRequestor := in_msg.WTRequestor; - out_msg.Destination.add(map_Address_to_Directory(in_msg.addr)); + out_msg.Destination.add(mapAddressToRange(in_msg.addr, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Shared := in_msg.Shared; out_msg.MessageSize := in_msg.MessageSize; out_msg.Private := in_msg.Private; diff --git a/src/mem/protocol/MESI_Two_Level-dir.sm b/src/mem/protocol/MESI_Two_Level-dir.sm --- a/src/mem/protocol/MESI_Two_Level-dir.sm +++ b/src/mem/protocol/MESI_Two_Level-dir.sm @@ -30,6 +30,8 @@ : DirectoryMemory * directory; Cycles to_mem_ctrl_latency := 1; Cycles directory_latency := 6; + int dma_select_num_bits; + int dma_select_low_bit; MessageBuffer * requestToDir, network="From", virtual_network="0", vnet_type="request"; @@ -334,8 +336,12 @@ enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be - out_msg.Destination.add(map_Address_to_DMA(address)); + + // we send the entire data block and rely on the dma controller to + // split it up if need be + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, + dma_select_low_bit, dma_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Data; } } @@ -353,7 +359,8 @@ enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:ACK; - out_msg.Destination.add(map_Address_to_DMA(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, + dma_select_low_bit, dma_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -386,8 +393,11 @@ enqueue(responseNetwork_out, ResponseMsg, to_mem_ctrl_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:DATA; - out_msg.DataBlk := in_msg.DataBlk; // we send the entire data block and rely on the dma controller to split it up if need be - out_msg.Destination.add(map_Address_to_DMA(address)); + // we send the entire data block and rely on the dma controller to + // split it up if need be + out_msg.DataBlk := in_msg.DataBlk; + out_msg.Destination.add(mapAddressToRange(address, MachineType:DMA, + dma_select_low_bit, dma_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Data; } } diff --git a/src/mem/protocol/MESI_Two_Level-dma.sm b/src/mem/protocol/MESI_Two_Level-dma.sm --- a/src/mem/protocol/MESI_Two_Level-dma.sm +++ b/src/mem/protocol/MESI_Two_Level-dma.sm @@ -29,13 +29,15 @@ machine(MachineType:DMA, "DMA Controller") : DMASequencer * dma_sequencer; - Cycles request_latency := 6; + Cycles request_latency := 6; + int dir_select_num_bits; + int dir_select_low_bit; - MessageBuffer * responseFromDir, network="From", virtual_network="1", + MessageBuffer * responseFromDir, network="From", virtual_network="1", vnet_type="response"; - MessageBuffer * requestToDir, network="To", virtual_network="0", + MessageBuffer * requestToDir, network="To", virtual_network="0", vnet_type="request"; - MessageBuffer * mandatoryQueue; + MessageBuffer * mandatoryQueue; { state_declaration(State, desc="DMA states", default="DMA_State_READY") { READY, AccessPermission:Invalid, desc="Ready to accept a new request"; @@ -114,7 +116,9 @@ out_msg.Type := CoherenceRequestType:DMA_READ; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -127,7 +131,9 @@ out_msg.Type := CoherenceRequestType:DMA_WRITE; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } diff --git a/src/mem/protocol/MI_example-cache.sm b/src/mem/protocol/MI_example-cache.sm --- a/src/mem/protocol/MI_example-cache.sm +++ b/src/mem/protocol/MI_example-cache.sm @@ -33,6 +33,8 @@ Cycles cache_response_latency := 12; Cycles issue_latency := 2; bool send_evictions; + int dir_select_low_bit; + int dir_select_num_bits; // NETWORK BUFFERS MessageBuffer * requestFromCache, network="To", virtual_network="2", @@ -272,7 +274,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETX; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -284,7 +287,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:PUTX; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; out_msg.MessageSize := MessageSizeType:Data; } diff --git a/src/mem/protocol/MI_example-dma.sm b/src/mem/protocol/MI_example-dma.sm --- a/src/mem/protocol/MI_example-dma.sm +++ b/src/mem/protocol/MI_example-dma.sm @@ -30,6 +30,8 @@ machine(MachineType:DMA, "DMA Controller") : DMASequencer * dma_sequencer; Cycles request_latency := 6; + int dir_select_low_bit; + int dir_select_num_bits; MessageBuffer * responseFromDir, network="From", virtual_network="1", vnet_type="response"; @@ -117,7 +119,9 @@ out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } @@ -132,7 +136,9 @@ out_msg.Requestor := machineID; out_msg.DataBlk := in_msg.DataBlk; out_msg.Len := in_msg.Len; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Writeback_Control; } } diff --git a/src/mem/protocol/MOESI_AMD_Base-CorePair.sm b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm --- a/src/mem/protocol/MOESI_AMD_Base-CorePair.sm +++ b/src/mem/protocol/MOESI_AMD_Base-CorePair.sm @@ -43,6 +43,8 @@ bool send_evictions := "False"; Cycles issue_latency := 5; // time to send data down to NB Cycles l2_hit_latency := 18; + int dir_low_bit; + int dir_num_bits; // BEGIN Core Buffers @@ -690,7 +692,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlk; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); DPRINTF(RubySlicc,"%s\n",out_msg.Destination); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); @@ -706,7 +711,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlkM; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); @@ -721,7 +729,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlkS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); @@ -738,7 +749,10 @@ assert(is_valid(cache_entry)); out_msg.DataBlk := cache_entry.DataBlk; assert(cache_entry.Dirty); - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.Type := CoherenceRequestType:VicDirty; out_msg.InitialRequestTime := curCycle(); @@ -757,7 +771,10 @@ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { out_msg.addr := address; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.Type := CoherenceRequestType:VicClean; out_msg.InitialRequestTime := curCycle(); @@ -1124,7 +1141,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:StaleNotif; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); DPRINTF(RubySlicc, "%s\n", out_msg); @@ -1138,7 +1158,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUData; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Shared) { @@ -1158,7 +1181,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -1173,7 +1200,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Ntsl := true; out_msg.Hit := false; @@ -1188,7 +1219,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); assert(addressInCore(address) || is_valid(tbe)); out_msg.Dirty := false; // only true if sending back data i think out_msg.Hit := true; @@ -1204,7 +1239,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); if (addressInCore(address)) { out_msg.Hit := true; } else { @@ -1224,7 +1263,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; assert(cache_entry.Dirty); out_msg.Dirty := true; @@ -1240,7 +1283,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; assert(cache_entry.Dirty); out_msg.Dirty := true; @@ -1256,7 +1303,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := tbe.DataBlk; assert(tbe.Dirty); out_msg.Dirty := true; @@ -1274,7 +1324,10 @@ action(uu_sendUnblock, "uu", desc="state changed, unblock") { enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { out_msg.addr := address; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; DPRINTF(RubySlicc, "%s\n", out_msg); } diff --git a/src/mem/protocol/MOESI_AMD_Base-L3cache.sm b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm --- a/src/mem/protocol/MOESI_AMD_Base-L3cache.sm +++ b/src/mem/protocol/MOESI_AMD_Base-L3cache.sm @@ -43,6 +43,8 @@ WireBuffer * respToL3; Cycles l3_request_latency := 1; Cycles l3_response_latency := 35; + int dir_low_bit; + int dir_num_bits; // To the general response network MessageBuffer * responseFromL3, network="To", virtual_network="2", ordered="false", vnet_type="response"; @@ -405,7 +407,10 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Shared := false; // unneeded for this request out_msg.MessageSize := in_msg.MessageSize; DPRINTF(RubySlicc, "%s\n", out_msg); @@ -437,7 +442,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:VicDirty; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -464,7 +472,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -479,7 +491,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := true; out_msg.Ntsl := false; @@ -494,7 +510,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := false; @@ -509,7 +529,11 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); // will this always be ok? probably not for multisocket + // will this always be ok? probably not for multisocket + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; assert(cache_entry.Dirty); out_msg.Dirty := true; @@ -524,7 +548,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := tbe.DataBlk; assert(tbe.Dirty); out_msg.Dirty := true; @@ -540,7 +567,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:WrCancel; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -587,7 +617,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUData; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Shared) { @@ -611,7 +644,10 @@ action(uu_sendUnblock, "uu", desc="state changed, unblock") { enqueue(unblockNetwork_out, UnblockMsg, l3_request_latency) { out_msg.addr := address; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; DPRINTF(RubySlicc, "%s\n", out_msg); } diff --git a/src/mem/protocol/MESI_Two_Level-L1cache.sm b/src/mem/protocol/MESI_Two_Level-L1cache.sm --- a/src/mem/protocol/MESI_Two_Level-L1cache.sm +++ b/src/mem/protocol/MESI_Two_Level-L1cache.sm @@ -32,6 +32,7 @@ CacheMemory * L1Dcache; Prefetcher * prefetcher; int l2_select_num_bits; + int l2_select_low_bit; Cycles l1_request_latency := 2; Cycles l1_response_latency := 2; Cycles to_l2_latency := 1; @@ -154,8 +155,6 @@ TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int l2_select_low_bit, default="getBlockSizeBits()"; - Tick clockEdge(); Cycles ticksToCycles(Tick t); void set_cache_entry(AbstractCacheEntry a); diff --git a/src/mem/protocol/MESI_Two_Level-L2cache.sm b/src/mem/protocol/MESI_Two_Level-L2cache.sm --- a/src/mem/protocol/MESI_Two_Level-L2cache.sm +++ b/src/mem/protocol/MESI_Two_Level-L2cache.sm @@ -31,6 +31,8 @@ Cycles l2_request_latency := 2; Cycles l2_response_latency := 2; Cycles to_l1_latency := 1; + int dir_select_num_bits; + int dir_select_low_bit; // Message Queues // From local bank of L2 cache TO the network @@ -400,7 +402,9 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, dir_select_low_bit, + dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -427,7 +431,8 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:MEMORY_DATA; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; @@ -439,7 +444,8 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:ACK; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -451,7 +457,8 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:MEMORY_DATA; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, MachineType:Directory, + dir_select_low_bit, dir_select_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; diff --git a/src/mem/protocol/GPU_VIPER_Region-TCC.sm b/src/mem/protocol/GPU_VIPER_Region-TCC.sm --- a/src/mem/protocol/GPU_VIPER_Region-TCC.sm +++ b/src/mem/protocol/GPU_VIPER_Region-TCC.sm @@ -44,6 +44,8 @@ int regionBufferNum; Cycles l2_request_latency := 50; Cycles l2_response_latency := 20; + int dir_low_bit; + int dir_num_bits; // From the TCPs or SQCs MessageBuffer * requestFromTCP, network="From", virtual_network="1", ordered="true", vnet_type="request"; @@ -384,7 +386,10 @@ } enqueue(unblockToNB_out, UnblockMsg, 1) { out_msg.addr := address; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; DPRINTF(RubySlicc, "%s\n", out_msg); } @@ -571,7 +576,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; diff --git a/src/mem/protocol/MESI_Three_Level-L1cache.sm b/src/mem/protocol/MESI_Three_Level-L1cache.sm --- a/src/mem/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/protocol/MESI_Three_Level-L1cache.sm @@ -28,6 +28,7 @@ machine(MachineType:L1Cache, "MESI Directory L1 Cache CMP") : CacheMemory * cache; + int l2_select_low_bit; int l2_select_num_bits; Cycles l1_request_latency := 2; Cycles l1_response_latency := 2; @@ -149,8 +150,6 @@ TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int l2_select_low_bit, default="getBlockSizeBits()"; - Tick clockEdge(); Cycles ticksToCycles(Tick t); void set_cache_entry(AbstractCacheEntry a); diff --git a/src/mem/protocol/GPU_VIPER-TCP.sm b/src/mem/protocol/GPU_VIPER-TCP.sm --- a/src/mem/protocol/GPU_VIPER-TCP.sm +++ b/src/mem/protocol/GPU_VIPER-TCP.sm @@ -40,9 +40,10 @@ CacheMemory * L1cache; bool WB; /*is this cache Writeback?*/ bool disableL1; /* bypass L1 cache? */ - int TCC_select_num_bits; Cycles issue_latency := 40; // time to send data down to TCC Cycles l2_hit_latency := 18; + int tcc_low_bit; + int tcc_num_bits; MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request"; MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response"; @@ -115,7 +116,6 @@ } TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; int WTcnt, default="0"; int Fcnt, default="0"; bool inFlush, default="false"; @@ -346,8 +346,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlk; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); @@ -395,8 +397,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlk; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); @@ -419,8 +423,10 @@ out_msg.DataBlk := cache_entry.DataBlk; out_msg.writeMask.clear(block_size_bytes); out_msg.writeMask.orMask(cache_entry.writeMask); - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.MessageSize := MessageSizeType:Data; out_msg.Type := CoherenceRequestType:WriteThrough; out_msg.InitialRequestTime := curCycle(); @@ -436,8 +442,10 @@ out_msg.DataBlk.alloc(block_size_bytes); out_msg.writeMask.clear(block_size_bytes); out_msg.writeMask.orMask(in_msg.writeMask); - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.MessageSize := MessageSizeType:Data; out_msg.Type := CoherenceRequestType:Atomic; out_msg.InitialRequestTime := curCycle(); diff --git a/src/mem/protocol/GPU_VIPER-SQC.sm b/src/mem/protocol/GPU_VIPER-SQC.sm --- a/src/mem/protocol/GPU_VIPER-SQC.sm +++ b/src/mem/protocol/GPU_VIPER-SQC.sm @@ -36,9 +36,10 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") : Sequencer* sequencer; CacheMemory * L1cache; - int TCC_select_num_bits; Cycles issue_latency := 80; // time to send data down to TCC Cycles l2_hit_latency := 18; // for 1MB L2, 20 for 2MB + int tcc_low_bit; + int tcc_num_bits; MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request"; @@ -91,7 +92,6 @@ } TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; void set_cache_entry(AbstractCacheEntry b); void unset_cache_entry(); @@ -260,8 +260,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlk; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); diff --git a/src/mem/protocol/GPU_VIPER-TCC.sm b/src/mem/protocol/GPU_VIPER-TCC.sm --- a/src/mem/protocol/GPU_VIPER-TCC.sm +++ b/src/mem/protocol/GPU_VIPER-TCC.sm @@ -38,6 +38,8 @@ bool WB; /*is this cache Writeback?*/ Cycles l2_request_latency := 50; Cycles l2_response_latency := 20; + int dir_low_bit; + int dir_num_bits; // From the TCPs or SQCs MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request"; @@ -366,7 +368,10 @@ } enqueue(unblockToNB_out, UnblockMsg, 1) { out_msg.addr := address; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; DPRINTF(RubySlicc, "%s\n", out_msg); } @@ -380,7 +385,10 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Shared := false; // unneeded for this request out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk.alloc(block_size_bytes); @@ -485,7 +493,10 @@ out_msg.addr := address; out_msg.Requestor := machineID; out_msg.WTRequestor := in_msg.Requestor; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Data; out_msg.Type := CoherenceRequestType:WriteThrough; out_msg.Dirty := true; @@ -503,7 +514,10 @@ out_msg.addr := address; out_msg.Requestor := machineID; out_msg.WTRequestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Data; out_msg.Type := CoherenceRequestType:WriteThrough; out_msg.Dirty := true; @@ -521,7 +535,10 @@ out_msg.addr := address; out_msg.Requestor := machineID; out_msg.WTRequestor := in_msg.Requestor; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Data; out_msg.Type := CoherenceRequestType:Atomic; out_msg.Dirty := true; @@ -539,7 +556,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; diff --git a/src/mem/protocol/GPU_RfO-TCCdir.sm b/src/mem/protocol/GPU_RfO-TCCdir.sm --- a/src/mem/protocol/GPU_RfO-TCCdir.sm +++ b/src/mem/protocol/GPU_RfO-TCCdir.sm @@ -42,10 +42,13 @@ WireBuffer * w_reqToTCC; WireBuffer * w_probeToTCC; WireBuffer * w_respToTCC; - int TCC_select_num_bits; Cycles response_latency := 5; Cycles directory_latency := 6; Cycles issue_latency := 120; + int tcc_low_bit; + int tcc_num_bits; + int dir_low_bit; + int dir_num_bits; // From the TCPs or SQCs MessageBuffer * requestFromTCP, network="From", virtual_network="1", vnet_type="request"; @@ -237,7 +240,6 @@ // ** OBJECTS ** TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; NetDest TCC_dir_subtree; NetDest temp; @@ -767,7 +769,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlk; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -781,7 +786,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlkS; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -795,7 +803,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlkM; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -832,7 +843,10 @@ enqueue(requestToNB_out, CPURequestMsg, issue_latency) { out_msg.addr := address; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.Type := CoherenceRequestType:VicDirty; if (cache_entry.CacheState == State:O) { @@ -852,7 +866,10 @@ enqueue(requestToNB_out, CPURequestMsg, issue_latency) { out_msg.addr := address; out_msg.Requestor := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.Type := CoherenceRequestType:VicClean; if (cache_entry.CacheState == State:S) { @@ -876,8 +893,10 @@ out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; out_msg.DataBlk := in_msg.DataBlk; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.Shared := in_msg.Shared; out_msg.MessageSize := in_msg.MessageSize; if (out_msg.writeMask.isEmpty() == true) { @@ -892,8 +911,10 @@ action(sc_probeShrCoreData, "sc", desc="probe shared cores, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); temp := cache_entry.Sharers; temp.addNetDest(cache_entry.Owner); @@ -917,8 +938,10 @@ } action(ls2_probeShrL2Data, "ls2", desc="local probe downgrade L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { out_msg.addr := address; @@ -934,8 +957,10 @@ } action(s2_probeShrL2Data, "s2", desc="probe shared L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { out_msg.addr := address; @@ -951,8 +976,10 @@ } action(ldc_probeInvCoreData, "ldc", desc="local probe to inv cores, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); peek(coreRequestNetwork_in, CPURequestMsg) { NetDest dest:= cache_entry.Sharers; dest.addNetDest(cache_entry.Owner); @@ -980,8 +1007,10 @@ } action(ld2_probeInvL2Data, "ld2", desc="local probe inv L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { out_msg.addr := address; @@ -997,8 +1026,10 @@ } action(dc_probeInvCoreData, "dc", desc="probe inv cores + TCC, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); enqueue(probeToCore_out, TDProbeRequestMsg, response_latency) { out_msg.addr := address; out_msg.Type := ProbeRequestType:PrbInv; @@ -1021,8 +1052,10 @@ } action(d2_probeInvL2Data, "d2", desc="probe inv L2, return data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { out_msg.addr := address; @@ -1097,8 +1130,10 @@ } action(i2_probeInvL2, "i2", desc="probe inv L2, no data") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); if ((cache_entry.Sharers.isElement(tcc)) || (cache_entry.Owner.isElement(tcc))) { enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { tbe.NumPendingAcks := tbe.NumPendingAcks + 1; @@ -1118,7 +1153,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -1133,7 +1171,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; out_msg.Ntsl := true; out_msg.Hit := false; @@ -1148,7 +1189,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Dirty := false; // only true if sending back data i think out_msg.Hit := false; out_msg.Ntsl := false; @@ -1166,7 +1210,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := getDataBlock(address); if (is_valid(tbe)) { out_msg.Dirty := tbe.Dirty; @@ -1185,7 +1232,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := getDataBlock(address); if (is_valid(tbe)) { out_msg.Dirty := tbe.Dirty; @@ -1200,7 +1250,10 @@ enqueue(requestToNB_out, CPURequestMsg, issue_latency) { out_msg.addr := address; out_msg.Type := CoherenceRequestType:WrCancel; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Requestor := machineID; out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); @@ -1339,8 +1392,10 @@ } action(pso_probeSharedDataOwner, "pso", desc="probe shared data at owner") { - MachineID tcc := mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits); + MachineID tcc := mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits); if (cache_entry.Owner.isElement(tcc)) { enqueue(w_probeTCC_out, TDProbeRequestMsg, 1) { out_msg.addr := address; @@ -1420,7 +1475,10 @@ action(uu_sendUnblock, "uu", desc="state changed, unblock") { enqueue(unblockToNB_out, UnblockMsg, issue_latency) { out_msg.addr := address; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; DPRINTF(RubySlicc, "%s\n", out_msg); } @@ -1543,7 +1601,10 @@ enqueue(responseToNB_out, ResponseMsg, issue_latency) { out_msg.addr := address; out_msg.Type := CoherenceResponseType:StaleNotif; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.Sender := machineID; out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); @@ -1555,7 +1616,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUData; out_msg.Sender := machineID; - out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:Directory, + dir_low_bit, + dir_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Shared) { diff --git a/src/mem/protocol/GPU_RfO-TCP.sm b/src/mem/protocol/GPU_RfO-TCP.sm --- a/src/mem/protocol/GPU_RfO-TCP.sm +++ b/src/mem/protocol/GPU_RfO-TCP.sm @@ -38,9 +38,12 @@ Sequencer* sequencer; bool use_seq_not_coal; CacheMemory * L1cache; - int TCC_select_num_bits; Cycles issue_latency := 40; // time to send data down to TCC Cycles l2_hit_latency := 18; + int tcc_low_bit; + int tcc_num_bits; + int tccdir_low_bit; + int tccdir_num_bits; MessageBuffer * requestFromTCP, network="To", virtual_network="1", vnet_type="request"; MessageBuffer * responseFromTCP, network="To", virtual_network="3", vnet_type="response"; @@ -125,7 +128,6 @@ } TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -368,8 +370,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlk; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); @@ -384,8 +388,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlkM; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); @@ -401,8 +407,10 @@ out_msg.Requestor := machineID; assert(is_valid(cache_entry)); out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.Type := CoherenceRequestType:VicDirty; out_msg.InitialRequestTime := curCycle(); @@ -422,8 +430,10 @@ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { out_msg.addr := address; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.Type := CoherenceRequestType:VicClean; out_msg.InitialRequestTime := curCycle(); @@ -559,8 +569,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:StaleNotif; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); DPRINTF(RubySlicc, "%s\n", out_msg); @@ -574,8 +586,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUData; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Shared) { @@ -596,8 +610,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -613,8 +629,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC, L3 respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -630,8 +648,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Ntsl := true; out_msg.Hit := false; @@ -647,8 +667,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and TCC respond in same way to probes out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; // only true if sending back data i think out_msg.Hit := false; out_msg.Ntsl := false; @@ -665,8 +687,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.DataBlk := getDataBlock(address); if (is_valid(tbe)) { out_msg.Dirty := tbe.Dirty; @@ -689,8 +713,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.DataBlk := getDataBlock(address); if (is_valid(tbe)) { out_msg.Dirty := tbe.Dirty; @@ -720,8 +746,10 @@ enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.wasValid := isValid(address); DPRINTF(RubySlicc, "%s\n", out_msg); diff --git a/src/mem/protocol/GPU_RfO-SQC.sm b/src/mem/protocol/GPU_RfO-SQC.sm --- a/src/mem/protocol/GPU_RfO-SQC.sm +++ b/src/mem/protocol/GPU_RfO-SQC.sm @@ -36,9 +36,12 @@ machine(MachineType:SQC, "GPU SQC (L1 I Cache)") : Sequencer* sequencer; CacheMemory * L1cache; - int TCC_select_num_bits; Cycles issue_latency := 80; // time to send data down to TCC Cycles l2_hit_latency := 18; + int tcc_low_bit; + int tcc_num_bits; + int tccdir_low_bit; + int tccdir_num_bits; MessageBuffer * requestFromSQC, network="To", virtual_network="1", vnet_type="request"; MessageBuffer * responseFromSQC, network="To", virtual_network="3", vnet_type="response"; @@ -107,7 +110,6 @@ } TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; Tick clockEdge(); Tick cyclesToTicks(Cycles c); @@ -306,8 +308,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:RdBlkS; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.InitialRequestTime := curCycle(); out_msg.DataBlk.alloc(block_size_bytes); @@ -321,8 +325,10 @@ enqueue(requestNetwork_out, CPURequestMsg, issue_latency) { out_msg.addr := address; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.Type := CoherenceRequestType:VicClean; out_msg.InitialRequestTime := curCycle(); @@ -407,8 +413,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:StaleNotif; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); DPRINTF(RubySlicc, "%s\n", out_msg); @@ -422,8 +430,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUData; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCC, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCC, + tcc_low_bit, + tcc_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Shared) { @@ -444,8 +454,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -461,8 +473,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Ntsl := true; out_msg.Hit := false; @@ -478,8 +492,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // L3 and CPUs respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; // only true if sending back data i think out_msg.Hit := false; out_msg.Ntsl := false; @@ -496,8 +512,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.DataBlk := getDataBlock(address); if (is_valid(tbe)) { out_msg.Dirty := tbe.Dirty; @@ -518,8 +536,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.DataBlk := getDataBlock(address); if (is_valid(tbe)) { out_msg.Dirty := tbe.Dirty; @@ -541,8 +561,10 @@ enqueue(unblockNetwork_out, UnblockMsg, issue_latency) { out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; DPRINTF(RubySlicc, "%s\n", out_msg); } diff --git a/src/mem/protocol/GPU_RfO-TCC.sm b/src/mem/protocol/GPU_RfO-TCC.sm --- a/src/mem/protocol/GPU_RfO-TCC.sm +++ b/src/mem/protocol/GPU_RfO-TCC.sm @@ -41,9 +41,10 @@ WireBuffer * w_reqToTCC; WireBuffer * w_probeToTCC; WireBuffer * w_respToTCC; - int TCC_select_num_bits; Cycles l2_request_latency := 1; Cycles l2_response_latency := 20; + int tccdir_low_bit; + int tccdir_num_bits; // To the general response network MessageBuffer * responseFromTCC, network="To", virtual_network="3", vnet_type="response"; @@ -147,7 +148,6 @@ } TBETable * TBEs, template="", constructor="m_number_of_TBEs, m_block_size_bytes"; - int TCC_select_low_bit, default="getBlockSizeBits()"; void set_cache_entry(AbstractCacheEntry b); void unset_cache_entry(); @@ -421,8 +421,10 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Shared := false; // unneeded for this request out_msg.MessageSize := in_msg.MessageSize; out_msg.DataBlk.alloc(block_size_bytes); @@ -455,8 +457,10 @@ out_msg.Type := CoherenceRequestType:VicClean; out_msg.Requestor := machineID; out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Data; if (out_msg.writeMask.isEmpty() == true) { out_msg.writeMask.clear(block_size_bytes); @@ -470,8 +474,10 @@ out_msg.Type := CoherenceRequestType:VicDirty; out_msg.Requestor := machineID; out_msg.DataBlk := cache_entry.DataBlk; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Response_Data; if (out_msg.writeMask.isEmpty() == true) { out_msg.writeMask.clear(block_size_bytes); @@ -498,8 +504,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := true; @@ -515,8 +523,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Hit := true; out_msg.Ntsl := false; @@ -532,8 +542,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.Dirty := false; out_msg.Hit := false; out_msg.Ntsl := false; @@ -549,8 +561,10 @@ out_msg.Type := CoherenceResponseType:CPUPrbResp; // TCC and CPUs respond in same way to probes out_msg.Sender := machineID; // will this always be ok? probably not for multisocket - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.DataBlk := cache_entry.DataBlk; //assert(cache_entry.Dirty); Not needed in TCC where TCC can supply clean data out_msg.Dirty := cache_entry.Dirty; @@ -565,8 +579,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUPrbResp; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.DataBlk := tbe.DataBlk; //assert(tbe.Dirty); out_msg.Dirty := tbe.Dirty; @@ -582,8 +598,10 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:WrCancel; out_msg.Requestor := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Request_Control; out_msg.DataBlk.alloc(block_size_bytes); if (out_msg.writeMask.isEmpty() == true) { @@ -630,8 +648,10 @@ out_msg.addr := address; out_msg.Type := CoherenceResponseType:CPUData; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.DataBlk := tbe.DataBlk; out_msg.Dirty := tbe.Dirty; if (tbe.Shared) { @@ -656,8 +676,10 @@ enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) { out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.currentOwner := true; out_msg.valid := true; @@ -669,8 +691,10 @@ enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) { out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.currentOwner := false; out_msg.valid := true; @@ -682,8 +706,10 @@ enqueue(w_unblockNetwork_out, UnblockMsg, l2_request_latency) { out_msg.addr := address; out_msg.Sender := machineID; - out_msg.Destination.add(mapAddressToRange(address,MachineType:TCCdir, - TCC_select_low_bit, TCC_select_num_bits)); + out_msg.Destination.add(mapAddressToRange(address, + MachineType:TCCdir, + tccdir_low_bit, + tccdir_num_bits)); out_msg.MessageSize := MessageSizeType:Unblock_Control; out_msg.currentOwner := false; out_msg.valid := false; diff --git a/configs/ruby/MI_example.py b/configs/ruby/MI_example.py --- a/configs/ruby/MI_example.py +++ b/configs/ruby/MI_example.py @@ -63,6 +63,7 @@ # controller constructors are called before the network constructor # block_size_bits = int(math.log(options.cacheline_size, 2)) + dir_bits = int(math.log(options.num_dirs, 2)) for i in xrange(options.num_cpus): # @@ -74,7 +75,6 @@ assoc = options.l1d_assoc, start_index_bit = block_size_bits) - # the ruby random tester reuses num_cpus to specify the # number of cpu ports connected to the tester object, which # is stored in system.cpu. because there is only ever one @@ -91,6 +91,8 @@ l1_cntrl = L1Cache_Controller(version=i, cacheMemory=cache, send_evictions=send_evicts(options), transitions_per_cycle=options.ports, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits, clk_domain=clk_domain, ruby_system=ruby_system) @@ -130,10 +132,10 @@ dir_size = MemorySize('0B') dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + directory = RubyDirectoryMemory(version = i, size = dir_size, + num_directories = options.num_dirs), + transitions_per_cycle = options.ports, + ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -163,7 +165,9 @@ dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i) @@ -184,7 +188,9 @@ ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) ruby_system.io_controller = io_controller # Connect the dma controller to the network diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -182,7 +182,7 @@ self.response_latency = 30 - self.directory = DirMem() + self.directory = DirMem(num_directories=options.num_dirs) self.directory.create(options, ruby_system, system) self.L3CacheMemory = L3Cache() @@ -237,6 +237,9 @@ control_count = 0 + block_size_bits = int(math.log(options.cacheline_size, 2)) + dir_num_bits = int(math.log(options.num_dirs, 2)) + # # Must create the individual controllers before the network to ensure # the controller constructors are called before the network constructor @@ -247,7 +250,10 @@ mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s for i in xrange(options.num_dirs): - dir_cntrl = DirCntrl(TCC_select_num_bits = 0) + dir_cntrl = DirCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=0, + tccdir_low_bit=block_size_bits, + tccdir_num_bits=0) dir_cntrl.create(options, ruby_system, system) # Connect the Directory controller to the ruby network @@ -285,7 +291,8 @@ cpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s for i in xrange((options.num_cpus + 1) / 2): - cp_cntrl = CPCntrl() + cp_cntrl = CPCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) cp_cntrl.create(options, ruby_system, system) exec("system.cp_cntrl%d = cp_cntrl" % i) diff --git a/configs/ruby/MOESI_CMP_directory.py b/configs/ruby/MOESI_CMP_directory.py --- a/configs/ruby/MOESI_CMP_directory.py +++ b/configs/ruby/MOESI_CMP_directory.py @@ -125,6 +125,7 @@ l2_index_start = block_size_bits + l2_bits + dir_bits = int(math.log(options.num_dirs, 2)) for i in xrange(options.num_l2caches): # @@ -134,10 +135,10 @@ assoc = options.l2_assoc, start_index_bit = l2_index_start) - l2_cntrl = L2Cache_Controller(version = i, - L2cache = l2_cache, - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + l2_cntrl = L2Cache_Controller(version = i, L2cache = l2_cache, + transitions_per_cycle = options.ports, ruby_system = + ruby_system, dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.l2_cntrl%d = l2_cntrl" % i) l2_cntrl_nodes.append(l2_cntrl) @@ -163,7 +164,6 @@ assert(phys_mem_size % options.num_dirs == 0) mem_module_size = phys_mem_size / options.num_dirs - # Run each of the ruby memory controllers at a ratio of the frequency of # the ruby system. # clk_divider value is a fix to pass regression. @@ -176,10 +176,11 @@ dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + directory = RubyDirectoryMemory(version = i, + size = dir_size, + num_directories = options.num_dirs), + transitions_per_cycle = options.ports, + ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -207,7 +208,9 @@ dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) dma_cntrl_nodes.append(dma_cntrl) @@ -234,7 +237,9 @@ ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) ruby_system.io_controller = io_controller # Connect the dma controller to the network diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py --- a/configs/ruby/MOESI_CMP_token.py +++ b/configs/ruby/MOESI_CMP_token.py @@ -78,6 +78,7 @@ # controller constructors are called before the network constructor # l2_bits = int(math.log(options.num_l2caches, 2)) + dir_bits = int(math.log(options.num_dirs, 2)) block_size_bits = int(math.log(options.cacheline_size, 2)) for i in xrange(options.num_cpus): @@ -105,6 +106,7 @@ l1_cntrl = L1Cache_Controller(version=i, L1Icache=l1i_cache, L1Dcache=l1d_cache, + l2_select_low_bit = block_size_bits, l2_select_num_bits=l2_bits, N_tokens=n_tokens, retry_threshold=options.l1_retries, @@ -117,7 +119,9 @@ send_evictions=send_evicts(options), transitions_per_cycle=options.ports, clk_domain=clk_domain, - ruby_system=ruby_system) + ruby_system=ruby_system, + dir_select_low_bit=block_size_bits, + dir_select_num_bits=dir_bits) cpu_seq = RubySequencer(version=i, icache=l1i_cache, dcache=l1d_cache, clk_domain=clk_domain, @@ -161,7 +165,9 @@ L2cache = l2_cache, N_tokens = n_tokens, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.l2_cntrl%d = l2_cntrl" % i) l2_cntrl_nodes.append(l2_cntrl) @@ -199,12 +205,16 @@ dir_size = MemorySize('0B') dir_size.value = mem_module_size - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - l2_select_num_bits = l2_bits, - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + dir_cntrl = Directory_Controller(version=i, + directory=RubyDirectoryMemory( + version=i, size=dir_size, + num_directories=options.num_dirs), + l2_select_low_bit=block_size_bits, + l2_select_num_bits=l2_bits, + transitions_per_cycle=options.ports, + ruby_system=ruby_system, + dir_select_low_bit=block_size_bits, + dir_select_num_bits=dir_bits) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -241,7 +251,9 @@ dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) dma_cntrl_nodes.append(dma_cntrl) @@ -264,7 +276,9 @@ ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) ruby_system.io_controller = io_controller # Connect the dma controller to the network diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -73,6 +73,7 @@ # controller constructors are called before the network constructor # block_size_bits = int(math.log(options.cacheline_size, 2)) + dir_bits = int(math.log(options.num_dirs, 2)) for i in xrange(options.num_cpus): # @@ -108,7 +109,11 @@ send_evictions=send_evicts(options), transitions_per_cycle=options.ports, clk_domain=clk_domain, - ruby_system=ruby_system) + ruby_system=ruby_system, + num_l1_machines=options.num_cpus, + dir_select_low_bit= + options.numa_high_bit + dir_bits - 1, + dir_select_num_bits=dir_bits) cpu_seq = RubySequencer(version=i, icache=l1i_cache, dcache=l1d_cache,clk_domain=clk_domain, @@ -154,7 +159,6 @@ # pf_size = MemorySize(options.l2_size) pf_size.value = pf_size.value * 2 - dir_bits = int(math.log(options.num_dirs, 2)) pf_bits = int(math.log(pf_size.value, 2)) if options.numa_high_bit: if options.pf_on or options.dir_on: @@ -184,14 +188,14 @@ pf = ProbeFilter(size = pf_size, assoc = 4, start_index_bit = pf_start_bit) - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory( - version = i, size = dir_size), - probeFilter = pf, - probe_filter_enabled = options.pf_on, - full_bit_dir_enabled = options.dir_on, - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + dir_cntrl = Directory_Controller(version=i, + directory=RubyDirectoryMemory(version=i, + size=dir_size, num_directories=options.num_dirs), + probeFilter=pf, probe_filter_enabled=options.pf_on, + full_bit_dir_enabled=options.dir_on, + transitions_per_cycle=options.ports, + num_l1_machines=options.num_cpus, + ruby_system=ruby_system) if options.recycle_latency: dir_cntrl.recycle_latency = options.recycle_latency @@ -228,10 +232,11 @@ ruby_system = ruby_system, slave = dma_port) - dma_cntrl = DMA_Controller(version = i, - dma_sequencer = dma_seq, - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, + transitions_per_cycle = options.ports, + ruby_system = ruby_system, + dir_select_low_bit = options.numa_high_bit + dir_bits - 1, + dir_select_num_bits = dir_bits) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) dma_cntrl_nodes.append(dma_cntrl) @@ -252,9 +257,12 @@ if full_system: io_seq = DMASequencer(version=len(dma_ports), ruby_system=ruby_system) ruby_system._io_port = io_seq + io_controller = DMA_Controller(version = len(dma_ports), - dma_sequencer = io_seq, - ruby_system = ruby_system) + dma_sequencer = io_seq, ruby_system = ruby_system, + dir_select_low_bit = options.numa_high_bit + dir_bits - 1, + dir_select_num_bits = dir_bits) + ruby_system.io_controller = io_controller # Connect the dma controller to the network diff --git a/configs/ruby/Network_test.py b/configs/ruby/Network_test.py --- a/configs/ruby/Network_test.py +++ b/configs/ruby/Network_test.py @@ -27,6 +27,7 @@ # # Authors: Brad Beckmann +import math import m5 from m5.objects import * from m5.defines import buildEnv @@ -61,6 +62,9 @@ l1_cntrl_nodes = [] dir_cntrl_nodes = [] + block_size_bits = int(math.log(options.cacheline_size, 2)) + dir_bits = int(math.log(options.num_dirs, 2)) + # # Must create the individual controllers before the network to ensure the # controller constructors are called before the network constructor @@ -79,8 +83,10 @@ # Only one unified L1 cache exists. Can cache instructions and data. # l1_cntrl = L1Cache_Controller(version = i, - cacheMemory = cache, - ruby_system = ruby_system) + cacheMemory = cache, ruby_system = + ruby_system, dir_select_low_bit = + block_size_bits, dir_select_num_bits = + dir_bits) cpu_seq = RubySequencer(icache = cache, dcache = cache, @@ -109,11 +115,10 @@ dir_size = MemorySize('0B') dir_size.value = mem_module_size - dir_cntrl = Directory_Controller(version = i, - directory = \ - RubyDirectoryMemory(version = i, - size = dir_size), - ruby_system = ruby_system) + dir_cntrl = Directory_Controller( + version=i, directory=RubyDirectoryMemory(version=i, size= + dir_size, num_directories=options.num_dirs), + ruby_system=ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) diff --git a/configs/ruby/GPU_VIPER_Baseline.py b/configs/ruby/GPU_VIPER_Baseline.py --- a/configs/ruby/GPU_VIPER_Baseline.py +++ b/configs/ruby/GPU_VIPER_Baseline.py @@ -303,7 +303,7 @@ def create(self, options, ruby_system, system): self.version = self.versionCount() self.response_latency = 30 - self.directory = DirMem() + self.directory = DirMem(num_directories=options.num_dirs) self.directory.create(options, ruby_system, system) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -398,8 +398,9 @@ # controller constructors are called before the network constructor # - # For an odd number of CPUs, still create the right number of controllers - TCC_bits = int(math.log(options.num_tccs, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + tcc_num_bits = int(math.log(options.num_tccs, 2)) + dir_num_bits = int(math.log(options.num_dirs, 2)) # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu # Clusters @@ -407,7 +408,11 @@ mainCluster = Cluster(intBW = crossbar_bw) for i in xrange(options.num_dirs): - dir_cntrl = DirCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits) + dir_cntrl = DirCntrl(noTCCdir=True, + tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + tccdir_low_bit=block_size_bits, + tccdir_num_bits=0) dir_cntrl.create(options, ruby_system, system) dir_cntrl.number_of_TBEs = options.num_tbes dir_cntrl.useL3OnWT = options.use_L3_on_WT @@ -440,7 +445,8 @@ cpuCluster = Cluster(extBW = crossbar_bw, intBW=crossbar_bw) for i in xrange((options.num_cpus + 1) / 2): - cp_cntrl = CPCntrl() + cp_cntrl = CPCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) cp_cntrl.create(options, ruby_system, system) exec("system.cp_cntrl%d = cp_cntrl" % i) @@ -473,9 +479,10 @@ gpuCluster = Cluster(extBW = crossbar_bw, intBW = crossbar_bw) for i in xrange(options.num_compute_units): - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) + tcp_cntrl = TCPCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + issue_latency=1, + number_of_TBEs=2560) # TBEs set to max outstanding requests tcp_cntrl.create(options, ruby_system, system) tcp_cntrl.WB = options.WB_L1 @@ -510,7 +517,8 @@ for i in xrange(options.num_sqc): - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl = SQCCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits) sqc_cntrl.create(options, ruby_system, system) exec("system.sqc_cntrl%d = sqc_cntrl" % i) @@ -539,7 +547,8 @@ for i in xrange(options.num_tccs): - tcc_cntrl = TCCCntrl() + tcc_cntrl = TCCCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) tcc_cntrl.create(options, ruby_system, system) tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency tcc_cntrl.l2_response_latency = options.TCC_latency diff --git a/configs/ruby/GPU_VIPER_Region.py b/configs/ruby/GPU_VIPER_Region.py --- a/configs/ruby/GPU_VIPER_Region.py +++ b/configs/ruby/GPU_VIPER_Region.py @@ -293,7 +293,7 @@ self.version = self.versionCount() self.response_latency = 25 self.response_latency_regionDir = 1 - self.directory = DirMem() + self.directory = DirMem(num_directories=options.num_dirs) self.directory.create(options, ruby_system, system) self.L3CacheMemory = L3Cache() self.L3CacheMemory.create(options, ruby_system, system) @@ -378,8 +378,7 @@ self.toDirLatency = options.gpu_to_dir_latency self.toRegionDirLatency = options.cpu_to_dir_latency self.noTCCdir = True - TCC_bits = int(math.log(options.num_tccs, 2)) - self.TCC_select_num_bits = TCC_bits + self.tcc_num_bits = int(math.log(options.num_tccs, 2)) self.ruby_system = ruby_system if options.recycle_latency: @@ -456,23 +455,29 @@ # dir_cntrl_nodes = [] - # For an odd number of CPUs, still create the right number of controllers - TCC_bits = int(math.log(options.num_tccs, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + tcc_num_bits = int(math.log(options.num_tccs, 2)) + dir_num_bits = int(math.log(options.num_dirs, 2)) # # Must create the individual controllers before the network to ensure the # controller constructors are called before the network constructor # - # For an odd number of CPUs, still create the right number of controllers crossbar_bw = 16 * options.num_compute_units #Assuming a 2GHz clock cpuCluster = Cluster(extBW = (crossbar_bw), intBW=crossbar_bw) for i in xrange((options.num_cpus + 1) / 2): - cp_cntrl = CPCntrl() + cp_cntrl = CPCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) cp_cntrl.create(options, ruby_system, system) - rb_cntrl = RBCntrl() + rb_cntrl = RBCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits, + tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + regiondir_low_bit=block_size_bits, + regiondir_num_bits=dir_num_bits) rb_cntrl.create(options, ruby_system, system) rb_cntrl.number_of_TBEs = 256 rb_cntrl.isOnCPU = True @@ -535,9 +540,10 @@ gpuCluster = Cluster(extBW = (crossbar_bw), intBW = crossbar_bw) for i in xrange(options.num_compute_units): - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) + tcp_cntrl = TCPCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + issue_latency=1, + number_of_TBEs=2560) # TBEs set to max outstanding requests tcp_cntrl.create(options, ruby_system, system) tcp_cntrl.WB = options.WB_L1 @@ -571,7 +577,8 @@ for i in xrange(options.num_sqc): - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl = SQCCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits) sqc_cntrl.create(options, ruby_system, system) exec("system.sqc_cntrl%d = sqc_cntrl" % i) @@ -599,7 +606,8 @@ for i in xrange(options.num_tccs): - tcc_cntrl = TCCCntrl() + tcc_cntrl = TCCCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) tcc_cntrl.create(options, ruby_system, system) tcc_cntrl.l2_request_latency = 1 tcc_cntrl.l2_response_latency = options.TCC_latency @@ -630,7 +638,12 @@ tcc_cntrl.triggerQueue = MessageBuffer(ordered = True) - rb_cntrl = RBCntrl() + rb_cntrl = RBCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits, + tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + regiondir_low_bit=block_size_bits, + regiondir_num_bits=dir_num_bits) rb_cntrl.create(options, ruby_system, system) rb_cntrl.number_of_TBEs = 2560 * options.num_compute_units rb_cntrl.isOnCPU = False @@ -676,7 +689,11 @@ # Clusters mainCluster = Cluster(intBW = crossbar_bw) - dir_cntrl = DirCntrl() + dir_cntrl = DirCntrl(region_low_bit=block_size_bits, + region_num_bits=dir_num_bits, + num_core_pair=int((options.num_cpus + 1) / 2), + num_tcp=options.num_compute_units, + num_sqc=options.num_sqc) dir_cntrl.create(options, ruby_system, system) dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units dir_cntrl.useL3OnWT = options.use_L3_on_WT @@ -718,7 +735,11 @@ mainCluster.add(dir_cntrl) - reg_cntrl = RegionCntrl(noTCCdir=True,TCC_select_num_bits = TCC_bits) + reg_cntrl = RegionCntrl(noTCCdir=True, + tcc_low_bit=block_size_bits, + tcc_num_bits=0, + dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) reg_cntrl.create(options, ruby_system, system) reg_cntrl.number_of_TBEs = options.num_tbes reg_cntrl.cpuRegionBufferNum = system.rb_cntrl0.version diff --git a/configs/ruby/MESI_Three_Level.py b/configs/ruby/MESI_Three_Level.py --- a/configs/ruby/MESI_Three_Level.py +++ b/configs/ruby/MESI_Three_Level.py @@ -76,6 +76,7 @@ num_l2caches_per_cluster = options.num_l2caches / options.num_clusters l2_bits = int(math.log(num_l2caches_per_cluster, 2)) + dir_bits = int(math.log(options.num_dirs, 2)) block_size_bits = int(math.log(options.cacheline_size, 2)) l2_index_start = block_size_bits + l2_bits @@ -128,8 +129,11 @@ l1_cntrl = L1Cache_Controller( version = i * num_cpus_per_cluster + j, - cache = l1_cache, l2_select_num_bits = l2_bits, - cluster_id = i, ruby_system = ruby_system) + cache = l1_cache, + l2_select_low_bit = block_size_bits, + l2_select_num_bits = l2_bits, + cluster_id = i, + ruby_system = ruby_system) exec("ruby_system.l0_cntrl%d = l0_cntrl" % ( i * num_cpus_per_cluster + j)) @@ -173,7 +177,9 @@ version = i * num_l2caches_per_cluster + j, L2cache = l2_cache, cluster_id = i, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.l2_cntrl%d = l2_cntrl" % (i * num_l2caches_per_cluster + j)) @@ -204,6 +210,7 @@ ruby_system.memctrl_clk_domain = DerivedClockDomain( clk_domain = ruby_system.clk_domain, clk_divider = 3) + dma_bits = 0 for i in xrange(options.num_dirs): # # Create the Ruby objects associated with the directory controller @@ -211,10 +218,16 @@ dir_size = MemorySize('0B') dir_size.value = mem_module_size - dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory(version = i, size = dir_size), - transitions_per_cycle = options.ports, - ruby_system = ruby_system) + dir_cntrl = Directory_Controller( + version = i, + directory = RubyDirectoryMemory(version = i, + size = dir_size, + num_directories = options.num_dirs), + transitions_per_cycle = options.ports, + ruby_system = ruby_system, + dma_select_low_bit = 0, + dma_select_num_bits = dma_bits) + exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) dir_cntrl_nodes.append(dir_cntrl) @@ -237,7 +250,9 @@ dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) exec("ruby_system.dma_cntrl%d.dma_sequencer.slave = dma_port" % i) @@ -262,7 +277,9 @@ ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) ruby_system.io_controller = io_controller # Connect the dma controller to the network diff --git a/configs/ruby/MESI_Two_Level.py b/configs/ruby/MESI_Two_Level.py --- a/configs/ruby/MESI_Two_Level.py +++ b/configs/ruby/MESI_Two_Level.py @@ -96,6 +96,7 @@ l1_cntrl = L1Cache_Controller(version = i, L1Icache = l1i_cache, L1Dcache = l1d_cache, + l2_select_low_bit = block_size_bits, l2_select_num_bits = l2_bits, send_evictions = send_evicts(options), prefetcher = prefetcher, @@ -134,6 +135,7 @@ l2_index_start = block_size_bits + l2_bits + dir_bits = int(math.log(options.num_dirs, 2)) for i in xrange(options.num_l2caches): # @@ -146,7 +148,9 @@ l2_cntrl = L2Cache_Controller(version = i, L2cache = l2_cache, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.l2_cntrl%d = l2_cntrl" % i) l2_cntrl_nodes.append(l2_cntrl) @@ -178,14 +182,21 @@ ruby_system.memctrl_clk_domain = DerivedClockDomain( clk_domain = ruby_system.clk_domain, clk_divider = 3) + dma_bits = 0 + if len(dma_ports) > 0: + dma_bits = int(math.log(len(dma_ports), 2)) for i in xrange(options.num_dirs): dir_size = MemorySize('0B') dir_size.value = mem_module_size dir_cntrl = Directory_Controller(version = i, - directory = RubyDirectoryMemory(version = i, size = dir_size), + directory = RubyDirectoryMemory(version = i, + size = dir_size, + num_directories = options.num_dirs), transitions_per_cycle = options.ports, + dma_select_low_bit = 0, + dma_select_num_bits = dma_bits, ruby_system = ruby_system) exec("ruby_system.dir_cntrl%d = dir_cntrl" % i) @@ -208,7 +219,9 @@ dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, - ruby_system = ruby_system) + ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) dma_cntrl_nodes.append(dma_cntrl) @@ -231,8 +244,9 @@ ruby_system = ruby_system) ruby_system._io_port = io_seq io_controller = DMA_Controller(version = len(dma_ports), - dma_sequencer = io_seq, - ruby_system = ruby_system) + dma_sequencer = io_seq, ruby_system = ruby_system, + dir_select_low_bit = block_size_bits, + dir_select_num_bits = dir_bits) ruby_system.io_controller = io_controller # Connect the dma controller to the network # Node ID 9b3faddc9c0b3a4fc47be011965af918b23bf695 # Parent 859f55d0928e8add970853cd5859055979d267c2 diff --git a/configs/ruby/GPU_RfO.py b/configs/ruby/GPU_RfO.py --- a/configs/ruby/GPU_RfO.py +++ b/configs/ruby/GPU_RfO.py @@ -387,7 +387,7 @@ self.response_latency = 30 - self.directory = DirMem() + self.directory = DirMem(num_directories=options.num_dirs) self.directory.create(options, ruby_system, system) self.L3CacheMemory = L3Cache() @@ -462,14 +462,20 @@ # controller constructors are called before the network constructor # - TCC_bits = int(math.log(options.num_tccs, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + tcc_num_bits = int(math.log(options.num_tccs, 2)) + tccdir_num_bits = int(math.log(options.num_tccs, 2)) + dir_num_bits = int(math.log(options.num_dirs, 2)) # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu # Clusters mainCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s for i in xrange(options.num_dirs): - dir_cntrl = DirCntrl(TCC_select_num_bits = TCC_bits) + dir_cntrl = DirCntrl(tccdir_low_bit=block_size_bits, + tccdir_num_bits=tccdir_num_bits, + tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits) dir_cntrl.create(options, ruby_system, system) dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units #Enough TBEs for all TCP TBEs @@ -499,11 +505,11 @@ mainCluster.add(dir_cntrl) - # For an odd number of CPUs, still create the right number of controllers cpuCluster = Cluster(extBW = 512, intBW = 512) # 1 TB/s for i in xrange((options.num_cpus + 1) / 2): - cp_cntrl = CPCntrl() + cp_cntrl = CPCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) cp_cntrl.create(options, ruby_system, system) exec("system.cp_cntrl%d = cp_cntrl" % i) @@ -537,8 +543,11 @@ for i in xrange(options.num_compute_units): - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = 2560) # max outstanding requests + tcp_cntrl = TCPCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + tccdir_low_bit=block_size_bits, + tccdir_num_bits=tccdir_num_bits, + number_of_TBEs=2560) # max outstanding requests tcp_cntrl.create(options, ruby_system, system) exec("system.tcp_cntrl%d = tcp_cntrl" % i) @@ -570,7 +579,10 @@ for i in xrange(options.num_sqc): - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl = SQCCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + tccdir_low_bit=block_size_bits, + tccdir_num_bits=tccdir_num_bits) sqc_cntrl.create(options, ruby_system, system) exec("system.sqc_cntrl%d = sqc_cntrl" % i) @@ -602,8 +614,11 @@ for i in xrange(options.num_cp): - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = 2560) # max outstanding requests + tcp_cntrl = TCPCntrl(tccdir_low_bit=block_size_bits, + tccdir_num_bits=tccdir_num_bits, + tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + number_of_TBEs=2560) # max outstanding requests tcp_cntrl.createCP(options, ruby_system, system) exec("system.tcp_cntrl%d = tcp_cntrl" % (options.num_compute_units + i)) @@ -633,7 +648,10 @@ gpuCluster.add(tcp_cntrl) - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl = SQCCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + tccdir_low_bit=block_size_bits, + tccdir_num_bits=tccdir_num_bits) sqc_cntrl.createCP(options, ruby_system, system) exec("system.sqc_cntrl%d = sqc_cntrl" % (options.num_compute_units + i)) @@ -665,14 +683,19 @@ for i in xrange(options.num_tccs): - tcc_cntrl = TCCCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = options.num_compute_units * 2560) + tcc_cntrl = TCCCntrl(tccdir_low_bit=block_size_bits, + tccdir_num_bits=tccdir_num_bits, + number_of_TBEs=options.num_compute_units*2560) #Enough TBEs for all TCP TBEs tcc_cntrl.create(options, ruby_system, system) tcc_cntrl_nodes.append(tcc_cntrl) - tccdir_cntrl = TCCDirCntrl(TCC_select_num_bits = TCC_bits, - number_of_TBEs = options.num_compute_units * 2560) + tccdir_cntrl = TCCDirCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits, + number_of_TBEs=\ + options.num_compute_units*2560) #Enough TBEs for all TCP TBEs tccdir_cntrl.create(options, ruby_system, system) tccdir_cntrl_nodes.append(tccdir_cntrl) diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -327,7 +327,7 @@ self.response_latency = 30 - self.directory = DirMem() + self.directory = DirMem(num_directories=options.num_dirs) self.directory.create(options, ruby_system, system) self.L3CacheMemory = L3Cache() @@ -414,8 +414,9 @@ # controller constructors are called before the network constructor # - # For an odd number of CPUs, still create the right number of controllers - TCC_bits = int(math.log(options.num_tccs, 2)) + block_size_bits = int(math.log(options.cacheline_size, 2)) + tcc_num_bits = int(math.log(options.num_tccs, 2)) + dir_num_bits = int(math.log(options.num_dirs, 2)) # This is the base crossbar that connects the L3s, Dirs, and cpu/gpu # Clusters @@ -429,7 +430,11 @@ mainCluster = Cluster(intBW=8) # 16 GB/s for i in xrange(options.num_dirs): - dir_cntrl = DirCntrl(noTCCdir = True, TCC_select_num_bits = TCC_bits) + dir_cntrl = DirCntrl(noTCCdir=True, + tccdir_low_bit=block_size_bits, + tccdir_num_bits=0, + tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits) dir_cntrl.create(options, ruby_system, system) dir_cntrl.number_of_TBEs = options.num_tbes dir_cntrl.useL3OnWT = options.use_L3_on_WT @@ -467,7 +472,8 @@ cpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s for i in xrange((options.num_cpus + 1) / 2): - cp_cntrl = CPCntrl() + cp_cntrl = CPCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits) cp_cntrl.create(options, ruby_system, system) exec("ruby_system.cp_cntrl%d = cp_cntrl" % i) @@ -504,9 +510,10 @@ gpuCluster = Cluster(extBW = 8, intBW = 8) # 16 GB/s for i in xrange(options.num_compute_units): - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) + tcp_cntrl = TCPCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + issue_latency=1, + number_of_TBEs=2560) # TBEs set to max outstanding requests tcp_cntrl.create(options, ruby_system, system) tcp_cntrl.WB = options.WB_L1 @@ -543,7 +550,8 @@ for i in xrange(options.num_sqc): - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl = SQCCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits) sqc_cntrl.create(options, ruby_system, system) exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % i) @@ -572,9 +580,10 @@ tcp_ID = options.num_compute_units + i sqc_ID = options.num_sqc + i - tcp_cntrl = TCPCntrl(TCC_select_num_bits = TCC_bits, - issue_latency = 1, - number_of_TBEs = 2560) + tcp_cntrl = TCPCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits, + issue_latency=1, + number_of_TBEs=2560) # TBEs set to max outstanding requests tcp_cntrl.createCP(options, ruby_system, system) tcp_cntrl.WB = options.WB_L1 @@ -609,7 +618,8 @@ gpuCluster.add(tcp_cntrl) - sqc_cntrl = SQCCntrl(TCC_select_num_bits = TCC_bits) + sqc_cntrl = SQCCntrl(tcc_low_bit=block_size_bits, + tcc_num_bits=tcc_num_bits) sqc_cntrl.create(options, ruby_system, system) exec("ruby_system.sqc_cntrl%d = sqc_cntrl" % sqc_ID) @@ -623,7 +633,9 @@ for i in xrange(options.num_tccs): - tcc_cntrl = TCCCntrl(l2_response_latency = options.TCC_latency) + tcc_cntrl = TCCCntrl(dir_low_bit=block_size_bits, + dir_num_bits=dir_num_bits, + l2_response_latency=options.TCC_latency) tcc_cntrl.create(options, ruby_system, system) tcc_cntrl.l2_request_latency = options.gpu_to_dir_latency tcc_cntrl.l2_response_latency = options.TCC_latency