diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ -50,7 +50,9 @@ } external_type(NodeID, default="0", primitive="yes"); -external_type(MachineID); +structure(MachineID, external = "yes", desc="..."){ + NodeID getNum(); +} structure (Set, external = "yes", non_obj="yes") { void setSize(int); @@ -81,8 +83,7 @@ void remove(MachineID); void removeSet(Set); void removeNetDest(NetDest); - void broadcast(); - void broadcast(MachineType); + void broadcast(MachineType, int); void addRandom(); void clear(); Set toSet(); diff --git a/src/mem/ruby/common/NetDest.hh b/src/mem/ruby/common/NetDest.hh --- a/src/mem/ruby/common/NetDest.hh +++ b/src/mem/ruby/common/NetDest.hh @@ -55,8 +55,7 @@ void remove(MachineID oldElement); void removeNetDest(const NetDest& netDest); void clear(); - void broadcast(); - void broadcast(MachineType machine); + void broadcast(MachineType machine, unsigned int count); int count() const; bool isEqual(const NetDest& netDest) const; @@ -79,33 +78,21 @@ bool isEmpty() const; // For Princeton Network - std::vector getAllDest(); + std::vector getAllDest(); MachineID smallestElement() const; MachineID smallestElement(MachineType machine) const; - void resize(); int getSize() const { return m_bits.size(); } // get element for a index - NodeID elementAt(MachineID index); + bool elementAt(MachineID index) const; void print(std::ostream& out) const; private: - // returns a value >= MachineType_base_level("this machine") - // and < MachineType_base_level("next highest machine") - int - vecIndex(MachineID m) const - { - int vec_index = MachineType_base_level(m.type); - assert(vec_index < m_bits.size()); - return vec_index; - } - - NodeID bitIndex(NodeID index) const { return index; } - - std::vector m_bits; // a vector of bit vectors - i.e. Sets + // a vector of bit vectors - i.e. 
Sets + std::vector m_bits; }; inline std::ostream& diff --git a/src/mem/ruby/common/NetDest.cc b/src/mem/ruby/common/NetDest.cc --- a/src/mem/ruby/common/NetDest.cc +++ b/src/mem/ruby/common/NetDest.cc @@ -26,20 +26,17 @@ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ -#include - #include "mem/ruby/common/NetDest.hh" NetDest::NetDest() + : m_bits(MachineType_NUM) { - resize(); } void NetDest::add(MachineID newElement) { - assert(bitIndex(newElement.num) < m_bits[vecIndex(newElement)].getSize()); - m_bits[vecIndex(newElement)].add(bitIndex(newElement.num)); + m_bits[newElement.type].add(newElement.num); } void @@ -54,16 +51,13 @@ void NetDest::setNetDest(MachineType machine, const Set& set) { - // assure that there is only one set of destinations for this machine - assert(MachineType_base_level((MachineType)(machine + 1)) - - MachineType_base_level(machine) == 1); - m_bits[MachineType_base_level(machine)] = set; + m_bits[machine] = set; } void NetDest::remove(MachineID oldElement) { - m_bits[vecIndex(oldElement)].remove(bitIndex(oldElement.num)); + m_bits[oldElement.type].remove(oldElement.num); } void @@ -84,34 +78,26 @@ } void -NetDest::broadcast() +NetDest::broadcast(MachineType machineType, unsigned int count) { - for (MachineType machine = MachineType_FIRST; - machine < MachineType_NUM; ++machine) { - broadcast(machine); - } -} - -void -NetDest::broadcast(MachineType machineType) -{ - for (NodeID i = 0; i < MachineType_base_count(machineType); i++) { + for (NodeID i = 0; i < count; i++) { MachineID mach = {machineType, i}; add(mach); } } //For Princeton Network -std::vector +std::vector NetDest::getAllDest() { - std::vector dest; - dest.clear(); + std::vector dest; for (int i = 0; i < m_bits.size(); i++) { - for (int j = 0; j < m_bits[i].getSize(); j++) { - if (m_bits[i].isElement(j)) { - int id = MachineType_base_number((MachineType)i) + j; - dest.push_back((NodeID)id); + if (!m_bits[i].isEmpty()) { + for (NodeID j = 0; j < 
m_bits[i].getSize(); j++) { + if (m_bits[i].isElement(j)) { + MachineID mach = {(MachineType)i, j}; + dest.push_back(mach); + } } } } @@ -128,10 +114,10 @@ return counter; } -NodeID -NetDest::elementAt(MachineID index) +bool +NetDest::elementAt(MachineID index) const { - return m_bits[vecIndex(index)].elementAt(bitIndex(index.num)); + return m_bits[index.type].elementAt(index.num); } MachineID @@ -141,7 +127,7 @@ for (int i = 0; i < m_bits.size(); i++) { for (NodeID j = 0; j < m_bits[i].getSize(); j++) { if (m_bits[i].isElement(j)) { - MachineID mach = {MachineType_from_base_level(i), j}; + MachineID mach = {(MachineType)i, j}; return mach; } } @@ -152,9 +138,9 @@ MachineID NetDest::smallestElement(MachineType machine) const { - int size = m_bits[MachineType_base_level(machine)].getSize(); + int size = m_bits[machine].getSize(); for (NodeID j = 0; j < size; j++) { - if (m_bits[MachineType_base_level(machine)].isElement(j)) { + if (m_bits[machine].isElement(j)) { MachineID mach = {machine, j}; return mach; } @@ -240,18 +226,7 @@ bool NetDest::isElement(MachineID element) const { - return ((m_bits[vecIndex(element)])).isElement(bitIndex(element.num)); -} - -void -NetDest::resize() -{ - m_bits.resize(MachineType_base_level(MachineType_NUM)); - assert(m_bits.size() == MachineType_NUM); - - for (int i = 0; i < m_bits.size(); i++) { - m_bits[i].setSize(MachineType_base_count((MachineType)i)); - } + return m_bits[element.type].isElement(element.num); } void diff --git a/src/mem/ruby/common/Set.hh b/src/mem/ruby/common/Set.hh --- a/src/mem/ruby/common/Set.hh +++ b/src/mem/ruby/common/Set.hh @@ -73,6 +73,9 @@ void add(NodeID index) { + assert(index < NUMBER_BITS_PER_SET); + if (m_nSize <= index) + m_nSize = index + 1; bits.set(index); } @@ -109,17 +112,6 @@ void clear() { bits.reset(); } /* - * this function sets all bits in the set - */ - void broadcast() - { - bits.set(); - for (int j = m_nSize; j < NUMBER_BITS_PER_SET; ++j) { - bits.reset(j); - } - } - - /* * This 
function returns the population count of 1's in the set */ int count() const { return bits.count(); } diff --git a/src/mem/ruby/network/Network.cc b/src/mem/ruby/network/Network.cc --- a/src/mem/ruby/network/Network.cc +++ b/src/mem/ruby/network/Network.cc @@ -44,7 +44,7 @@ // Total nodes/controllers in network // Must make sure this is called after the State Machine constructors - m_nodes = MachineType_base_number(MachineType_NUM); + m_nodes = p->ext_links.size(); assert(m_nodes != 0); assert(m_virtual_networks != 0); diff --git a/src/mem/ruby/network/garnet/fixed-pipeline/NetworkInterface_d.cc b/src/mem/ruby/network/garnet/fixed-pipeline/NetworkInterface_d.cc --- a/src/mem/ruby/network/garnet/fixed-pipeline/NetworkInterface_d.cc +++ b/src/mem/ruby/network/garnet/fixed-pipeline/NetworkInterface_d.cc @@ -123,7 +123,7 @@ NetDest net_msg_dest = net_msg_ptr->getDestination(); // gets all the destinations associated with this message. - vector dest_nodes = net_msg_dest.getAllDest(); + vector dest_nodes = net_msg_dest.getAllDest(); // Number of flits is dependent on the link bandwidth available. 
// This is expressed in terms of bytes/cycle or the flit size @@ -139,29 +139,16 @@ if (vc == -1) { return false ; } + MsgPtr new_msg_ptr = msg_ptr->clone(); - NodeID destID = dest_nodes[ctr]; + Message *new_net_msg_ptr = new_msg_ptr.get(); - Message *new_net_msg_ptr = new_msg_ptr.get(); - if (dest_nodes.size() > 1) { - NetDest personal_dest; - for (int m = 0; m < (int) MachineType_NUM; m++) { - if ((destID >= MachineType_base_number((MachineType) m)) && - destID < MachineType_base_number((MachineType) (m+1))) { - // calculating the NetDest associated with this destID - personal_dest.clear(); - personal_dest.add((MachineID) {(MachineType) m, (destID - - MachineType_base_number((MachineType) m))}); - new_net_msg_ptr->getDestination() = personal_dest; - break; - } - } - net_msg_dest.removeNetDest(personal_dest); - // removing the destination from the original message to reflect - // that a message with this particular destination has been - // flitisized and an output vc is acquired - net_msg_ptr->getDestination().removeNetDest(personal_dest); - } + MachineID destID = dest_nodes[ctr]; + new_net_msg_ptr->getDestination().clear(); + new_net_msg_ptr->getDestination().add(destID); + + net_msg_ptr->getDestination().removeNetDest(new_net_msg_ptr->\ + getDestination()); for (int i = 0; i < num_flits; i++) { m_net_ptr->increment_injected_flits(vnet); diff --git a/src/mem/ruby/network/garnet/flexible-pipeline/NetworkInterface.cc b/src/mem/ruby/network/garnet/flexible-pipeline/NetworkInterface.cc --- a/src/mem/ruby/network/garnet/flexible-pipeline/NetworkInterface.cc +++ b/src/mem/ruby/network/garnet/flexible-pipeline/NetworkInterface.cc @@ -117,7 +117,7 @@ NetDest net_msg_dest = net_msg_ptr->getDestination(); // get all the destinations associated with this message. - vector dest_nodes = net_msg_dest.getAllDest(); + vector dest_nodes = net_msg_dest.getAllDest(); // Number of flits is dependent on the link bandwidth available. 
// This is expressed in terms of bytes/cycle or the flit size @@ -133,30 +133,17 @@ // did not find a free output vc return false ; } + MsgPtr new_msg_ptr = msg_ptr->clone(); - NodeID destID = dest_nodes[ctr]; + Message *new_net_msg_ptr = new_msg_ptr.get(); - Message *new_net_msg_ptr = new_msg_ptr.get(); - if (dest_nodes.size() > 1) { - NetDest personal_dest; - for (int m = 0; m < (int) MachineType_NUM; m++) { - if ((destID >= MachineType_base_number((MachineType) m)) && - destID < MachineType_base_number((MachineType) (m+1))) { - // calculating the NetDest associated with this destID - personal_dest.clear(); - personal_dest.add((MachineID) {(MachineType) m, (destID - - MachineType_base_number((MachineType) m))}); - new_net_msg_ptr->getDestination() = personal_dest; - break; - } - } - net_msg_dest.removeNetDest(personal_dest); + MachineID destID = dest_nodes[ctr]; + new_net_msg_ptr->getDestination().clear(); + new_net_msg_ptr->getDestination().add(destID); - // removing the destination from the original message to reflect - // that a message with this particular destination has been - // flitisized and an output vc is acquired - net_msg_ptr->getDestination().removeNetDest(personal_dest); - } + net_msg_ptr->getDestination().removeNetDest(new_net_msg_ptr->\ + getDestination()); + for (int i = 0; i < num_flits; i++) { m_net_ptr->increment_injected_flits(vnet); flit *fl = new flit(i, vc, vnet, num_flits, new_msg_ptr, diff --git a/src/mem/ruby/slicc_interface/AbstractController.hh b/src/mem/ruby/slicc_interface/AbstractController.hh --- a/src/mem/ruby/slicc_interface/AbstractController.hh +++ b/src/mem/ruby/slicc_interface/AbstractController.hh @@ -68,6 +68,7 @@ NodeID getVersion() const { return m_machineID.getNum(); } MachineType getType() const { return m_machineID.getType(); } + MachineID getMachineID() const { return m_machineID; } void initNetworkPtr(Network* net_ptr) { m_net_ptr = net_ptr; } @@ -118,8 +119,6 @@ void recvTimingResp(PacketPtr pkt); public: - 
MachineID getMachineID() const { return m_machineID; } - Stats::Histogram& getDelayHist() { return m_delayHistogram; } Stats::Histogram& getDelayVCHist(uint32_t index) { return *(m_delayVCHistogram[index]); } diff --git a/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm --- a/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm +++ b/src/mem/protocol/MOESI_AMD_Base-Region-dir.sm @@ -923,11 +923,12 @@ out_msg.Type := ProbeRequestType:PrbDowngrade; out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); tbe.NumPendingAcks := tbe.NumPendingAcks + num_core_pair - 1; - out_msg.Destination.broadcast(MachineType:TCP); + out_msg.Destination.broadcast(MachineType:TCP, num_tcp); tbe.NumPendingAcks := tbe.NumPendingAcks + num_tcp; - out_msg.Destination.broadcast(MachineType:SQC); + out_msg.Destination.broadcast(MachineType:SQC, num_sqc); tbe.NumPendingAcks := tbe.NumPendingAcks + num_sqc; out_msg.Destination.remove(in_msg.Requestor); DPRINTF(RubySlicc, "%s\n", (out_msg)); @@ -945,11 +946,12 @@ out_msg.Type := ProbeRequestType:PrbInv; out_msg.ReturnData := false; out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); tbe.NumPendingAcks := tbe.NumPendingAcks + num_core_pair - 1; - out_msg.Destination.broadcast(MachineType:TCP); + out_msg.Destination.broadcast(MachineType:TCP, num_tcp); tbe.NumPendingAcks := tbe.NumPendingAcks + num_tcp; - out_msg.Destination.broadcast(MachineType:SQC); + out_msg.Destination.broadcast(MachineType:SQC, num_sqc); tbe.NumPendingAcks := tbe.NumPendingAcks + num_sqc; 
out_msg.Destination.remove(in_msg.Requestor); APPEND_TRANSITION_COMMENT(" ic: Acks remaining: "); diff --git a/src/mem/protocol/MOESI_AMD_Base-dir.sm b/src/mem/protocol/MOESI_AMD_Base-dir.sm --- a/src/mem/protocol/MOESI_AMD_Base-dir.sm +++ b/src/mem/protocol/MOESI_AMD_Base-dir.sm @@ -46,6 +46,7 @@ int tcc_num_bits; int tccdir_low_bit; int tccdir_num_bits; + int num_core_pair; // From the Cores MessageBuffer * requestFromCores, network="From", virtual_network="0", vnet_type="request"; @@ -555,7 +556,8 @@ out_msg.Type := ProbeRequestType:PrbInv; out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); // add relevant TCC node to list. This replaces all TCPs and SQCs if (((in_msg.Type == CoherenceRequestType:WriteThrough || @@ -596,7 +598,8 @@ out_msg.Type := ProbeRequestType:PrbDowngrade; out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); // add relevant TCC node to the list. This replaces all TCPs and SQCs if (noTCCdir || CPUonly) { //Don't need to notify TCC about reads @@ -636,7 +639,8 @@ out_msg.Type := ProbeRequestType:PrbInv; out_msg.ReturnData := false; out_msg.MessageSize := MessageSizeType:Control; - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); // add relevant TCC node to the list. 
This replaces all TCPs and SQCs if (noTCCdir && !CPUonly) { diff --git a/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm --- a/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm +++ b/src/mem/protocol/MOESI_AMD_Base-probeFilter.sm @@ -58,6 +58,7 @@ int tcc_num_bits:=1; int tccdir_low_bit; int tccdir_num_bits; + int num_core_pair; // From the Cores MessageBuffer * requestFromCores, network="From", virtual_network="0", ordered="false", vnet_type="request"; @@ -643,7 +644,8 @@ out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; if(isCPUSharer(address)) { - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); } // add relevant TCC node to list. This replaces all TCPs and SQCs @@ -688,7 +690,7 @@ out_msg.MessageSize := MessageSizeType:Control; if(isCPUSharer(address)) { // won't be realistic for multisocket - out_msg.Destination.broadcast(MachineType:CorePair); + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); } // add relevant TCC node to the list. This replaces all TCPs and SQCs if(isGPUSharer(address)) { @@ -731,7 +733,8 @@ out_msg.ReturnData := true; out_msg.MessageSize := MessageSizeType:Control; if(isCPUSharer(address)) { - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); } // add relevant TCC node to the list. 
This replaces all TCPs and SQCs if(isGPUSharer(address)) { @@ -775,7 +778,8 @@ out_msg.ReturnData := false; out_msg.MessageSize := MessageSizeType:Control; if(isCPUSharer(address)) { - out_msg.Destination.broadcast(MachineType:CorePair); // won't be realistic for multisocket + // won't be realistic for multisocket + out_msg.Destination.broadcast(MachineType:CorePair, num_core_pair); } // add relevant TCC node to the list. This replaces all TCPs and SQCs diff --git a/src/mem/protocol/MOESI_CMP_token-L1cache.sm b/src/mem/protocol/MOESI_CMP_token-L1cache.sm --- a/src/mem/protocol/MOESI_CMP_token-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L1cache.sm @@ -35,6 +35,7 @@ : Sequencer * sequencer; CacheMemory * L1Icache; CacheMemory * L1Dcache; + int num_l1_machines; int l2_select_low_bit; int l2_select_num_bits; int dir_select_num_bits; @@ -755,7 +756,7 @@ out_msg.addr := address; out_msg.Type := PersistentRequestType:GETS_PERSISTENT; out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); // // Currently the configuration system limits the system to only one @@ -832,7 +833,7 @@ // Since only one chip, assuming all L1 caches are local // //out_msg.Destination := getOtherLocalL1IDs(machineID); - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.Destination.remove(machineID); out_msg.RetryNum := tbe.IssueCount; @@ -878,7 +879,7 @@ out_msg.addr := address; out_msg.Type := PersistentRequestType:GETX_PERSISTENT; out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); // // Currently the configuration system limits the system to only one @@ -959,7 +960,7 @@ // Since only one chip, assuming all L1 caches are local // //out_msg.Destination := getOtherLocalL1IDs(machineID); - 
out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.Destination.remove(machineID); out_msg.RetryNum := tbe.IssueCount; @@ -1451,7 +1452,7 @@ out_msg.addr := address; out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); // // Currently the configuration system limits the system to only one diff --git a/src/mem/protocol/MOESI_CMP_token-L2cache.sm b/src/mem/protocol/MOESI_CMP_token-L2cache.sm --- a/src/mem/protocol/MOESI_CMP_token-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_token-L2cache.sm @@ -32,6 +32,7 @@ Cycles l2_request_latency := 5; Cycles l2_response_latency := 5; bool filtering_enabled := "True"; + int num_l1_machines; int dir_select_num_bits; int dir_select_low_bit; @@ -812,7 +813,7 @@ // Currently assuming only one chip so all L1s are local // //out_msg.Destination := getLocalL1IDs(machineID); - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.Destination.remove(in_msg.Requestor); out_msg.Type := in_msg.Type; diff --git a/src/mem/protocol/MOESI_CMP_token-dir.sm b/src/mem/protocol/MOESI_CMP_token-dir.sm --- a/src/mem/protocol/MOESI_CMP_token-dir.sm +++ b/src/mem/protocol/MOESI_CMP_token-dir.sm @@ -28,6 +28,7 @@ machine(MachineType:Directory, "Token protocol") : DirectoryMemory * directory; + int num_l1_machines; int l2_select_low_bit; int l2_select_num_bits; Cycles directory_latency := 5; @@ -465,7 +466,7 @@ out_msg.addr := address; out_msg.Type := PersistentRequestType:GETX_PERSISTENT; out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); // // Currently the configuration system limits the system to only one @@ -517,7 +518,7 @@ // // 
Since only one chip, assuming all L1 caches are local // - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); @@ -536,7 +537,7 @@ out_msg.addr := address; out_msg.Type := PersistentRequestType:GETS_PERSISTENT; out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); // // Currently the configuration system limits the system to only one @@ -584,7 +585,7 @@ // // Since only one chip, assuming all L1 caches are local // - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, l2_select_low_bit, l2_select_num_bits, intToID(0))); @@ -721,7 +722,7 @@ out_msg.addr := address; out_msg.Type := PersistentRequestType:DEACTIVATE_PERSISTENT; out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); // // Currently the configuration system limits the system to only one diff --git a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -890,8 +890,12 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches - out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor + + // Send to all L1 caches + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); + // Don't include the original requestor + out_msg.Destination.remove(in_msg.Requestor); + out_msg.MessageSize := 
MessageSizeType:Broadcast_Control; out_msg.InitialRequestTime := in_msg.InitialRequestTime; out_msg.ForwardRequestTime := curCycle(); @@ -919,7 +923,8 @@ out_msg.addr := address; out_msg.Type := CoherenceRequestType:INV; out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches + // Send to all L1 caches + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.MessageSize := MessageSizeType:Broadcast_Control; } } @@ -964,8 +969,13 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches - out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor + + // Send to all L1 caches + out_msg.Destination.broadcast(MachineType:L1Cache, + num_l1_machines); + // Don't include the original requestor + out_msg.Destination.remove(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Broadcast_Control; out_msg.InitialRequestTime := in_msg.InitialRequestTime; out_msg.ForwardRequestTime := curCycle(); @@ -1036,8 +1046,12 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches - out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor + + // Send to all L1 caches + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); + // Don't include the original requestor + out_msg.Destination.remove(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Broadcast_Control; out_msg.InitialRequestTime := in_msg.InitialRequestTime; out_msg.ForwardRequestTime := curCycle(); @@ -1072,7 +1086,9 @@ out_msg.addr := address; out_msg.Type := in_msg.Type; out_msg.Requestor := in_msg.Requestor; - out_msg.Destination.broadcast(MachineType:L1Cache); // Send to all L1 caches + + // Send to all L1 caches + 
out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.Destination.remove(in_msg.Requestor); // Don't include the original requestor out_msg.MessageSize := MessageSizeType:Broadcast_Control; out_msg.InitialRequestTime := in_msg.InitialRequestTime; @@ -1090,12 +1106,11 @@ enqueue(forwardNetwork_out, RequestMsg, from_memory_controller_latency) { out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETX; - // - // Send to all L1 caches, since the requestor is the memory controller - // itself - // + + // Send to all L1 caches, since the requestor is the memory + // controller itself out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.MessageSize := MessageSizeType:Broadcast_Control; } } @@ -1109,12 +1124,11 @@ enqueue(forwardNetwork_out, RequestMsg, from_memory_controller_latency) { out_msg.addr := address; out_msg.Type := CoherenceRequestType:GETS; - // - // Send to all L1 caches, since the requestor is the memory controller - // itself - // + + // Send to all L1 caches, since the requestor is the memory + // controller itself out_msg.Requestor := machineID; - out_msg.Destination.broadcast(MachineType:L1Cache); + out_msg.Destination.broadcast(MachineType:L1Cache, num_l1_machines); out_msg.MessageSize := MessageSizeType:Broadcast_Control; } } diff --git a/configs/ruby/MOESI_AMD_Base.py b/configs/ruby/MOESI_AMD_Base.py --- a/configs/ruby/MOESI_AMD_Base.py +++ b/configs/ruby/MOESI_AMD_Base.py @@ -199,7 +199,8 @@ dir_cntrl = DirCntrl(tcc_low_bit=block_size_bits, tcc_num_bits=0, tccdir_low_bit=block_size_bits, - tccdir_num_bits=0) + tccdir_num_bits=0, + num_core_pair=int((options.num_cpus + 1) / 2)) dir_cntrl.create(options, ruby_system, system) # Connect the Directory controller to the ruby network diff --git a/configs/ruby/MOESI_CMP_token.py b/configs/ruby/MOESI_CMP_token.py --- a/configs/ruby/MOESI_CMP_token.py +++ 
b/configs/ruby/MOESI_CMP_token.py @@ -120,6 +120,7 @@ transitions_per_cycle=options.ports, clk_domain=clk_domain, ruby_system=ruby_system, + num_l1_machines = options.num_cpus, dir_select_low_bit=block_size_bits, dir_select_num_bits=dir_bits) @@ -166,6 +167,7 @@ N_tokens = n_tokens, transitions_per_cycle = options.ports, ruby_system = ruby_system, + num_l1_machines = options.num_cpus, dir_select_low_bit = block_size_bits, dir_select_num_bits = dir_bits) @@ -213,6 +215,7 @@ l2_select_num_bits=l2_bits, transitions_per_cycle=options.ports, ruby_system=ruby_system, + num_l1_machines = options.num_cpus, dir_select_low_bit=block_size_bits, dir_select_num_bits=dir_bits) diff --git a/configs/ruby/MOESI_hammer.py b/configs/ruby/MOESI_hammer.py --- a/configs/ruby/MOESI_hammer.py +++ b/configs/ruby/MOESI_hammer.py @@ -75,6 +75,19 @@ block_size_bits = int(math.log(options.cacheline_size, 2)) dir_bits = int(math.log(options.num_dirs, 2)) + # + # Duplicated from Ruby.py since this would otherwise arrive uninitialized + # for the variables which rely on it. 
+ # + if options.numa_high_bit: + numa_bit = options.numa_high_bit + else: + # if the numa_bit is not specified, set the directory bits as the + # lowest bits above the block offset bits, and the numa_bit as the + # highest of those directory bits + dir_bits = int(math.log(options.num_dirs, 2)) + numa_bit = block_size_bits + dir_bits - 1 + for i in xrange(options.num_cpus): # # First create the Ruby objects associated with this cpu @@ -112,7 +125,7 @@ ruby_system=ruby_system, num_l1_machines=options.num_cpus, dir_select_low_bit= - options.numa_high_bit + dir_bits - 1, + numa_bit - dir_bits + 1, dir_select_num_bits=dir_bits) cpu_seq = RubySequencer(version=i, icache=l1i_cache, @@ -164,7 +177,7 @@ if options.pf_on or options.dir_on: # if numa high bit explicitly set, make sure it does not overlap # with the probe filter index - assert(options.numa_high_bit - dir_bits > pf_bits) + assert(numa_bit - dir_bits > pf_bits) # set the probe filter start bit to just above the block offset pf_start_bit = block_size_bits @@ -235,7 +248,7 @@ dma_cntrl = DMA_Controller(version = i, dma_sequencer = dma_seq, transitions_per_cycle = options.ports, ruby_system = ruby_system, - dir_select_low_bit = options.numa_high_bit + dir_bits - 1, + dir_select_low_bit = numa_bit - dir_bits + 1, dir_select_num_bits = dir_bits) exec("ruby_system.dma_cntrl%d = dma_cntrl" % i) @@ -260,7 +273,7 @@ io_controller = DMA_Controller(version = len(dma_ports), dma_sequencer = io_seq, ruby_system = ruby_system, - dir_select_low_bit = options.numa_high_bit + dir_bits - 1, + dir_select_low_bit = numa_bit - dir_bits + 1, dir_select_num_bits = dir_bits) ruby_system.io_controller = io_controller diff --git a/src/mem/protocol/GPU_RfO-TCCdir.sm b/src/mem/protocol/GPU_RfO-TCCdir.sm --- a/src/mem/protocol/GPU_RfO-TCCdir.sm +++ b/src/mem/protocol/GPU_RfO-TCCdir.sm @@ -49,6 +49,8 @@ int tcc_num_bits; int dir_low_bit; int dir_num_bits; + int num_sqc; + int num_tcp; // From the TCPs or SQCs MessageBuffer * requestFromTCP, 
network="From", virtual_network="1", vnet_type="request"; @@ -1072,8 +1074,8 @@ action(lpc_probeInvCore, "lpc", desc="local probe inv cores, no data") { peek(coreRequestNetwork_in, CPURequestMsg) { - TCC_dir_subtree.broadcast(MachineType:TCP); - TCC_dir_subtree.broadcast(MachineType:SQC); + TCC_dir_subtree.broadcast(MachineType:TCP, num_tcp); + TCC_dir_subtree.broadcast(MachineType:SQC, num_sqc); temp := cache_entry.Sharers; temp := temp.OR(cache_entry.Owner); @@ -1104,8 +1106,8 @@ } action(ipc_probeInvCore, "ipc", desc="probe inv cores, no data") { - TCC_dir_subtree.broadcast(MachineType:TCP); - TCC_dir_subtree.broadcast(MachineType:SQC); + TCC_dir_subtree.broadcast(MachineType:TCP, num_tcp); + TCC_dir_subtree.broadcast(MachineType:SQC, num_sqc); temp := cache_entry.Sharers; temp := temp.OR(cache_entry.Owner); diff --git a/src/mem/protocol/MESI_Three_Level-L0cache.sm b/src/mem/protocol/MESI_Three_Level-L0cache.sm --- a/src/mem/protocol/MESI_Three_Level-L0cache.sm +++ b/src/mem/protocol/MESI_Three_Level-L0cache.sm @@ -368,7 +368,8 @@ out_msg.addr := address; out_msg.Class := CoherenceClass:GETS; out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.Dest := createMachineID(MachineType:L1Cache, + machineID.getNum()); DPRINTF(RubySlicc, "address: %#x, destination: %s\n", address, out_msg.Dest); out_msg.MessageSize := MessageSizeType:Control; @@ -385,7 +386,8 @@ out_msg.Class := CoherenceClass:GETX; out_msg.Sender := machineID; DPRINTF(RubySlicc, "%s\n", machineID); - out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.Dest := createMachineID(MachineType:L1Cache, + machineID.getNum()); DPRINTF(RubySlicc, "address: %#x, destination: %s\n", address, out_msg.Dest); @@ -402,7 +404,8 @@ out_msg.addr := address; out_msg.Class := CoherenceClass:UPGRADE; out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.Dest := createMachineID(MachineType:L1Cache, + 
machineID.getNum()); DPRINTF(RubySlicc, "address: %#x, destination: %s\n", address, out_msg.Dest); @@ -421,7 +424,7 @@ out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.Dest := createMachineID(MachineType:L1Cache, machineID.getNum()); out_msg.MessageSize := MessageSizeType:Writeback_Data; } cache_entry.Dirty := false; @@ -433,7 +436,8 @@ out_msg.addr := address; out_msg.Class := CoherenceClass:INV_ACK; out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.Dest := createMachineID(MachineType:L1Cache, + machineID.getNum()); out_msg.MessageSize := MessageSizeType:Response_Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -454,7 +458,7 @@ out_msg.Class := CoherenceClass:PUTX; out_msg.Dirty := cache_entry.Dirty; out_msg.Sender:= machineID; - out_msg.Dest := createMachineID(MachineType:L1Cache, version); + out_msg.Dest := createMachineID(MachineType:L1Cache, machineID.getNum()); if (cache_entry.Dirty) { out_msg.MessageSize := MessageSizeType:Writeback_Data; diff --git a/src/mem/protocol/MESI_Three_Level-L1cache.sm b/src/mem/protocol/MESI_Three_Level-L1cache.sm --- a/src/mem/protocol/MESI_Three_Level-L1cache.sm +++ b/src/mem/protocol/MESI_Three_Level-L1cache.sm @@ -553,7 +553,8 @@ out_msg.addr := address; out_msg.Class := CoherenceClass:INV; out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L0Cache, version); + out_msg.Dest := createMachineID(MachineType:L0Cache, + machineID.getNum()); out_msg.MessageSize := MessageSizeType:Control; out_msg.DataBlk.alloc(block_size_bytes); } @@ -612,7 +613,8 @@ out_msg.addr := address; out_msg.Class := CoherenceClass:DATA; out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L0Cache, version); + out_msg.Dest := createMachineID(MachineType:L0Cache, + machineID.getNum()); out_msg.DataBlk := cache_entry.DataBlk; 
out_msg.MessageSize := MessageSizeType:Response_Data; } @@ -625,12 +627,11 @@ out_msg.addr := address; out_msg.Class := CoherenceClass:DATA_EXCLUSIVE; out_msg.Sender := machineID; - out_msg.Dest := createMachineID(MachineType:L0Cache, version); + out_msg.Dest := createMachineID(MachineType:L0Cache, + machineID.getNum()); out_msg.DataBlk := cache_entry.DataBlk; out_msg.Dirty := cache_entry.Dirty; out_msg.MessageSize := MessageSizeType:Response_Data; - - //cache_entry.Dirty := true; } } # Node ID b6d5bc19a4f14ea31685b8c0f173f6a49b26dcc7 # Parent 75e7a63a07c49086a8a67fb19503e6ca1579eea9 diff --git a/configs/ruby/GPU_RfO.py b/configs/ruby/GPU_RfO.py --- a/configs/ruby/GPU_RfO.py +++ b/configs/ruby/GPU_RfO.py @@ -430,7 +430,8 @@ dir_cntrl = DirCntrl(tccdir_low_bit=block_size_bits, tccdir_num_bits=tccdir_num_bits, tcc_low_bit=block_size_bits, - tcc_num_bits=tcc_num_bits) + tcc_num_bits=tcc_num_bits, + num_core_pair=int((options.num_cpus + 1) / 2)) dir_cntrl.create(options, ruby_system, system) dir_cntrl.number_of_TBEs = 2560 * options.num_compute_units #Enough TBEs for all TCP TBEs @@ -649,6 +650,8 @@ tcc_num_bits=tcc_num_bits, dir_low_bit=block_size_bits, dir_num_bits=dir_num_bits, + num_sqc=options.num_sqc, + num_tcp=options.num_compute_units, number_of_TBEs=\ options.num_compute_units*2560) #Enough TBEs for all TCP TBEs diff --git a/configs/ruby/GPU_VIPER.py b/configs/ruby/GPU_VIPER.py --- a/configs/ruby/GPU_VIPER.py +++ b/configs/ruby/GPU_VIPER.py @@ -390,7 +390,8 @@ tccdir_low_bit=block_size_bits, tccdir_num_bits=0, tcc_low_bit=block_size_bits, - tcc_num_bits=tcc_num_bits) + tcc_num_bits=tcc_num_bits, + num_core_pair=int((options.num_cpus + 1) / 2)) dir_cntrl.create(options, ruby_system, system) dir_cntrl.number_of_TBEs = options.num_tbes dir_cntrl.useL3OnWT = options.use_L3_on_WT diff --git a/configs/ruby/GPU_VIPER_Baseline.py b/configs/ruby/GPU_VIPER_Baseline.py --- a/configs/ruby/GPU_VIPER_Baseline.py +++ b/configs/ruby/GPU_VIPER_Baseline.py @@ -369,7 +369,8 @@ 
tcc_low_bit=block_size_bits, tcc_num_bits=tcc_num_bits, tccdir_low_bit=block_size_bits, - tccdir_num_bits=0) + tccdir_num_bits=0, + num_core_pair=int((options.num_cpus + 1) / 2)) dir_cntrl.create(options, ruby_system, system) dir_cntrl.number_of_TBEs = options.num_tbes dir_cntrl.useL3OnWT = options.use_L3_on_WT diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -295,7 +295,6 @@ public: typedef ${c_ident}Params Params; $c_ident(const Params *p); - static int getNumControllers(); void init(); MessageBuffer *getMandatoryQueue() const; @@ -366,7 +365,6 @@ Stats::Scalar m_counters[${ident}_State_NUM][${ident}_Event_NUM]; Stats::Scalar m_event_counters[${ident}_Event_NUM]; bool m_possible[${ident}_State_NUM][${ident}_Event_NUM]; -static int m_num_controllers; // Internal functions ''') @@ -495,8 +493,6 @@ return new $c_ident(this); } -int $c_ident::m_num_controllers = 0; - // for adding information to the protocol debug trace stringstream ${ident}_transitionComment; @@ -511,9 +507,8 @@ : AbstractController(p) { m_machineID.type = MachineType_${ident}; - m_machineID.num = m_version; - m_num_controllers++; + m_machineID.num = p->version; m_in_ports = $num_in_ports; ''') code.indent() @@ -550,9 +545,6 @@ void $c_ident::initNetQueues() { - MachineType machine_type = string_to_MachineType("${{self.ident}}"); - int base M5_VAR_USED = MachineType_base_number(machine_type); - ''') code.indent() @@ -768,12 +760,6 @@ return m_possible[state][event]; } -int -$c_ident::getNumControllers() -{ - return m_num_controllers; -} - MessageBuffer* $c_ident::getMandatoryQueue() const { @@ -789,7 +775,7 @@ void $c_ident::print(ostream& out) const { - out << "[$c_ident " << m_version << "]"; + out << "[$c_ident " << m_machineID.num << "]"; } void $c_ident::resetStats() @@ -1148,7 +1134,7 @@ countTransition(state, event); DPRINTFR(ProtocolTrace, "%15d %3s %10s%20s 
%6s>%-6s %#x %s\\n", - curTick(), m_version, "${ident}", + curTick(), m_machineID.num, "${ident}", ${ident}_Event_to_string(event), ${ident}_State_to_string(state), ${ident}_State_to_string(next_state), @@ -1173,7 +1159,7 @@ code(''' } else if (result == TransitionResult_ResourceStall) { DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\\n", - curTick(), m_version, "${ident}", + curTick(), m_machineID.num, "${ident}", ${ident}_Event_to_string(event), ${ident}_State_to_string(state), ${ident}_State_to_string(next_state), @@ -1181,7 +1167,7 @@ } else if (result == TransitionResult_ProtocolStall) { DPRINTF(RubyGenerated, "stalling\\n"); DPRINTFR(ProtocolTrace, "%15s %3s %10s%20s %6s>%-6s %#x %s\\n", - curTick(), m_version, "${ident}", + curTick(), m_machineID.num, "${ident}", ${ident}_Event_to_string(event), ${ident}_State_to_string(state), ${ident}_State_to_string(next_state), diff --git a/src/mem/slicc/symbols/Type.py b/src/mem/slicc/symbols/Type.py --- a/src/mem/slicc/symbols/Type.py +++ b/src/mem/slicc/symbols/Type.py @@ -529,25 +529,6 @@ ${{self.c_ident}} &operator++(${{self.c_ident}} &e); ''') - # MachineType hack used to set the base component id for each Machine - if self.isMachineType: - code(''' -int ${{self.c_ident}}_base_level(const ${{self.c_ident}}& obj); -MachineType ${{self.c_ident}}_from_base_level(int); -int ${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj); -int ${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj); -''') - - for enum in self.enums.itervalues(): - if enum.ident == "DMA": - code(''' -MachineID map_Address_to_DMA(const Addr &addr); -''') - code(''' - -MachineID get${{enum.ident}}MachineID(NodeID RubyNode); -''') - if self.isStateDecl: code(''' @@ -672,143 +653,6 @@ } ''') - # MachineType hack used to set the base level and number of - # components for each Machine - if self.isMachineType: - code(''' -/** \\brief returns the base vector index for each machine type to be - * used by NetDest - * - * \\return the 
base vector index for each machine type to be used by NetDest - * \\see NetDest.hh - */ -int -${{self.c_ident}}_base_level(const ${{self.c_ident}}& obj) -{ - switch(obj) { -''') - - # For each field - code.indent() - for i,enum in enumerate(self.enums.itervalues()): - code(' case ${{self.c_ident}}_${{enum.ident}}:') - code(' return $i;') - code.dedent() - - # total num - code(''' - case ${{self.c_ident}}_NUM: - return ${{len(self.enums)}}; - - default: - panic("Invalid range for type ${{self.c_ident}}"); - } -} - -/** \\brief returns the machine type for each base vector index used by NetDest - * - * \\return the MachineType - */ -MachineType -${{self.c_ident}}_from_base_level(int type) -{ - switch(type) { -''') - - # For each field - code.indent() - for i,enum in enumerate(self.enums.itervalues()): - code(' case $i:') - code(' return ${{self.c_ident}}_${{enum.ident}};') - code.dedent() - - # Trailer - code(''' - default: - panic("Invalid range for type ${{self.c_ident}}"); - } -} - -/** \\brief The return value indicates the number of components created - * before a particular machine\'s components - * - * \\return the base number of components for each machine - */ -int -${{self.c_ident}}_base_number(const ${{self.c_ident}}& obj) -{ - int base = 0; - switch(obj) { -''') - - # For each field - code.indent() - code(' case ${{self.c_ident}}_NUM:') - for enum in reversed(self.enums.values()): - # Check if there is a defined machine with this type - if enum.primary: - code(' base += ${{enum.ident}}_Controller::getNumControllers();') - else: - code(' base += 0;') - code(' case ${{self.c_ident}}_${{enum.ident}}:') - code(' break;') - code.dedent() - - code(''' - default: - panic("Invalid range for type ${{self.c_ident}}"); - } - - return base; -} - -/** \\brief returns the total number of components for each machine - * \\return the total number of components for each machine - */ -int -${{self.c_ident}}_base_count(const ${{self.c_ident}}& obj) -{ - switch(obj) { -''') 
- - # For each field - for enum in self.enums.itervalues(): - code('case ${{self.c_ident}}_${{enum.ident}}:') - if enum.primary: - code('return ${{enum.ident}}_Controller::getNumControllers();') - else: - code('return 0;') - - # total num - code(''' - case ${{self.c_ident}}_NUM: - default: - panic("Invalid range for type ${{self.c_ident}}"); - } -} -''') - - for enum in self.enums.itervalues(): - if enum.ident == "DMA": - code(''' -MachineID -map_Address_to_DMA(const Addr &addr) -{ - MachineID dma = {MachineType_DMA, 0}; - return dma; -} -''') - - code(''' - -MachineID -get${{enum.ident}}MachineID(NodeID RubyNode) -{ - MachineID mach = {MachineType_${{enum.ident}}, RubyNode}; - return mach; -} -''') - # Write the file code.write(path, "%s.cc" % self.c_ident)