diff --git a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm --- a/src/mem/protocol/MOESI_CMP_directory-L1cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L1cache.sm @@ -78,6 +78,14 @@ OI, AccessPermission:Busy, "OI", desc="Issued PutO, waiting for ack"; MI, AccessPermission:Busy, "MI", desc="Issued PutX, waiting for ack"; II, AccessPermission:Busy, "II", desc="Issued PutX/O, saw Fwd_GETS or Fwd_GETX, waiting for ack"; + + // Transition States Related to Flushing + MI_F, AccessPermission:Busy, "MI_F", desc="Issued PutX due to a Flush, waiting for ack"; + MM_F, AccessPermission:Busy, "MM_F", desc="Issued GETF due to a Flush, waiting for ack"; + IM_F, AccessPermission:Busy, "IM_F", desc="Issued GetX due to a Flush"; + SM_F, AccessPermission:Read_Only, "SM_F", desc="Issued GetX, we still have an old copy of the line"; + OM_F, AccessPermission:Read_Only, "OM_F", desc="Issued GetX, received data"; + MM_WF, AccessPermission:Busy, "MM_WF", desc="Issued GetX, received exclusive data"; } // EVENTS @@ -91,6 +99,8 @@ Own_GETX, desc="We observe our own GetX forwarded back to us"; Fwd_GETX, desc="A GetX from another processor"; Fwd_GETS, desc="A GetS from another processor"; + Own_GETF, desc="We observe our own GetF forwarded back to us"; + Fwd_GETF, desc="A GetF from another processor"; Fwd_DMA, desc="A GetS from another processor"; Inv, desc="Invalidations from the directory"; @@ -108,6 +118,10 @@ // Timeouts Use_Timeout, desc="lockout period ended"; + + // For Flush + Flush_line, desc="flush the cache line from all caches"; + Block_Ack, desc="the directory is blocked and ready for the flush"; } // TYPES @@ -239,6 +253,8 @@ return Event:Ifetch; } else if ((type == RubyRequestType:ST) || (type == RubyRequestType:ATOMIC)) { return Event:Store; + } else if ((type == RubyRequestType:FLUSH)) { + return Event:Flush_line; } else { error("Invalid RubyRequestType"); } @@ -294,7 +310,15 @@ trigger(Event:Fwd_GETX, in_msg.Address, 
getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); } - } else if (in_msg.Type == CoherenceRequestType:GETS) { + } else if (in_msg.Type == CoherenceRequestType:GETF) { + if (in_msg.Requestor == machineID && in_msg.RequestorMachine == MachineType:L1Cache) { + trigger(Event:Own_GETF, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } else { + trigger(Event:Fwd_GETF, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } + } else if (in_msg.Type == CoherenceRequestType:GETS) { trigger(Event:Fwd_GETS, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); } else if (in_msg.Type == CoherenceRequestType:DMA_READ) { @@ -312,7 +336,10 @@ } else if (in_msg.Type == CoherenceRequestType:INV) { trigger(Event:Inv, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); - } else { + } else if (in_msg.Type == CoherenceRequestType:BLOCK_ACK) { + trigger(Event:Block_Ack, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } else { error("Unexpected message"); } } @@ -449,6 +476,22 @@ } } + action(bf_issueGETF, "bf", desc="Issue GETF") { + peek(mandatoryQueue_in, RubyRequest) { + enqueue(requestNetwork_out, RequestMsg, latency=request_latency) { + assert(is_valid(tbe)); + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETF; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.AccessMode := in_msg.AccessMode; + out_msg.Prefetch := in_msg.Prefetch; + } + } + } + action(d_issuePUTX, "d", desc="Issue PUTX") { // enqueue(writebackNetwork_out, RequestMsg, latency=request_latency) { enqueue(requestNetwork_out, RequestMsg, latency=request_latency) { @@ -462,6 +505,17 @@ } } + action(df_issuePUTF, "df", desc="Issue PUTF") { + enqueue(requestNetwork_out, RequestMsg, latency=request_latency) { + out_msg.Address := 
address; + out_msg.Type := CoherenceRequestType:PUTF; + out_msg.Requestor := machineID; + out_msg.Destination.add(mapAddressToRange(address, MachineType:L2Cache, + l2_select_low_bit, l2_select_num_bits)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + action(dd_issuePUTO, "\d", desc="Issue PUTO") { // enqueue(writebackNetwork_out, RequestMsg, latency=request_latency) { enqueue(requestNetwork_out, RequestMsg, latency=request_latency) { @@ -641,6 +695,12 @@ cache_entry.Dirty := true; } + action(hh_flush_hit, "\hf", desc="Notify sequencer that flush completed.") { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "%s\n", tbe.DataBlk); + sequencer.writeCallback(address, GenericMachineType:L1Cache,tbe.DataBlk); + } + action(i_allocateTBE, "i", desc="Allocate TBE") { check_allocate(TBEs); TBEs.allocate(address); @@ -650,11 +710,19 @@ tbe.Dirty := cache_entry.Dirty; } + action(it_allocateTBE, "it", desc="Allocate TBE") { + check_allocate(TBEs); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := false; + } + action(j_popTriggerQueue, "j", desc="Pop trigger queue.") { triggerQueue_in.dequeue(); } action(jj_unsetUseTimer, "\jj", desc="Unset use timer.") { + DPRINTF(RubySlicc, "L1 unsetting UseTimeout on Address %s\n", address); useTimerTable.unset(address); } @@ -681,6 +749,18 @@ } } + action(mm_decrementNumberOfMessagesByOne, "p", desc="Decrement the number of messages for which we're waiting by one") { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "L1 decrementNumberOfMessagesByOne: %d\n", tbe.NumPendingMsgs); + tbe.NumPendingMsgs := tbe.NumPendingMsgs - 1; + } + + action(mm_incrementNumberOfMessagesByOne, "\p", desc="Increment the number of messages for which we're waiting by one") { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "L1 incrementNumberOfMessagesByOne: %d\n", tbe.NumPendingMsgs); + tbe.NumPendingMsgs := tbe.NumPendingMsgs + 1; + } + action(n_popResponseQueue, "n", desc="Pop response queue") { responseToL1Cache_in.dequeue(); } 
@@ -696,6 +776,7 @@ } action(o_scheduleUseTimeout, "oo", desc="Schedule a use timeout.") { + DPRINTF(RubySlicc, "L1 scheduling UseTimeout on Address %s \n", address); useTimerTable.set(address, 50); } @@ -821,6 +902,23 @@ } } + action(uf_writeDataToCacheTBE, "uf", desc="Write data to TBE") { + peek(responseToL1Cache_in, ResponseMsg) { + assert(is_valid(tbe)); + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + } + } + + action(uf_writeDataToCacheTBEVerify, "ufv", desc="Write data to TBE") { + peek(responseToL1Cache_in, ResponseMsg) { + assert(is_valid(tbe)); + assert(tbe.DataBlk == in_msg.DataBlk); + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + } + } + action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") { peek(responseToL1Cache_in, ResponseMsg) { assert(is_valid(cache_entry)); @@ -877,15 +975,19 @@ //***************************************************** // Transitions for Load/Store/L2_Replacement from transient states - transition({IM, SM, OM, IS, OI, SI, MI, II}, {Store, L1_Replacement}) { + transition({IM, IM_F, MM_F, SM, SM_F, OM, OM_F, IS, OI, SI, MI, II}, {Store, L1_Replacement}) { zz_recycleMandatoryQueue; } - transition({M_W, MM_W}, L1_Replacement) { + transition({IM, IM_F, MM_F, MI_F, SM, SM_F, OM, OM_F, IS, OI, SI, MI, II}, {Flush_line}) { zz_recycleMandatoryQueue; } - transition({M_W, MM_W}, {Fwd_GETS, Fwd_DMA, Fwd_GETX, Own_GETX, Inv}) { + transition({M_W, MM_W, MM_WF}, {L1_Replacement, Flush_line}) { /* Flush_line is a mandatory-queue event (RubyRequestType:FLUSH), so it is recycled on the mandatory queue */ + zz_recycleMandatoryQueue; + } + + transition({M_W, MM_W, MM_WF}, {Fwd_GETS, Fwd_DMA, Fwd_GETX, Fwd_GETF, Own_GETX, Own_GETF, Inv}) { z_recycleRequestQueue; } @@ -918,6 +1020,13 @@ k_popMandatoryQueue; } + transition(I, Flush_line, IM_F) { + it_allocateTBE; + bf_issueGETF; + uu_profileMiss; + k_popMandatoryQueue; + } + transition(I, L1_Replacement) { kk_deallocateL1CacheBlock; } @@ -940,6 +1049,15 @@ k_popMandatoryQueue; } + transition(S, Flush_line, SM_F) { + i_allocateTBE; + 
bf_issueGETF; + uu_profileMiss; + forward_eviction_to_cpu; + kk_deallocateL1CacheBlock; + k_popMandatoryQueue; + } + transition(S, L1_Replacement, SI) { i_allocateTBE; dd_issuePUTS; @@ -977,6 +1095,15 @@ k_popMandatoryQueue; } + transition(O, Flush_line, OM_F) { + i_allocateTBE; + bf_issueGETF; + uu_profileMiss; + forward_eviction_to_cpu; + kk_deallocateL1CacheBlock; + k_popMandatoryQueue; + } + transition(O, L1_Replacement, OI) { i_allocateTBE; dd_issuePUTO; @@ -990,6 +1117,12 @@ l_popForwardQueue; } + transition(O, Fwd_GETF, I) { + ee_sendDataExclusive; + forward_eviction_to_cpu; + l_popForwardQueue; + } + transition(O, Fwd_GETS) { e_sendData; l_popForwardQueue; @@ -1012,6 +1145,15 @@ k_popMandatoryQueue; } + transition(MM, Flush_line, MI_F) { + i_allocateTBE; + hh_flush_hit; + df_issuePUTF; + forward_eviction_to_cpu; + kk_deallocateL1CacheBlock; + k_popMandatoryQueue; + } + transition(MM, L1_Replacement, MI) { i_allocateTBE; d_issuePUTX; @@ -1031,12 +1173,36 @@ l_popForwardQueue; } - transition(MM, Fwd_DMA, MM) { + transition(MM, Fwd_DMA) { e_sendData; ub_dmaUnblockL2Cache; l_popForwardQueue; } + // Transitions from MM_F + transition(MM_F, Block_Ack, MI_F) { + df_issuePUTF; + l_popForwardQueue; + } + + transition(MM_F, Fwd_GETX, IM_F) { + q_sendExclusiveDataFromTBEToCache; + mm_incrementNumberOfMessagesByOne; + l_popForwardQueue; + } + + transition(MM_F, Fwd_GETS, IM_F) { + q_sendExclusiveDataFromTBEToCache; + mm_incrementNumberOfMessagesByOne; + l_popForwardQueue; + } + + transition(MM_F, Fwd_DMA, OM_F) { + q_sendDataFromTBEToCache; + ub_dmaUnblockL2Cache; + l_popForwardQueue; + } + // Transitions from M transition({M, M_W}, {Load, Ifetch}) { h_load_hit; @@ -1053,6 +1219,15 @@ k_popMandatoryQueue; } + transition(M, Flush_line, MI_F) { + i_allocateTBE; + hh_flush_hit; + df_issuePUTF; + forward_eviction_to_cpu; + kk_deallocateL1CacheBlock; + k_popMandatoryQueue; + } + transition(M, L1_Replacement, MI) { i_allocateTBE; d_issuePUTX; @@ -1085,7 +1260,7 @@ 
l_popForwardQueue; } - transition(IM, Ack) { + transition({IM, IM_F}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1098,6 +1273,27 @@ n_popResponseQueue; } + transition(IM_F, {Exclusive_Data, Data}, OM_F) { + uf_writeDataToCacheTBE; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + // L2 must be currently issuing PUTF to Dir + // give it time to finish otherwise FLUSH + // will be reissued + transition(IM_F, Writeback_Nack) { + hh_flush_hit; + o_scheduleUseTimeout; + l_popForwardQueue; + } + + transition(IM_F, Use_Timeout, I) { + s_deallocateTBE; + jj_unsetUseTimer; + } + // Transitions from SM transition(SM, Inv, IM) { f_sendAck; @@ -1105,7 +1301,13 @@ l_popForwardQueue; } - transition(SM, Ack) { + transition(SM_F, Inv, IM_F) { + f_sendAck; + forward_eviction_to_cpu; + l_popForwardQueue; + } + + transition({SM, SM_F}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1118,44 +1320,60 @@ n_popResponseQueue; } - transition(SM, Fwd_GETS) { + transition(SM_F, {Data, Exclusive_Data}, OM_F) { + uf_writeDataToCacheTBEVerify; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition({SM, SM_F}, Fwd_GETS) { e_sendData; l_popForwardQueue; } - transition(SM, Fwd_DMA) { + transition({SM, SM_F}, Fwd_DMA) { e_sendData; ub_dmaUnblockL2Cache; l_popForwardQueue; } // Transitions from OM - transition(OM, Own_GETX) { + transition({OM}, Own_GETX) { mm_decrementNumberOfMessages; o_checkForCompletion; l_popForwardQueue; } - // transition(OM, Fwd_GETX, OMF) { transition(OM, Fwd_GETX, IM) { ee_sendDataExclusive; l_popForwardQueue; } - transition(OM, Fwd_GETS) { + transition({OM_F}, Own_GETF) { + mm_decrementNumberOfMessages; + o_checkForCompletion; + l_popForwardQueue; + } + + transition(OM_F, Fwd_GETX, IM_F) { + ee_sendDataExclusive; + l_popForwardQueue; + } + + transition({OM, OM_F}, Fwd_GETS) { e_sendData; l_popForwardQueue; } - transition(OM, 
Fwd_DMA) { + transition({OM, OM_F}, Fwd_DMA) { e_sendData; ub_dmaUnblockL2Cache; l_popForwardQueue; } - //transition({OM, OMF}, Ack) { - transition(OM, Ack) { + transition({OM, OM_F}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1169,10 +1387,25 @@ j_popTriggerQueue; } + //transition(OM_F, All_acks, MI_F) { + transition(OM_F, All_acks, MM_WF) { + hh_flush_hit; + gg_sendUnblockExclusive; + //df_issuePUTF; + o_scheduleUseTimeout; + j_popTriggerQueue; + } + transition(MM_W, Use_Timeout, MM) { jj_unsetUseTimer; } + //transition(MM_WF, Use_Timeout, MM_F) { + transition(MM_WF, Use_Timeout, MI_F) { + df_issuePUTF; + jj_unsetUseTimer; + } + // Transitions from IS transition(IS, Inv) { @@ -1249,6 +1482,19 @@ l_popForwardQueue; } + transition({MI_F}, Writeback_Ack, I) { + hh_flush_hit; + g_sendUnblock; + s_deallocateTBE; + l_popForwardQueue; + } + + transition({MI_F}, Writeback_Ack_Data, I) { + qq_sendWBDataFromTBEToL2; + s_deallocateTBE; + l_popForwardQueue; + } + transition({MI, OI}, Writeback_Nack, OI) { // FIXME: This might cause deadlock by re-using the writeback // channel, we should handle this case differently. diff --git a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm --- a/src/mem/protocol/MOESI_CMP_directory-L2cache.sm +++ b/src/mem/protocol/MOESI_CMP_directory-L2cache.sm @@ -68,17 +68,21 @@ OLSX, AccessPermission:Read_Only, desc="Owned with local sharers, chip is exclusive"; SLS, AccessPermission:Read_Only, desc="Shared with local sharers"; M, AccessPermission:Read_Write, desc="Modified"; - + M_FL, AccessPermission:Busy, desc="Modified pending Flush"; // Transient States - IFGX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner/exclusive. No other on-chip invs needed"; + IFGX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner/exclusive. 
No other on-chip invs needed"; IFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner"; ISFGS, AccessPermission:Busy, desc="Blocked, forwarded global GETS to local owner, local sharers exist"; + IFGF, AccessPermission:Busy, desc="Blocked, forwarded global GETF to local owner/exclusive. No other on-chip invs needed"; // UNUSED IFGXX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to local owner but may need acks from other sharers"; + IFGFF, AccessPermission:Busy, desc="Blocked, forwarded global GETF to local owner but may need acks from other sharers"; + OFGX, AccessPermission:Busy, desc="Blocked, forwarded global GETX to owner and got data but may need acks"; OLSF, AccessPermission:Busy, desc="Blocked, got Fwd_GETX with local sharers, waiting for local inv acks"; + OLSFF, AccessPermission:Busy, desc="Blocked, got Fwd_GETF with local sharers, waiting for local inv acks"; // writebacks ILOW, AccessPermission:Busy, desc="local WB request, was ILO"; @@ -102,6 +106,15 @@ IFLOSX, AccessPermission:Busy, desc="Blocked, forwarded local GETS to local owner w/ other sharers, chip is exclusive"; IFLXO, AccessPermission:Busy, desc="Blocked, forwarded local GETX to local owner with other sharers, chip is exclusive"; + // Flushing States + ILOW_FL, AccessPermission:Busy, desc="local WB Flush Request, was ILO"; + ILOXW_FL, AccessPermission:Busy, desc="local WB Flush Request, was ILOX"; + ILXW_FL, AccessPermission:Busy, desc="local WB Flush Request, was ILX"; + + IFLOX_FL, AccessPermission:Busy, desc="Blocked, forwarded local GETF to local owner but chip is exclusive"; + IFLOSX_FL, AccessPermission:Busy, desc="Blocked, forwarded local GETF to local owner w/ other sharers, chip is exclusive"; + IFLXO_FL, AccessPermission:Busy, desc="Blocked, forwarded local GETF to local owner with other sharers, chip is exclusive"; + IGS, AccessPermission:Busy, desc="Semi-blocked, issued local GETS to directory"; IGM, AccessPermission:Busy, desc="Blocked, 
issued local GETX to directory. Need global acks and data"; IGMLS, AccessPermission:Busy, desc="Blocked, issued local GETX to directory but may need to INV local sharers"; @@ -112,6 +125,16 @@ IGMIOFS, AccessPermission:Busy, desc="Blocked, issued local GETX, local owner, waiting for global acks, got Fwd_GETS"; OGMIOF, AccessPermission:Busy, desc="Blocked, issued local GETX, was owner, waiting for global acks, got Fwd_GETX"; + // Flushing States + IGM_FL, AccessPermission:Busy, desc="Blocked, issued local GETF to directory. Need global acks and data"; + IGMLS_FL, AccessPermission:Busy, desc="Blocked, issued local GETF to directory but may need to INV local sharers"; + IGMO_FL, AccessPermission:Busy, desc="Blocked, have data for local GETF but need all acks"; + IGMIO_FL, AccessPermission:Busy, desc="Blocked, issued local GETF, local owner with possible local sharer, may need to INV"; + OGMIO_FL, AccessPermission:Busy, desc="Blocked, issued local GETF, was owner, may need to INV"; + IGMIOF_FL, AccessPermission:Busy, desc="Blocked, issued local GETF, local owner, waiting for global acks, got Fwd_GETX"; + IGMIOFS_FL, AccessPermission:Busy, desc="Blocked, issued local GETF, local owner, waiting for global acks, got Fwd_GETS"; + OGMIOF_FL, AccessPermission:Busy, desc="Blocked, issued local GETF, was owner, waiting for globals acks, got Fwd_GETX"; + II, AccessPermission:Busy, desc="Blocked, handling invalidations"; MM, AccessPermission:Busy, desc="Blocked, was M satisfying local GETX"; SS, AccessPermission:Busy, desc="Blocked, was S satisfying local GETS"; @@ -126,6 +149,12 @@ OLSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS"; ILSI, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS got Fwd_GETX"; + MM_FL, AccessPermission:Busy, desc="Blocked, was M satisfying local GETF"; + OO_FL, AccessPermission:Busy, desc="Blocked, was O satisfying local GETS"; + MI_FL, AccessPermission:Busy, desc="Blocked, doing writeback, was M_FL"; + MII_FL, 
AccessPermission:Busy, desc="Blocked, doing writeback, was M, got Fwd_GETF"; + ILSI_FL, AccessPermission:Busy, desc="Blocked, doing writeback, was OLS got Fwd_GETF"; + // DMA blocking states ILOSD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack"; ILOSXD, AccessPermission:Busy, desc="Blocked, waiting for DMA ack"; @@ -140,14 +169,18 @@ // Requests L1_GETS, desc="local L1 GETS request"; L1_GETX, desc="local L1 GETX request"; + L1_GETF, desc="local L1 GETF request"; L1_PUTO, desc="local owner wants to writeback"; L1_PUTX, desc="local exclusive wants to writeback"; + L1_PUTF, desc="local exclusive wants to writeback"; L1_PUTS_only, desc="only local sharer wants to writeback"; L1_PUTS, desc="local sharer wants to writeback"; Fwd_GETX, desc="A GetX from another processor"; + Fwd_GETF, desc="A GetF from another processor"; Fwd_GETS, desc="A GetS from another processor"; Fwd_DMA, desc="A request from DMA"; Own_GETX, desc="A GetX from this node"; + Own_GETF, desc="A GetF from this node"; Inv, desc="Invalidations from the directory"; // Responses @@ -164,11 +197,12 @@ Unblock, desc="Local L1 is telling L2 dir to unblock"; Exclusive_Unblock, desc="Local L1 is telling L2 dir to unblock"; + Block_Ack, desc="the directory is blocked and ready for the flush"; DmaAck, desc="DMA ack from local L1"; // events initiated by this L2 L2_Replacement, desc="L2 Replacement", format="!r"; - + L2_PUTF, desc="L2 Flushing"; } // TYPES @@ -549,7 +583,10 @@ if (in_msg.Type == TriggerType:ALL_ACKS) { trigger(Event:All_Acks, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); - } else { + } else if (in_msg.Type == TriggerType:L2_FLUSH) { + trigger(Event:L2_PUTF, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } else { error("Unexpected message"); } } @@ -569,7 +606,15 @@ trigger(Event:Fwd_GETX, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); } - } else if (in_msg.Type == CoherenceRequestType:GETS) { + } else if 
(in_msg.Type == CoherenceRequestType:GETF) { + if (in_msg.Requestor == machineID) { + trigger(Event:Own_GETF, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } else { + trigger(Event:Fwd_GETF, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } + } else if (in_msg.Type == CoherenceRequestType:GETS) { trigger(Event:Fwd_GETS, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); } else if(in_msg.Type == CoherenceRequestType:DMA_READ) { @@ -584,7 +629,10 @@ } else if (in_msg.Type == CoherenceRequestType:WB_NACK) { trigger(Event:Writeback_Nack, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); - } else { + } else if (in_msg.Type == CoherenceRequestType:BLOCK_ACK) { + trigger(Event:Block_Ack, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } else { error("Unexpected message"); } } @@ -598,7 +646,10 @@ if (in_msg.Type == CoherenceRequestType:GETX) { trigger(Event:L1_GETX, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); - } else if (in_msg.Type == CoherenceRequestType:GETS) { + } else if (in_msg.Type == CoherenceRequestType:GETF) { + trigger(Event:L1_GETF, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } else if (in_msg.Type == CoherenceRequestType:GETS) { trigger(Event:L1_GETS, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); } else if (in_msg.Type == CoherenceRequestType:PUTO) { @@ -607,7 +658,10 @@ } else if (in_msg.Type == CoherenceRequestType:PUTX) { trigger(Event:L1_PUTX, in_msg.Address, getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); - } else if (in_msg.Type == CoherenceRequestType:PUTS) { + } else if (in_msg.Type == CoherenceRequestType:PUTF) { + trigger(Event:L1_PUTF, in_msg.Address, + getCacheEntry(in_msg.Address), TBEs[in_msg.Address]); + } else if (in_msg.Type == CoherenceRequestType:PUTS) { Entry cache_entry := getCacheEntry(in_msg.Address); if (isOnlySharer(cache_entry, 
in_msg.Address, in_msg.Requestor)) { trigger(Event:L1_PUTS_only, in_msg.Address, @@ -714,6 +768,19 @@ } } + action(a_issueGETF, "\af", desc="issue local request globally") { + peek(L1requestNetwork_in, RequestMsg) { + enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETF; + out_msg.RequestorMachine := MachineType:L2Cache; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + } + } + } + action(b_issuePUTX, "b", desc="Issue PUTX") { enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) { out_msg.Address := address; @@ -736,6 +803,17 @@ } } + action(bf_issuePUTF, "\bf", desc="Issue PUTF") { + enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:PUTF; + out_msg.Requestor := machineID; + out_msg.RequestorMachine := MachineType:L2Cache; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + /* PUTO, but local sharers exist */ action(b_issuePUTO_ls, "\bb", desc="Issue PUTO") { enqueue(globalRequestNetwork_out, RequestMsg, latency=request_latency) { @@ -1138,6 +1216,12 @@ } } + action(gg_recordLocalExclusive, "\ggg", desc="Record new local exclusive sharer fromunblock message") { + peek(L1requestNetwork_in, RequestMsg) { + recordNewLocalExclusiveInDir(cache_entry, address, in_msg.Requestor); + } + } + action(gg_clearLocalSharers, "gg", desc="Clear local sharers") { removeAllLocalSharersFromDir(cache_entry, address); } @@ -1254,6 +1338,19 @@ } } + action(k_forwardLocalGETFToLocalOwner, "\kf", desc="Forward local request to local owner") { + enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) { + assert(is_valid(tbe)); + out_msg.Address := address; + out_msg.Type := 
CoherenceRequestType:GETF; + out_msg.Requestor := tbe.L1_GetX_ID; + out_msg.RequestorMachine := MachineType:L1Cache; + out_msg.Destination.add(localDirectory[address].Owner); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + out_msg.Acks := 1 + tbe.Local_GETX_IntAcks; + } + } + // same as previous except that it assumes to TBE is present to get number of acks action(kk_forwardLocalGETXToLocalExclusive, "kk", desc="Forward local request to local owner") { peek(L1requestNetwork_in, RequestMsg) { @@ -1269,6 +1366,20 @@ } } + action(kk_forwardLocalGETFToLocalExclusive, "kkf", desc="Forward local request to local owner") { + peek(L1requestNetwork_in, RequestMsg) { + enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) { + out_msg.Address := in_msg.Address; + out_msg.Type := CoherenceRequestType:GETF; + out_msg.Requestor := in_msg.Requestor; + out_msg.RequestorMachine := MachineType:L1Cache; + out_msg.Destination.add(getLocalOwner(cache_entry, in_msg.Address)); + out_msg.MessageSize := MessageSizeType:Forwarded_Control; + out_msg.Acks := 1; + } + } + } + action(kk_forwardLocalGETSToLocalOwner, "\kk", desc="Forward local request to local owner") { peek(L1requestNetwork_in, RequestMsg) { enqueue( localRequestNetwork_out, RequestMsg, latency=response_latency ) { @@ -1382,6 +1493,12 @@ } } + action(o_scheduleFlushLine, "\oof", desc="Schedule a self-Flush of the cache line") { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + out_msg.Type := TriggerType:L2_FLUSH; + } + } action( qq_sendDataFromTBEToMemory, "qq", desc="Send data from TBE to directory") { enqueue(responseNetwork_out, ResponseMsg, latency=response_latency) { @@ -1533,19 +1650,19 @@ // TRANSITIONS //***************************************************** - transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, 
OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) { + transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, IFLXO_FL, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, ILOW_FL, ILOXW_FL, ILXW_FL, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, MM_FL, OO_FL, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_PUTO, L1_PUTS, L1_PUTS_only, L1_PUTX}) { zz_recycleL1RequestQueue; } - transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS}) { + transition({II, IFGX, IFGS, ISFGS, IFGXX, IFLXO, IFLXO_FL, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, ILOW_FL, ILOXW_FL, ILXW_FL, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX, OLSXS, IGS, IGM, IGMLS, IGMO, IGMIO, OGMIO, IGMIOF, OGMIOF, MM, SS, OO, MM_FL, OO_FL, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {L1_GETX, L1_GETS, L1_GETF}) { zz_recycleL1RequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, MM, SS, OO, OI, MI, MII, OLSI, ILSI, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, IFLXO_FL, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, ILOW_FL, ILOXW_FL, ILXW_FL, SLSW, OLSW, ILSW, IW, ILXW, OW, SW, OXW, OLSXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, IGMLS, IGMO, MM, SS, OO, MM_FL, OO_FL, OI, MI, MII, OLSI, ILSI, 
SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, L2_Replacement) { zz_recycleResponseQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, IFLXO_FL, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, ILOW_FL, ILOXW_FL, ILXW_FL, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, IGS, IGM, MM, SS, OO, MM_FL, OO_FL, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Fwd_GETX, Fwd_GETS, Fwd_DMA}) { zz_recycleRequestQueue; } @@ -1553,7 +1670,7 @@ zz_recycleRequestQueue; } - transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) { + transition({IFGX, IFGS, ISFGS, IFGXX, IFLXO, IFLXO_FL, OFGX, ILOW, ILOXW, ILOSW, ILOSXW, ILOW_FL, ILOXW_FL, ILXW_FL, SLSW, OLSW, ILSW, IW, OW, SW, OXW, OLSXW, ILXW, IFLS, IFLO, IFLOX, IFLOXX, IFLOSX,OLSXS, MM, SS, OO, MM_FL, OO_FL, SLSS, OLSS, OLSF, IGMIOFS, ILOSD, ILOSXD, ILOD, ILXD, ILOXD}, {Inv}) { zz_recycleRequestQueue; } @@ -1723,6 +1840,13 @@ m_popRequestQueue; } + transition({ILX, ILO, ILOX}, Fwd_GETF, IFGF) { + i_allocateTBE; + t_recordFwdXID; + j_forwardGlobalRequestToLocalOwner; + m_popRequestQueue; + } + transition(IFGX, {Data_Exclusive, Data}, I) { i_copyDataToTBE; c_sendDataFromTBEToFwdGETX; @@ -1731,6 +1855,14 @@ n_popResponseQueue; } + transition(IFGF, {Data_Exclusive, Data}, I) { + i_copyDataToTBE; + c_sendDataFromTBEToFwdGETX; + gg_clearLocalSharers; + s_deallocateTBE; + n_popResponseQueue; + } + transition({ILOSX, ILOS}, Fwd_GETX, IFGXX) { i_allocateTBE; t_recordFwdXID; @@ -1740,21 +1872,29 @@ 
m_popRequestQueue; } - - transition(IFGXX, IntAck) { + transition({ILOSX, ILOS}, Fwd_GETF, IFGFF) { + i_allocateTBE; + t_recordFwdXID; + j_forwardGlobalRequestToLocalOwner; + ee_sendLocalInvSharersOnly; + ee_addLocalIntAck; + m_popRequestQueue; + } + + transition({IFGXX, IFGFF}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IFGXX, Data_Exclusive) { + transition({IFGXX, IFGFF}, Data_Exclusive) { i_copyDataToTBE; m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IFGXX, All_Acks, I) { + transition({IFGXX, IFGFF}, All_Acks, I) { c_sendDataFromTBEToFwdGETX; gg_clearLocalSharers; s_deallocateTBE; @@ -1770,6 +1910,13 @@ m_popRequestQueue; } + transition(O, Fwd_GETF, I) { + dd_sendDataToFwdGETX; + y_copyCacheStateToDir; + rr_deallocateL2CacheBlock; + m_popRequestQueue; + } + transition({O, OLS}, Fwd_GETS) { dd_sendDataToFwdGETS; m_popRequestQueue; @@ -1799,6 +1946,12 @@ m_popRequestQueue; } + transition(M, Fwd_GETF, I) { + dd_sendDataToFwdGETX; + rr_deallocateL2CacheBlock; + m_popRequestQueue; + } + // MAKE THIS THE SAME POLICY FOR NOW // transition(M, Fwd_GETS, O) { @@ -1825,13 +1978,20 @@ m_popRequestQueue; } - transition(OLSF, IntAck) { + transition({OLS, OLSX}, Fwd_GETF, OLSFF) { + i_allocateTBE; + t_recordFwdXID; + ee_sendLocalInv; + m_popRequestQueue; + } + + transition({OLSF, OLSFF}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(OLSF, All_Acks, I) { + transition({OLSF, OLSFF}, All_Acks, I) { c_sendDataFromTBEToFwdGETX; gg_clearLocalSharers; s_deallocateTBE; @@ -1906,7 +2066,7 @@ h_countLocalSharersExceptRequestor; // issue INVs to everyone except requestor ee_issueLocalInvExceptL1Requestor; - d_sendDataToL1GETX + d_sendDataToL1GETX; y_copyCacheStateToDir; r_setMRU; rr_deallocateL2CacheBlock; @@ -1914,7 +2074,22 @@ o_popL1RequestQueue; } - transition(IFLOX, Exclusive_Unblock, ILX) { + transition(OLSX, L1_GETF, 
IFLOX_FL) { + i_allocateTBE; + s_recordGetXL1ID; + // count number of INVs needed that doesn't include requestor + h_countLocalSharersExceptRequestor; + // issue INVs to everyone except requestor + ee_issueLocalInvExceptL1Requestor; + d_sendDataToL1GETX; + y_copyCacheStateToDir; + r_setMRU; + rr_deallocateL2CacheBlock; + uu_profileMiss; + o_popL1RequestQueue; + } + + transition({IFLOX, IFLOX_FL}, Exclusive_Unblock, ILX) { g_recordLocalExclusive; s_deallocateTBE; n_popResponseQueue; @@ -1936,9 +2111,13 @@ mm_decrementNumberOfMessagesExt; o_checkForExtCompletion; m_popRequestQueue; - - } - + } + + transition(IGMO_FL, Own_GETF) { + mm_decrementNumberOfMessagesExt; + o_checkForExtCompletion; + m_popRequestQueue; + } transition(ILX, L1_GETS, IFLOXX) { kk_forwardLocalGETSToLocalOwner; @@ -1971,6 +2150,14 @@ o_popL1RequestQueue; } + transition({ILX, ILOX}, L1_GETF, IFLOX_FL) { + i_allocateTBE; + kk_forwardLocalGETFToLocalExclusive; + e_sendAckToL1Requestor; + uu_profileMiss; + o_popL1RequestQueue; + } + transition(ILOX, L1_GETS, IFLOX) { kk_forwardLocalGETSToLocalOwner; uu_profileMiss; @@ -1997,7 +2184,7 @@ n_popResponseQueue; } - transition({IFLOSX, IFLOXX}, Exclusive_Unblock, ILX) { + transition({IFLOSX, IFLOXX, IFLOSX_FL}, Exclusive_Unblock, ILX) { g_recordLocalExclusive; n_popResponseQueue; } @@ -2019,15 +2206,32 @@ o_popL1RequestQueue; } + transition(ILOSX, L1_GETF, IFLXO_FL) { + i_allocateTBE; + s_recordGetXL1ID; + h_countLocalSharersExceptRequestor; + ee_issueLocalInvExceptL1Requestor; + k_forwardLocalGETFToLocalOwner; + e_sendAckToL1RequestorFromTBE; + uu_profileMiss; + o_popL1RequestQueue; + } + transition(IFLXO, Exclusive_Unblock, ILX) { g_recordLocalExclusive; s_deallocateTBE; n_popResponseQueue; } + transition(IFLXO_FL, Exclusive_Unblock, ILX) { + g_recordLocalExclusive; + s_deallocateTBE; + n_popResponseQueue; + } + // LOCAL REQUESTS THAT MUST ISSUE - transition(NP, {L1_PUTS, L1_PUTX, L1_PUTO}) { + transition(NP, {L1_PUTS, L1_PUTX, L1_PUTO, L1_PUTF}) { 
ll_writebackNack; o_popL1RequestQueue; } @@ -2048,6 +2252,14 @@ o_popL1RequestQueue; } + transition({NP, I}, L1_GETF, IGM_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + uu_profileMiss; + o_popL1RequestQueue; + } + transition(S, L1_GETX, IGM) { i_allocateTBE; s_recordGetXL1ID; @@ -2059,6 +2271,17 @@ o_popL1RequestQueue; } + transition(S, L1_GETF, IGM_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + y_copyCacheStateToDir; + r_setMRU; + rr_deallocateL2CacheBlock; + uu_profileMiss; + o_popL1RequestQueue; + } + transition(ILS, L1_GETX, IGMLS) { i_allocateTBE; s_recordGetXL1ID; @@ -2069,19 +2292,29 @@ o_popL1RequestQueue; } - transition(IGMLS, Inv) { + transition(ILS, L1_GETF, IGMLS_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + // count number of INVs (just sharers?) needed that doesn't include requestor + h_countLocalSharersExceptRequestor; + uu_profileMiss; + o_popL1RequestQueue; + } + + transition({IGMLS, IGMLS_FL}, Inv) { t_recordFwdXID; ee_sendLocalInv; m_popRequestQueue; } - transition(IGMLS, IntAck) { + transition({IGMLS, IGMLS_FL}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IGMLS, All_Acks, IGM) { + transition({IGMLS, IGMLS_FL}, All_Acks, IGM) { gg_clearLocalSharers; h_clearIntAcks; e_sendAck; @@ -2089,7 +2322,7 @@ } // transition(IGMLS, ExtAck, IGMO) { - transition(IGMLS, ExtAck) { + transition({IGMLS, IGMLS_FL}, ExtAck) { m_decrementNumberOfMessagesExt; o_checkForExtCompletion; n_popResponseQueue; @@ -2103,6 +2336,13 @@ n_popResponseQueue; } + transition(IGMLS_FL, {Data, Data_Exclusive}, IGMO_FL) { + ee_issueLocalInvExceptL1RequestorInTBE; + i_copyDataToTBE; + m_decrementNumberOfMessagesExt; + o_checkForExtCompletion; + n_popResponseQueue; + } transition(ILOS, L1_GETX, IGMIO) { i_allocateTBE; @@ -2112,6 +2352,14 @@ o_popL1RequestQueue; } + transition(ILOS, L1_GETF, IGMIO_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + uu_profileMiss; +
o_popL1RequestQueue; + } + // new exclusive happened while sharer attempted writeback transition(ILX, {L1_PUTS, L1_PUTS_only, L1_PUTO}) { ll_writebackNack; @@ -2135,6 +2383,18 @@ o_popL1RequestQueue; } + transition(OLS, L1_GETF, OGMIO_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + h_countLocalSharersExceptRequestor; + // COPY DATA FROM CACHE TO TBE (happens during i_allocateTBE) + y_copyCacheStateToDir; + rr_deallocateL2CacheBlock; + uu_profileMiss; + o_popL1RequestQueue; + } + transition(OGMIO, Fwd_GETS) { t_recordFwdSID; c_sendDataFromTBEToFwdGETS; @@ -2151,6 +2411,16 @@ o_popL1RequestQueue; } + transition(ILO, L1_GETF, IGMIO_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + // the following, of course, returns 0 sharers but do anyways for consistency + h_countLocalSharersExceptRequestor; + uu_profileMiss; + o_popL1RequestQueue; + } + transition({ILO, ILOX}, L1_PUTS) { ll_writebackNack; o_popL1RequestQueue; @@ -2164,6 +2434,14 @@ m_popRequestQueue; } + transition(IGMIO, Fwd_GETF, IGMIOF_FL) { + t_recordFwdXID; + j_forwardGlobalRequestToLocalOwner; + ee_sendLocalInvSharersOnly; + ee_addLocalIntAck; + m_popRequestQueue; + } + transition(IGMIO, Fwd_GETS, IGMIOFS) { t_recordFwdSID; j_forwardGlobalRequestToLocalOwner; @@ -2176,13 +2454,27 @@ n_popResponseQueue; } + // NOTE(review): this copies the data into the TBE and forwards it to the
+ // GETS requestor right away; confirm it need not wait for the GETF exc. copy + transition(IGMIOFS_FL, Data, IGMIO_FL) { + i_copyDataToTBE; + c_sendDataFromTBEToFwdGETS; + n_popResponseQueue; + } + transition(OGMIO, Fwd_GETX, OGMIOF) { t_recordFwdXID; ee_sendLocalInvSharersOnly; m_popRequestQueue; } - transition(OGMIOF, IntAck) { + transition(OGMIO, Fwd_GETF, OGMIOF_FL) { + t_recordFwdXID; + ee_sendLocalInvSharersOnly; + m_popRequestQueue; + } + + transition({OGMIOF, OGMIOF_FL}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; @@ -2195,13 +2487,20 @@ n_popTriggerQueue; } - transition(IGMIOF, IntAck) { + transition(OGMIOF_FL, All_Acks, IGM_FL) { + gg_clearLocalSharers; + hh_countLocalSharersExceptL1GETXRequestorInTBE; + c_sendDataFromTBEToFwdGETX; + n_popTriggerQueue; + } + + transition({IGMIOF, IGMIOF_FL}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; } - transition(IGMIOF, Data_Exclusive) { + transition({IGMIOF, IGMIOF_FL}, Data_Exclusive) { i_copyDataToTBE; m_decrementNumberOfMessagesInt; o_checkForIntCompletion; @@ -2214,6 +2513,12 @@ n_popTriggerQueue; } + transition(IGMIOF_FL, All_Acks, IGM_FL) { + gg_clearLocalSharers; + c_sendDataFromTBEToFwdGETX; + n_popTriggerQueue; + } + transition(IGMIO, All_Acks, IGMO) { hh_countLocalSharersExceptL1GETXRequestorInTBE; ee_issueLocalInvExceptL1RequestorInTBE; @@ -2222,17 +2527,36 @@ n_popTriggerQueue; } + transition(IGMIO_FL, All_Acks, IGMO_FL) { + hh_countLocalSharersExceptL1GETXRequestorInTBE; + ee_issueLocalInvExceptL1RequestorInTBE; + k_forwardLocalGETFToLocalOwner; + e_sendAckToL1RequestorFromTBE; + n_popTriggerQueue; + } + transition(OGMIO, All_Acks, IGMO) { ee_issueLocalInvExceptL1RequestorInTBE; c_sendDataFromTBEToL1GETX; n_popTriggerQueue; } + transition(OGMIO_FL, All_Acks, IGMO_FL) { + ee_issueLocalInvExceptL1RequestorInTBE; + c_sendDataFromTBEToL1GETX; + n_popTriggerQueue; + } + transition({IGMIO, OGMIO}, Own_GETX) { mm_decrementNumberOfMessagesExt; o_checkForExtCompletion;
m_popRequestQueue; - + } + + transition({IGMIO_FL, OGMIO_FL}, Own_GETF) { + mm_decrementNumberOfMessagesExt; + o_checkForExtCompletion; + m_popRequestQueue; } transition(IGM, {Data, Data_Exclusive}, IGMO) { @@ -2242,6 +2566,13 @@ n_popResponseQueue; } + transition(IGM_FL, {Data, Data_Exclusive}, IGMO_FL) { + i_copyDataToTBE; + m_decrementNumberOfMessagesExt; + o_checkForExtCompletion; + n_popResponseQueue; + } + transition({IGM, IGMIO, OGMIO}, ExtAck) { m_decrementNumberOfMessagesExt; o_checkForExtCompletion; @@ -2254,6 +2585,12 @@ n_popResponseQueue; } + transition({IGM_FL, IGMIO_FL, OGMIO_FL, IGMO_FL}, ExtAck) { + m_decrementNumberOfMessagesExt; + o_checkForExtCompletion; + n_popResponseQueue; + } + transition(IGS, Data) { i_copyDataToTBE; m_decrementNumberOfMessagesExt; @@ -2282,7 +2619,7 @@ n_popResponseQueue; } - transition(IGMO, All_Acks) { + transition({IGMO, IGMO_FL}, All_Acks) { c_sendDataFromTBEToL1GETX; n_popTriggerQueue; } @@ -2294,6 +2631,12 @@ n_popResponseQueue; } + transition(IGMO_FL, Exclusive_Unblock, ILX) { + g_recordLocalExclusive; + f_sendExclusiveUnblock; + s_deallocateTBE; + n_popResponseQueue; + } transition(SLS, L1_GETX, IGMLS) { i_allocateTBE; @@ -2306,7 +2649,19 @@ rr_deallocateL2CacheBlock; uu_profileMiss; o_popL1RequestQueue; - + } + + transition(SLS, L1_GETF, IGMLS_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + // count number of INVs needed that doesn't include requestor + h_countLocalSharersExceptRequestor; + // issue INVs to everyone except requestor + y_copyCacheStateToDir; + rr_deallocateL2CacheBlock; + uu_profileMiss; + o_popL1RequestQueue; } transition(SLS, L1_GETS, SLSS ) { @@ -2331,6 +2686,16 @@ o_popL1RequestQueue; } + transition(O, L1_GETF, IGMO_FL) { + i_allocateTBE; + s_recordGetXL1ID; + a_issueGETF; + y_copyCacheStateToDir; + rr_deallocateL2CacheBlock; + uu_profileMiss; + o_popL1RequestQueue; + } + transition(OLS, L1_GETS, OLSS) { d_sendDataToL1GETS; r_setMRU; @@ -2346,7 +2711,12 @@ t_recordFwdXID; 
c_sendDataFromTBEToFwdGETX; m_popRequestQueue; - + } + + transition(IGMO_FL, Fwd_GETF, IGM_FL) { + t_recordFwdXID; + c_sendDataFromTBEToFwdGETX; + m_popRequestQueue; } transition(IGMO, Fwd_GETS) { @@ -2369,11 +2739,27 @@ o_popL1RequestQueue; } + transition(M, L1_GETF, MM_FL) { + i_allocateTBE; + // should count 0 of course + h_countLocalSharersExceptRequestor; + d_sendDataToL1GETX; + y_copyCacheStateToDir; + rr_deallocateL2CacheBlock; + s_deallocateTBE; + o_popL1RequestQueue; + } + transition(MM, Exclusive_Unblock, ILX) { g_recordLocalExclusive; n_popResponseQueue; } + transition(MM_FL, Exclusive_Unblock, ILX) { + g_recordLocalExclusive; + n_popResponseQueue; + } + transition(M, L1_GETS, OO) { i_allocateTBE; // should count 0 of course @@ -2407,12 +2793,18 @@ } transition(OO, Exclusive_Unblock, ILX) { - g_recordLocalExclusive + g_recordLocalExclusive; y_copyCacheStateToDir; rr_deallocateL2CacheBlock; n_popResponseQueue; } + transition(OO_FL, Exclusive_Unblock, ILX) { + g_recordLocalExclusive; + y_copyCacheStateToDir; + rr_deallocateL2CacheBlock; + n_popResponseQueue; + } // L1 WRITEBACKS transition(ILO, L1_PUTO, ILOW) { @@ -2425,7 +2817,6 @@ o_popL1RequestQueue; } - transition(ILOS, L1_PUTO, ILOSW) { l_writebackAckNeedData; o_popL1RequestQueue; @@ -2436,6 +2827,15 @@ o_popL1RequestQueue; } + transition(ILO, L1_PUTF, ILOW_FL) { + l_writebackAckNeedData; + o_popL1RequestQueue; + } + + transition(ILOX, L1_PUTF, ILOXW_FL) { + l_writebackAckNeedData; + o_popL1RequestQueue; + } // hmmm...keep data or drop. Just drop for now transition(ILOS, L1_PUTS_only, ILOW) { @@ -2453,6 +2853,11 @@ n_popResponseQueue; } + transition(ILOW_FL, Unblock, ILO) { + gg_clearSharerFromL1Response; + n_popResponseQueue; + } + transition(ILOSX, L1_PUTS_only, ILOXW) { l_writebackAckDropData; o_popL1RequestQueue; @@ -2463,6 +2868,11 @@ n_popResponseQueue; } + transition(ILOXW_FL, Unblock, ILOX) { + gg_clearSharerFromL1Response; + n_popResponseQueue; + } + // hmmm...keep data or drop. 
Just drop for now transition(ILOS, L1_PUTS, ILOSW) { l_writebackAckDropData; @@ -2544,11 +2954,16 @@ n_popResponseQueue; } - transition(ILX, L1_PUTX, ILXW ) { + transition(ILX, L1_PUTX, ILXW) { l_writebackAckNeedData; o_popL1RequestQueue; } + transition(ILX, L1_PUTF, ILXW_FL) { + l_writebackAckNeedData; + o_popL1RequestQueue; + } + transition(ILXW, L1_WBDIRTYDATA, M) { gg_clearLocalSharers; vv_allocateL2CacheBlock; @@ -2557,6 +2972,27 @@ n_popResponseQueue; } + // Note: But now we need to write it back from L2 -> DRAM + transition(ILXW_FL, L1_WBDIRTYDATA, M_FL) { + gg_clearLocalSharers; + vv_allocateL2CacheBlock; + y_copyDirToCacheAndRemove; + u_writeDataToCache; + o_scheduleFlushLine; + n_popResponseQueue; + } + + // Note: clean writeback, we don't need to really write this back + // to main memory, but we do need to unblock it + // for now just writeback as well + transition(ILXW_FL, L1_WBCLEANDATA, M_FL) { + gg_clearLocalSharers; + vv_allocateL2CacheBlock; + y_copyDirToCacheAndRemove; + u_writeDataToCache; + n_popResponseQueue; + } + // clean writeback transition(ILXW, L1_WBCLEANDATA, M) { gg_clearLocalSharers; @@ -2571,6 +3007,12 @@ n_popResponseQueue; } + // Can this or should this be allowed to happen? 
+ transition(ILXW_FL, Unblock, ILX) { + // writeback canceled because L1 invalidated + n_popResponseQueue; + } + transition(ILSW, L1_WBCLEANDATA, SLS) { vv_allocateL2CacheBlock; y_copyDirToCacheAndRemove; @@ -2613,6 +3055,14 @@ n_popResponseQueue; } + transition(ILOXW_FL, {L1_WBDIRTYDATA, L1_WBCLEANDATA}, M_FL) { + vv_allocateL2CacheBlock; + y_copyDirToCacheAndRemove; + gg_clearOwnerFromL1Response; + u_writeDataToCache; + n_popResponseQueue; + } + transition(ILOSW, {L1_WBCLEANDATA, L1_WBDIRTYDATA}, OLS) { vv_allocateL2CacheBlock; y_copyDirToCacheAndRemove; @@ -2686,13 +3136,68 @@ rr_deallocateL2CacheBlock; } + transition(M_FL, L2_Replacement, MI_FL) { + bf_issuePUTF; + i_allocateTBE; + rr_deallocateL2CacheBlock; + } + + transition(M_FL, L2_PUTF, MI_FL) { + bf_issuePUTF; + i_allocateTBE; + rr_deallocateL2CacheBlock; + n_popTriggerQueue; + } + + // Writeback to DIR and Nack L1 + transition(M_FL, L1_GETF, MI_FL) { + bf_issuePUTF; + i_allocateTBE; + ll_writebackNack; + rr_deallocateL2CacheBlock; + o_popL1RequestQueue; + } + + transition(M_FL, L1_GETX, MM) { + i_allocateTBE; + // should count 0 of course + h_countLocalSharersExceptRequestor; + d_sendDataToL1GETX; + y_copyCacheStateToDir; + rr_deallocateL2CacheBlock; + s_deallocateTBE; + o_popL1RequestQueue; + } + + transition(M_FL, L1_GETS, OO_FL) { + i_allocateTBE; + // should count 0 of course + h_countLocalSharersExceptRequestor; + d_sendDataToL1GETX; + r_setMRU; + s_deallocateTBE; + o_popL1RequestQueue; + } + + transition(M_FL, Fwd_GETX, I) { + dd_sendDataToFwdGETX; + rr_deallocateL2CacheBlock; + m_popRequestQueue; + } + transition(OLSI, Fwd_GETX, ILSI) { t_recordFwdXID; ee_sendLocalInv; m_popRequestQueue; } - transition(ILSI, IntAck) { + transition(OLSI, Fwd_GETF, ILSI_FL) { + t_recordFwdXID; + ee_sendLocalInv; + m_popRequestQueue; + } + + transition({ILSI, ILSI_FL}, IntAck) { m_decrementNumberOfMessagesInt; o_checkForIntCompletion; n_popResponseQueue; @@ -2704,6 +3209,12 @@ n_popTriggerQueue; } + 
transition(ILSI_FL, All_Acks, MII_FL) { + gg_clearLocalSharers; + c_sendDataFromTBEToFwdGETX; + n_popTriggerQueue; + } + transition(OLSI, Fwd_GETS) { t_recordFwdSID; c_sendDataFromTBEToFwdGETS; @@ -2722,6 +3233,12 @@ m_popRequestQueue; } + // Already doing a WB, Nack this redundant request + transition({MI_FL}, L1_GETF) { + ll_writebackNack; + o_popL1RequestQueue; + } + transition(OLSI, Fwd_DMA) { cd_sendDataFromTBEToFwdDma; da_sendDmaAckUnblock; @@ -2740,6 +3257,12 @@ m_popRequestQueue; } + transition(MI_FL, Writeback_Ack, I) { + qq_sendDataFromTBEToMemory; + s_deallocateTBE; + m_popRequestQueue; + } + transition(MII, Writeback_Nack, I) { s_deallocateTBE; m_popRequestQueue; diff --git a/src/mem/protocol/MOESI_CMP_directory-dir.sm b/src/mem/protocol/MOESI_CMP_directory-dir.sm --- a/src/mem/protocol/MOESI_CMP_directory-dir.sm +++ b/src/mem/protocol/MOESI_CMP_directory-dir.sm @@ -58,10 +58,12 @@ OO, AccessPermission:Busy, desc="Blocked, was in owned"; MO, AccessPermission:Busy, desc="Blocked, going to owner or maybe modified"; MM, AccessPermission:Busy, desc="Blocked, going to modified"; + MM_F, AccessPermission:Busy, desc="Blocked, going to modified"; MM_DMA, AccessPermission:Busy, desc="Blocked, going to I"; MI, AccessPermission:Busy, desc="Blocked on a writeback"; MIS, AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received"; + MIF, AccessPermission:Busy, desc="Blocked on a writeback due to Flush Req"; OS, AccessPermission:Busy, desc="Blocked on a writeback"; OSS, AccessPermission:Busy, desc="Blocked on a writeback, but don't remove from sharers when received"; @@ -69,6 +71,10 @@ XI_U, AccessPermission:Busy, desc="In a stable state, going to I, waiting for an unblock"; OI_D, AccessPermission:Busy, desc="In O, going to I, waiting for data"; + XI_MF, AccessPermission:Busy, desc="In a stable state, going to I, waiting for the memory controller"; + XI_UF, AccessPermission:Busy, desc="In a stable state, going to I, waiting for an
unblock"; + OI_DF, AccessPermission:Busy, desc="In O, going to I, waiting for data"; + OD, AccessPermission:Busy, desc="In O, waiting for dma ack from L2"; MD, AccessPermission:Busy, desc="In M, waiting for dma ack from L2"; } @@ -77,6 +83,8 @@ enumeration(Event, desc="Directory events") { GETX, desc="A GETX arrives"; GETS, desc="A GETS arrives"; + GETF, desc="A GETF arrives"; + PUTF, desc="A PUTF arrives"; PUTX, desc="A PUTX arrives"; PUTO, desc="A PUTO arrives"; PUTO_SHARERS, desc="A PUTO arrives, but don't remove from sharers list"; @@ -265,7 +273,11 @@ trigger(Event:GETS, in_msg.Address, TBEs[in_msg.Address]); } else if (in_msg.Type == CoherenceRequestType:GETX) { trigger(Event:GETX, in_msg.Address, TBEs[in_msg.Address]); - } else if (in_msg.Type == CoherenceRequestType:PUTX) { + } else if (in_msg.Type == CoherenceRequestType:GETF) { + trigger(Event:GETF, in_msg.Address, TBEs[in_msg.Address]); + } else if (in_msg.Type == CoherenceRequestType:PUTF) { + trigger(Event:PUTF, in_msg.Address, TBEs[in_msg.Address]); + } else if (in_msg.Type == CoherenceRequestType:PUTX) { trigger(Event:PUTX, in_msg.Address, TBEs[in_msg.Address]); } else if (in_msg.Type == CoherenceRequestType:PUTO) { trigger(Event:PUTO, in_msg.Address, TBEs[in_msg.Address]); @@ -328,6 +340,18 @@ } } + action(bc_sendBlockAck, "bc", desc="Send block ack to the owner") { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=directory_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:BLOCK_ACK; + out_msg.Requestor := in_msg.Requestor; + out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + action(c_clearOwner, "c", desc="Clear the owner field") { getDirectoryEntry(address).Owner.clear(); } @@ -636,6 +660,11 @@ i_popIncomingRequestQueue; } + transition(I, GETF, MM_F) { + qf_queueMemoryFetchRequest; + i_popIncomingRequestQueue; + } + transition(I, DMA_READ, XI_M) { 
qf_queueMemoryFetchRequest; i_popIncomingRequestQueue; @@ -665,6 +694,12 @@ i_popIncomingRequestQueue; } + transition(S, GETF, MM_F) { + qf_queueMemoryFetchRequest; + g_sendInvalidations; + i_popIncomingRequestQueue; + } + transition(S, DMA_READ) { //qf_queueMemoryFetchRequest; p_fwdDataToDMA; @@ -707,6 +742,12 @@ i_popIncomingRequestQueue; } + transition(O, GETF, MM_F) { + f_forwardRequest; + g_sendInvalidations; + i_popIncomingRequestQueue; + } + transition(O, DMA_READ, OD) { f_forwardRequest; // this will cause the data to go to DMA directly //g_sendInvalidations; // this will cause acks to be sent to the DMA @@ -744,6 +785,16 @@ i_popIncomingRequestQueue; } + transition(M, GETF, MM_F) { + f_forwardRequest; + i_popIncomingRequestQueue; + } + + transition(M, PUTF, MIF) { + a_sendWriteBackAck; + i_popIncomingRequestQueue; + } + // no exclusive unblock will show up to the directory transition(M, DMA_READ, MD) { f_forwardRequest; // this will cause the data to go to DMA directly @@ -786,11 +837,16 @@ } - transition({MM, MO, MI, MIS, OS, OSS, XI_M, XI_U, OI_D, OD, MD}, {GETS, GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) { + transition(MM_F, PUTF, MIF) { + a_sendWriteBackAck; + i_popIncomingRequestQueue; + } + + transition({MM, MM_F, MO, MI, MIF, MIS, OS, OSS, XI_M, XI_U, OI_D, OD, MD}, {GETS, GETX, GETF, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) { zz_recycleRequest; } - transition({MM, MO}, Exclusive_Unblock, M) { + transition({MM, MO, MM_F}, Exclusive_Unblock, M) { cc_clearSharers; e_ownerIsUnblocker; j_popIncomingUnblockQueue; @@ -801,7 +857,7 @@ j_popIncomingUnblockQueue; } - transition({IS, SS, OO}, {GETX, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) { + transition({IS, SS, OO}, {GETX, GETF, PUTO, PUTO_SHARERS, PUTX, DMA_READ, DMA_WRITE}) { zz_recycleRequest; } @@ -852,6 +908,21 @@ j_popIncomingUnblockQueue; } + transition(MIF, Clean_Writeback, I) { + c_clearOwner; + cc_clearSharers; + ll_checkDataInMemory; + j_popIncomingUnblockQueue; + } + + 
transition(MIF, Dirty_Writeback, I) { + c_clearOwner; + cc_clearSharers; + l_writeDataToMemory; + qw_queueMemoryWBRequest; + j_popIncomingUnblockQueue; + } + transition(MIS, Dirty_Writeback, S) { c_moveOwnerToSharer; l_writeDataToMemory; @@ -904,12 +975,12 @@ j_popIncomingUnblockQueue; } - transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS}, Memory_Data) { + transition({I, S, O, M, IS, SS, OO, MO, MM, MM_F, MI, MIS, OS, OSS}, Memory_Data) { d_sendDataMsg; q_popMemQueue; } - transition({I, S, O, M, IS, SS, OO, MO, MM, MI, MIS, OS, OSS, XI_U, XI_M}, Memory_Ack) { + transition({I, S, O, M, IS, SS, OO, MO, MM, MM_F, MI, MIS, OS, OSS, XI_U, XI_M}, Memory_Ack) { //a_sendAck; q_popMemQueue; } diff --git a/src/mem/protocol/MOESI_CMP_directory-msg.sm b/src/mem/protocol/MOESI_CMP_directory-msg.sm --- a/src/mem/protocol/MOESI_CMP_directory-msg.sm +++ b/src/mem/protocol/MOESI_CMP_directory-msg.sm @@ -36,13 +36,16 @@ enumeration(CoherenceRequestType, desc="...") { GETX, desc="Get eXclusive"; GETS, desc="Get Shared"; + GETF, desc="Issue exclusive for Flushing"; PUTX, desc="Put eXclusive"; + PUTF, desc="PUT on a Flush"; PUTO, desc="Put Owned"; PUTO_SHARERS, desc="Put Owned, but sharers exist so don't remove from sharers list"; PUTS, desc="Put Shared"; WB_ACK, desc="Writeback ack"; WB_ACK_DATA, desc="Writeback ack"; WB_NACK, desc="Writeback neg. ack"; + BLOCK_ACK, desc="Dir Block ack"; INV, desc="Invalidation"; DMA_READ, desc="DMA Read"; @@ -65,6 +68,7 @@ // TriggerType enumeration(TriggerType, desc="...") { ALL_ACKS, desc="See corresponding event"; + L2_FLUSH, desc="See corresponding event"; } // TriggerMsg