diff --git a/src/cpu/testers/rubytest/Check.hh b/src/cpu/testers/rubytest/Check.hh --- a/src/cpu/testers/rubytest/Check.hh +++ b/src/cpu/testers/rubytest/Check.hh @@ -61,6 +61,8 @@ void initiatePrefetch(); void initiateAction(); void initiateCheck(); + //added by Somayeh + void initiateFlush(); void pickValue(); void pickInitiatingNode(); diff --git a/src/cpu/testers/rubytest/Check.cc b/src/cpu/testers/rubytest/Check.cc --- a/src/cpu/testers/rubytest/Check.cc +++ b/src/cpu/testers/rubytest/Check.cc @@ -62,7 +62,11 @@ if (m_status == TesterStatus_Idle) { initiateAction(); } else if (m_status == TesterStatus_Ready) { + if ((random() & 0xf) == 0) { - initiateCheck(); + initiateFlush(); + } else { + initiateCheck(); + } } else { // Pending - do nothing DPRINTF(RubyTest, @@ -186,6 +190,57 @@ (TesterStatus_to_string(m_status)).c_str()); } +//added by Somayeh +void +Check::initiateFlush() +{ + + DPRINTF(RubyTest, "initiating Flush\n"); + assert(m_status == TesterStatus_Ready); + + + int index = random() % m_num_cpu_sequencers; + RubyTester::CpuPort* port = + safe_cast<RubyTester::CpuPort*>(m_tester_ptr->getCpuPort(index)); + + Request::Flags flags; + + Request *req = new Request(m_address.getAddress(), CHECK_SIZE, flags, curTick(), + m_pc.getAddress()); + + Packet::Command cmd; + + cmd = MemCmd::FlushReq; + + + PacketPtr pkt = new Packet(req, cmd, port->idx); + + uint8_t* dataArray = new uint8_t[CHECK_SIZE]; + pkt->dataDynamicArray(dataArray); + + + // push the subblock onto the sender state. 
The sequencer will + // update the subblock on the return + pkt->senderState = + new SenderState(m_address, req->getSize(), pkt->senderState); + + if (port->sendTiming(pkt)) { + DPRINTF(RubyTest, "initiating Flush - successful\n"); + m_status = TesterStatus_Check_Pending; + } else { + // If the packet did not issue, must delete + SenderState* senderState = safe_cast<SenderState*>(pkt->senderState); + pkt->senderState = senderState->saved; + delete senderState; + delete pkt->req; + delete pkt; + + DPRINTF(RubyTest, "failed to initiate flush - sequencer not ready\n"); + } + +} + + void Check::initiateCheck() { diff --git a/src/mem/packet.hh b/src/mem/packet.hh --- a/src/mem/packet.hh +++ b/src/mem/packet.hh @@ -105,6 +105,7 @@ BadAddressError, // memory address invalid // Fake simulator-only commands PrintReq, // Print state matching address + FlushReq, //added by Somayeh for cache flush NUM_MEM_CMDS }; @@ -129,6 +130,7 @@ HasData, //!< There is an associated payload IsError, //!< Error response IsPrint, //!< Print state matching address (for debugging) + IsFlush, //!< Somayeh: Flush the address from caches NUM_COMMAND_ATTRIBUTES }; @@ -175,6 +177,8 @@ bool isLLSC() const { return testCmdAttrib(IsLlsc); } bool isError() const { return testCmdAttrib(IsError); } bool isPrint() const { return testCmdAttrib(IsPrint); } + //added by Somayeh + bool isFlush() const { return testCmdAttrib(IsFlush); } const Command responseCommand() const @@ -411,6 +415,9 @@ bool isLLSC() const { return cmd.isLLSC(); } bool isError() const { return cmd.isError(); } bool isPrint() const { return cmd.isPrint(); } + //added by Somayeh + bool isFlush() const { return cmd.isFlush(); } + // Snoop flags void assertMemInhibit() { flags.set(MEM_INHIBIT); } diff --git a/src/mem/packet.cc b/src/mem/packet.cc --- a/src/mem/packet.cc +++ b/src/mem/packet.cc @@ -148,7 +148,9 @@ /* BadAddressError -- memory address invalid */ { SET2(IsResponse, IsError), InvalidCmd, "BadAddressError" }, /* PrintReq */ - { 
SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" } + { SET2(IsRequest, IsPrint), InvalidCmd, "PrintReq" }, + /* Somayeh: Flush Request */ + { SET5(IsRequest, IsFlush, NeedsExclusive, HasData, NeedsResponse), InvalidCmd, "FlushReq" } }; bool diff --git a/src/mem/physical.cc b/src/mem/physical.cc --- a/src/mem/physical.cc +++ b/src/mem/physical.cc @@ -322,6 +322,8 @@ if (pkt->needsResponse()) { pkt->makeAtomicResponse(); } + } else if (pkt->isFlush()) { + } else { panic("unimplemented"); } @@ -360,6 +362,8 @@ prs->printLabels(); // Right now we just print the single byte at the specified address. ccprintf(prs->os, "%s%#x\n", prs->curPrefix(), *hostAddr); + } else if (pkt->isFlush()) { + } else { panic("PhysicalMemory: unimplemented functional command %s", pkt->cmdString()); diff --git a/src/mem/protocol/MOESI_hammer-cache.sm b/src/mem/protocol/MOESI_hammer-cache.sm --- a/src/mem/protocol/MOESI_hammer-cache.sm +++ b/src/mem/protocol/MOESI_hammer-cache.sm @@ -79,6 +79,17 @@ OT, AccessPermission:Busy, "OT", desc="O block transferring to L1"; MT, AccessPermission:Busy, "MT", desc="M block transferring to L1"; MMT, AccessPermission:Busy, "MMT", desc="MM block transferring to L1"; + +//added by Somayeh for Flushing! 
+ + MI_F, AccessPermission:Busy, "MI_F", desc="Issued PutX due to a Flush, waiting for ack"; + MI_F_W, AccessPermission:Busy, "MI_F_W", desc="Issued Block due to a Flush, waiting for ack"; + IM_F, AccessPermission:Busy, "IM_F", desc="Issued GetX due to a Flush"; + ISM_F, AccessPermission:Read_Only, "ISM_F", desc="Issued GetX, received data, waiting for all acks"; + SM_F, AccessPermission:Read_Only, "SM_F", desc="Issued GetX, we still have an old copy of the line"; + OM_F, AccessPermission:Read_Only, "OM_F", desc="Issued GetX, received data"; + MM_WF, AccessPermission:Busy, "MM_WF", desc="Issued GetX, received exclusive data"; + MM_F, AccessPermission:Busy, "MM_F", desc="Modified (dirty and locally modified), but it will be continued with Flush!"; } // EVENTS @@ -113,6 +124,13 @@ // Triggers All_acks, desc="Received all required data and message acks"; All_acks_no_sharers, desc="Received all acks and no other processor has a shared copy"; + + //Added by Somayeh + Flush_line, desc="flush the cache line from all caches"; + Complete_Flush, desc="continue flushing the cache line"; + Ready_to_Flush, desc="the cache line is ready to be flushed"; + Block_Ack, desc="the directory is blocked to be flushed"; + Start_Flush, desc="start the flush over"; } // TYPES @@ -219,6 +237,8 @@ return Event:Ifetch; } else if ((type == CacheRequestType:ST) || (type == CacheRequestType:ATOMIC)) { return Event:Store; + } else if ((type == CacheRequestType:FLUSH)) { + return Event:Flush_line; } else { error("Invalid CacheRequestType"); } @@ -274,6 +294,10 @@ trigger(Event:All_acks, in_msg.Address, cache_entry, tbe); } else if (in_msg.Type == TriggerType:ALL_ACKS_NO_SHARERS) { trigger(Event:All_acks_no_sharers, in_msg.Address, cache_entry, tbe); + } else if (in_msg.Type == TriggerType:COMPLETE_FLUSHING) { + trigger(Event:Complete_Flush, in_msg.Address, cache_entry, tbe); + } else if (in_msg.Type == TriggerType:START_FLUSH) { + trigger(Event:Start_Flush, in_msg.Address, cache_entry, tbe); } 
else { error("Unexpected message"); } @@ -316,7 +340,7 @@ Entry cache_entry := getCacheEntry(in_msg.Address); TBE tbe := TBEs[in_msg.Address]; - if (in_msg.Type == CoherenceRequestType:GETX) { + if ((in_msg.Type == CoherenceRequestType:GETX) || (in_msg.Type == CoherenceRequestType:GETF)) { trigger(Event:Other_GETX, in_msg.Address, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:MERGED_GETS) { trigger(Event:Merged_GETS, in_msg.Address, cache_entry, tbe); @@ -340,6 +364,8 @@ trigger(Event:Writeback_Ack, in_msg.Address, cache_entry, tbe); } else if (in_msg.Type == CoherenceRequestType:WB_NACK) { trigger(Event:Writeback_Nack, in_msg.Address, cache_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:BLOCK_ACK) { + trigger(Event:Block_Ack, in_msg.Address, cache_entry, tbe); } else { error("Unexpected message"); } @@ -502,6 +528,21 @@ } } + + action(bf_issueGETF, "bf", desc="Issue GETF") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { + assert(is_valid(tbe)); + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:GETF; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Request_Control; + out_msg.InitialRequestTime := get_time(); + tbe.NumPendingMsgs := machineCount(MachineType:L1Cache); // One from each other cache (n-1) plus the memory (+1) + } + } + + action(c_sendExclusiveData, "c", desc="Send exclusive data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { @@ -535,6 +576,26 @@ } } + action(df_issuePUTF, "df", desc="Issue PUTF") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:PUTF; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + 
+ action(db_issueBlock, "db", desc="Issue Block") { + enqueue(requestNetwork_out, RequestMsg, latency=issue_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:BLOCK; + out_msg.Requestor := machineID; + out_msg.Destination.add(map_Address_to_Directory(address)); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + action(e_sendData, "e", desc="Send data from cache to requestor") { peek(forwardToCache_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { @@ -581,7 +642,33 @@ } } } - + + action(et_sendDataSharedFromTBE, "\et", desc="Send data from TBE to requestor, keep a shared copy") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + assert(is_valid(tbe)); + out_msg.Address := address; + out_msg.Type := CoherenceResponseType:DATA_SHARED; + out_msg.Sender := machineID; + out_msg.Destination.add(in_msg.Requestor); + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); + if (in_msg.DirectedProbe) { + out_msg.Acks := machineCount(MachineType:L1Cache); + } else { + out_msg.Acks := 2; + } + out_msg.SilentAcks := in_msg.SilentAcks; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := in_msg.ForwardRequestTime; + } + } + } + + + action(em_sendDataSharedMultiple, "em", desc="Send data from cache to all requestors") { peek(forwardToCache_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { @@ -601,7 +688,28 @@ } } } - + + action(emt_sendDataSharedMultipleFromTBE, "emt", desc="Send data from tbe to all requestors") { + peek(forwardToCache_in, RequestMsg) { + enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { + assert(is_valid(tbe)); + out_msg.Address := address; + out_msg.Type := 
CoherenceResponseType:DATA_SHARED; + out_msg.Sender := machineID; + out_msg.Destination := in_msg.MergedRequestors; + out_msg.DataBlk := tbe.DataBlk; + out_msg.Dirty := tbe.Dirty; + DPRINTF(RubySlicc, "%s\n", out_msg.DataBlk); + out_msg.Acks := machineCount(MachineType:L1Cache); + out_msg.SilentAcks := in_msg.SilentAcks; + out_msg.MessageSize := MessageSizeType:Response_Data; + out_msg.InitialRequestTime := in_msg.InitialRequestTime; + out_msg.ForwardRequestTime := in_msg.ForwardRequestTime; + } + } + } + + action(f_sendAck, "f", desc="Send ack from cache to requestor") { peek(forwardToCache_in, RequestMsg) { enqueue(responseNetwork_out, ResponseMsg, latency=cache_response_latency) { @@ -704,6 +812,13 @@ } } + action(fh_flush_hit, "fh", desc="Notify sequencer that flush completed.") { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "%s\n", tbe.DataBlk); + sequencer.flushCallback(address, GenericMachineType:L1Cache, + tbe.DataBlk); + } + action(sx_external_store_hit, "sx", desc="store required external msgs.") { assert(is_valid(cache_entry)); assert(is_valid(tbe)); @@ -745,6 +860,16 @@ tbe.Sharers := false; } + + action(it_allocateTBE, "it", desc="Allocate TBE") { + check_allocate(TBEs); + TBEs.allocate(address); + set_tbe(TBEs[address]); + tbe.Dirty := false; + tbe.Sharers := false; + } + + action(j_popTriggerQueue, "j", desc="Pop trigger queue.") { triggerQueue_in.dequeue(); } @@ -837,6 +962,21 @@ } } + action(cf_flush, "cf", desc="complete flushing the line") { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + out_msg.Type := TriggerType:COMPLETE_FLUSHING; + } + } + + action(tf_flush, "tf", desc="trigger flush") { + enqueue(triggerQueue_out, TriggerMsg) { + out_msg.Address := address; + out_msg.Type := TriggerType:START_FLUSH; + } + } + + action(p_decrementNumberOfMessagesByOne, "p", desc="Decrement the number of messages for which we're waiting by one") { assert(is_valid(tbe)); tbe.NumPendingMsgs := tbe.NumPendingMsgs - 1; @@ -953,6 
+1093,17 @@ } } + action(uf_writeDataToCacheTBE, "uf", desc="Write data to TBE") { + peek(responseToCache_in, ResponseMsg) { + assert(is_valid(tbe)); + //cache_entry.DataBlk := in_msg.DataBlk; + //cache_entry.Dirty := in_msg.Dirty; + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty; + } + } + + action(v_writeDataToCacheVerify, "v", desc="Write data to cache, assert it was same as before") { peek(responseToCache_in, ResponseMsg) { assert(is_valid(cache_entry)); @@ -963,6 +1114,17 @@ cache_entry.Dirty := in_msg.Dirty || cache_entry.Dirty; } } + + action(vt_writeDataToTBEVerify, "vt", desc="Write data to TBE, assert it was same as before") { + peek(responseToCache_in, ResponseMsg) { + assert(is_valid(tbe)); + DPRINTF(RubySlicc, "Cached Data Block: %s, Msg Data Block: %s\n", + tbe.DataBlk, in_msg.DataBlk); + assert(tbe.DataBlk == in_msg.DataBlk); + tbe.DataBlk := in_msg.DataBlk; + tbe.Dirty := in_msg.Dirty || tbe.Dirty; + } + } action(gg_deallocateL1CacheBlock, "\g", desc="Deallocate cache block. 
Sets the cache to invalid, allowing a replacement in parallel with a fetch.") { if (L1DcacheMemory.isTagPresent(address)) { @@ -1024,23 +1186,36 @@ //***************************************************** // Transitions for Load/Store/L2_Replacement from transient states - transition({IM, SM, ISM, OM, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) { + transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II, IT, ST, OT, MT, MMT}, {Store, L2_Replacement}) { zz_stallAndWaitMandatoryQueue; } - transition({M_W, MM_W}, {L2_Replacement}) { + transition({IM, IM_F, MM_WF, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, OI, MI, II}, {Flush_line}) { zz_stallAndWaitMandatoryQueue; } - transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT}, {Load, Ifetch}) { + transition({M_W, MM_W}, {L2_Replacement, Flush_line}) { zz_stallAndWaitMandatoryQueue; } - transition({IM, SM, ISM, OM, IS, SS, MM_W, M_W, OI, MI, II, IT, ST, OT, MT, MMT}, L1_to_L2) { + transition({IM, IS, OI, MI, II, IT, ST, OT, MT, MMT, MI_F, MI_F_W, OM_F, MM_F, IM_F, ISM_F, SM_F, MM_WF}, {Load, Ifetch}) { zz_stallAndWaitMandatoryQueue; } - transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) { + transition({IM, IM_F, SM, SM_F, ISM, ISM_F, OM, OM_F, IS, SS, MM_W, MM_WF, M_W, OI, MI, II, IT, ST, OT, MT, MMT, MM_F, MI_F, MI_F_W}, L1_to_L2) { + zz_stallAndWaitMandatoryQueue; + } + + transition({MM_F, MI_F, MI_F_W}, {Store}) { + zz_stallAndWaitMandatoryQueue; + } + + transition({MI_F_W}, {Flush_line}) { + zz_stallAndWaitMandatoryQueue; + } + + + transition({IT, ST, OT, MT, MMT}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate, Flush_line}) { // stall } @@ -1213,6 +1388,19 @@ uu_profileMiss; k_popMandatoryQueue; } + + transition(I, Flush_line, IM_F) { + it_allocateTBE; + bf_issueGETF; + uu_profileMiss; + k_popMandatoryQueue; + } + + transition(I, Start_Flush, IM_F) { + it_allocateTBE; + 
bf_issueGETF; + j_popTriggerQueue; + } transition(I, L2_Replacement) { rr_deallocateL2CacheBlock; @@ -1237,6 +1425,14 @@ k_popMandatoryQueue; } + transition(S, Flush_line, SM_F) { + i_allocateTBE; + bf_issueGETF; + uu_profileMiss; + gg_deallocateL1CacheBlock; + k_popMandatoryQueue; + } + transition(S, L2_Replacement, I) { rr_deallocateL2CacheBlock; ka_wakeUpAllDependents; @@ -1266,6 +1462,23 @@ k_popMandatoryQueue; } + transition(O, Flush_line, OM_F) { + i_allocateTBE; + bf_issueGETF; + p_decrementNumberOfMessagesByOne; + uu_profileMiss; + gg_deallocateL1CacheBlock; + k_popMandatoryQueue; + } + + transition(O, Start_Flush, OM_F) { + i_allocateTBE; + bf_issueGETF; + gg_deallocateL1CacheBlock; + j_popTriggerQueue; + } + + transition(O, L2_Replacement, OI) { i_allocateTBE; d_issuePUT; @@ -1289,7 +1502,7 @@ } // Transitions from Modified - transition(MM, {Load, Ifetch}) { + transition({MM}, {Load, Ifetch}) { h_load_hit; k_popMandatoryQueue; } @@ -1299,6 +1512,31 @@ k_popMandatoryQueue; } + + transition({MM, M}, Flush_line, MI_F_W) { + i_allocateTBE; + db_issueBlock; + k_popMandatoryQueue; + } + + transition(MI_F_W, Block_Ack, MI_F) { + gg_deallocateL1CacheBlock; + df_issuePUTF; + l_popForwardQueue; + } + + + transition({MM_F}, Complete_Flush, MI_F) { + df_issuePUTF; + j_popTriggerQueue; + } + + + transition({MI_F, MM_F}, {Other_GETX, NC_DMA_GETS, Other_GETS, Merged_GETS, Other_GETS_No_Mig, Invalidate}) { + // stall + } + + transition(MM, L2_Replacement, MI) { i_allocateTBE; d_issuePUT; @@ -1331,6 +1569,34 @@ l_popForwardQueue; } + transition({MI_F_W}, {Other_GETX, Invalidate, Other_GETS}, I) { + c_sendExclusiveData; + l_popForwardQueue; + s_deallocateTBE; + gg_deallocateL1CacheBlock; + tf_flush; + } + + transition({MI_F_W}, NC_DMA_GETS) { //not sure! 
+ c_sendExclusiveData; + l_popForwardQueue; + } + + transition(MI_F_W, Other_GETS_No_Mig, O) { + ee_sendDataShared; + l_popForwardQueue; + s_deallocateTBE; + tf_flush; + } + + transition({MI_F_W}, Merged_GETS, O) { + em_sendDataSharedMultiple; + l_popForwardQueue; + s_deallocateTBE; + tf_flush; + } + + // Transitions from Dirty Exclusive transition(M, {Load, Ifetch}) { h_load_hit; @@ -1371,12 +1637,12 @@ // Transitions from IM - transition(IM, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) { + transition({IM, IM_F}, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) { f_sendAck; l_popForwardQueue; } - transition(IM, Ack) { + transition({IM, IM_F}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1389,6 +1655,14 @@ n_popResponseQueue; } + transition(IM_F, Data, ISM_F) { + uf_writeDataToCacheTBE; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + transition(IM, Exclusive_Data, MM_W) { u_writeDataToCache; m_decrementNumberOfMessages; @@ -1398,8 +1672,16 @@ kd_wakeUpDependents; } + transition(IM_F, Exclusive_Data, MM_WF) { + uf_writeDataToCacheTBE; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + + // Transitions from SM - transition(SM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) { + transition({SM, SM_F}, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) { ff_sendAckShared; l_popForwardQueue; } @@ -1409,7 +1691,12 @@ l_popForwardQueue; } - transition(SM, Ack) { + transition(SM_F, {Other_GETX, Invalidate}, IM_F) { + f_sendAck; + l_popForwardQueue; + } + + transition({SM, SM_F}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1422,8 +1709,15 @@ n_popResponseQueue; } + transition(SM_F, {Data, Exclusive_Data}, ISM_F) { + vt_writeDataToTBEVerify; + m_decrementNumberOfMessages; + o_checkForCompletion; + n_popResponseQueue; + } + // Transitions from ISM - transition(ISM, Ack) { + transition({ISM, 
ISM_F}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1437,6 +1731,14 @@ kd_wakeUpDependents; } + + + transition(ISM_F, All_acks_no_sharers, MM_F) { + cf_flush; + j_popTriggerQueue; + kd_wakeUpDependents; + } + // Transitions from OM transition(OM, {Other_GETX, Invalidate}, IM) { @@ -1445,6 +1747,13 @@ l_popForwardQueue; } + transition(OM_F, {Other_GETX, Invalidate}, IM_F) { + q_sendDataFromTBEToCache; + pp_incrementNumberOfMessagesByOne; + l_popForwardQueue; + } + + transition(OM, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) { ee_sendDataShared; l_popForwardQueue; @@ -1455,7 +1764,18 @@ l_popForwardQueue; } - transition(OM, Ack) { + transition(OM_F, {NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig}) { + et_sendDataSharedFromTBE; + l_popForwardQueue; + } + + transition(OM_F, Merged_GETS) { + emt_sendDataSharedMultipleFromTBE; + l_popForwardQueue; + } + + + transition({OM, OM_F}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1469,6 +1789,11 @@ kd_wakeUpDependents; } + transition(OM_F, {All_acks, All_acks_no_sharers}, MM_F) { + cf_flush; + j_popTriggerQueue; + kd_wakeUpDependents; + } // Transitions from IS transition(IS, {Other_GETX, NC_DMA_GETS, Other_GETS, Other_GETS_No_Mig, Invalidate}) { @@ -1556,7 +1881,7 @@ k_popMandatoryQueue; } - transition(MM_W, Ack) { + transition({MM_W,MM_WF}, Ack) { m_decrementNumberOfMessages; o_checkForCompletion; n_popResponseQueue; @@ -1569,6 +1894,13 @@ kd_wakeUpDependents; } + transition(MM_WF, All_acks_no_sharers, MM_F) { + cf_flush; + j_popTriggerQueue; + kd_wakeUpDependents; + } + + // Transitions from M_W transition(M_W, Store, MM_W) { @@ -1613,6 +1945,14 @@ kd_wakeUpDependents; } + transition(MI_F, Writeback_Ack, I) { + fh_flush_hit; + t_sendExclusiveDataFromTBEToMemory; + s_deallocateTBE; + l_popForwardQueue; + kd_wakeUpDependents; + } + transition(OI, Writeback_Ack, I) { qq_sendDataFromTBEToMemory; s_deallocateTBE; diff --git 
a/src/mem/protocol/MOESI_hammer-dir.sm b/src/mem/protocol/MOESI_hammer-dir.sm --- a/src/mem/protocol/MOESI_hammer-dir.sm +++ b/src/mem/protocol/MOESI_hammer-dir.sm @@ -88,6 +88,9 @@ WB, AccessPermission:Invalid, desc="Blocked on a writeback"; WB_O_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to O"; WB_E_W, AccessPermission:Invalid, desc="Blocked on memory write, will go to E"; + //added by Somayeh for Cache Flushing + NO_F, AccessPermission:Invalid, desc="Blocked on a flush"; + NO_F_W, AccessPermission:Invalid, desc="Not Owner, Blocked, waiting for Dram"; } // Events @@ -126,6 +129,9 @@ All_acks_and_owner_data, desc="Received shared data and message acks"; All_acks_and_data_no_sharers, desc="Received all acks and no other processor has a shared copy"; All_Unblocks, desc="Received all unblocks for a merged gets request"; + GETF, desc="A GETF arrives"; + PUTF, desc="A PUTF arrives"; + BLOCK, desc= "A BLOCK arrives"; } // TYPES @@ -231,6 +237,8 @@ return Event:GETS; } else if (type == CoherenceRequestType:GETX) { return Event:GETX; + } else if (type == CoherenceRequestType:GETF) { + return Event:GETF; } else { error("Invalid CoherenceRequestType"); } @@ -353,6 +361,10 @@ TBE tbe := TBEs[in_msg.Address]; if (in_msg.Type == CoherenceRequestType:PUT) { trigger(Event:PUT, in_msg.Address, pf_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:PUTF) { + trigger(Event:PUTF, in_msg.Address, pf_entry, tbe); + } else if (in_msg.Type == CoherenceRequestType:BLOCK) { + trigger(Event:BLOCK, in_msg.Address, pf_entry, tbe); } else { if (probe_filter_enabled || full_bit_dir_enabled) { if (is_valid(pf_entry)) { @@ -451,6 +463,19 @@ } } + + action(ab_sendBlockAck, "ab", desc="Send block ack to requestor") { + peek(requestQueue_in, RequestMsg) { + enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { + out_msg.Address := address; + out_msg.Type := CoherenceRequestType:BLOCK_ACK; + out_msg.Requestor := in_msg.Requestor; + 
out_msg.Destination.add(in_msg.Requestor); + out_msg.MessageSize := MessageSizeType:Writeback_Control; + } + } + } + action(b_sendWriteBackNack, "b", desc="Send writeback nack to requestor") { peek(requestQueue_in, RequestMsg) { enqueue(forwardNetwork_out, RequestMsg, latency=memory_controller_latency) { @@ -1182,6 +1207,17 @@ i_popIncomingRequestQueue; } + transition(E, GETF, NO_F_W) { + pfa_probeFilterAllocate; + v_allocateTBE; + rx_recordExclusiveInTBE; + saa_setAcksToAllIfPF; + qf_queueMemoryFetchRequest; + fn_forwardRequestIfNecessary; + i_popIncomingRequestQueue; + } + + transition(E, GETS, NO_B_W) { pfa_probeFilterAllocate; v_allocateTBE; @@ -1220,6 +1256,18 @@ i_popIncomingRequestQueue; } + transition(O, GETF, NO_F_W) { + r_setMRU; + v_allocateTBE; + r_recordDataInTBE; + sa_setAcksToOne; + qf_queueMemoryFetchRequest; + fb_forwardRequestBcast; + cs_clearSharers; + i_popIncomingRequestQueue; + } + + // This transition is dumb, if a shared copy exists on-chip, then that should // provide data, not slow off-chip dram. 
The problem is that the current // caches don't provide data in S state @@ -1283,6 +1331,14 @@ i_popIncomingRequestQueue; } + transition(NX, GETF, NO_F) { + r_setMRU; + fb_forwardRequestBcast; + cs_clearSharers; + i_popIncomingRequestQueue; + } + + // Transitions out of NO state transition(NO, GETX, NO_B) { r_setMRU; @@ -1292,6 +1348,15 @@ i_popIncomingRequestQueue; } + transition(NO, GETF, NO_F) { + r_setMRU; + ano_assertNotOwner; + fc_forwardRequestConditionalOwner; + cs_clearSharers; + i_popIncomingRequestQueue; + } + + transition(S, GETX, NO_B) { r_setMRU; fb_forwardRequestBcast; @@ -1299,6 +1364,14 @@ i_popIncomingRequestQueue; } + transition(S, GETF, NO_F) { + r_setMRU; + fb_forwardRequestBcast; + cs_clearSharers; + i_popIncomingRequestQueue; + } + + transition(S, GETS, NO_B) { r_setMRU; ano_assertNotOwner; @@ -1345,12 +1418,20 @@ // Blocked transient states transition({NO_B_X, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W, - NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R}, - {GETS, GETX, PUT, Pf_Replacement}) { + NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W}, + {GETS, GETX, GETF, PUT, Pf_Replacement}) { + z_stallAndWaitRequest; + } + + transition(WB,BLOCK) { + z_stallAndWaitRequest; + } + + transition(NO_F, {GETS, GETX, GETF, PUT, Pf_Replacement}){ z_stallAndWaitRequest; } - transition(NO_B, GETX, NO_B_X) { + transition(NO_B, {GETX, GETF}, NO_B_X) { z_stallAndWaitRequest; } @@ -1358,13 +1439,13 @@ z_stallAndWaitRequest; } - transition(NO_B_S, {GETX, PUT, Pf_Replacement}) { + transition(NO_B_S, {GETX, GETF, PUT, Pf_Replacement}) { z_stallAndWaitRequest; } transition({NO_B_X, NO_B, NO_B_S, O_B, NO_DR_B_W, NO_DW_B_W, NO_B_W, NO_DR_B_D, NO_DR_B, O_DR_B, O_B_W, O_DR_B_W, NO_DW_W, NO_B_S_W, - NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R}, + NO_W, O_W, WB, WB_E_W, WB_O_W, O_R, S_R, NO_R, NO_F_W}, {DMA_READ, DMA_WRITE}) { zd_stallAndWaitDMARequest; } @@ -1441,6 +1522,13 @@ l_popMemQueue; } + 
transition(NO_F_W, Memory_Data, NO_F) { + d_sendData; + w_deallocateTBE; + l_popMemQueue; + } + + transition(NO_DR_B_W, Memory_Data, NO_DR_B) { r_recordMemoryData; o_checkForCompletion; @@ -1735,4 +1823,25 @@ k_wakeUpDependents; j_popIncomingUnblockQueue; } + + transition(NO_F, PUTF, WB) { + a_sendWriteBackAck; + i_popIncomingRequestQueue; + } + + + transition({NX,NO}, BLOCK, NO_F){ + ab_sendBlockAck; + i_popIncomingRequestQueue; + } + + //possible race between BLOCK and UnblockM + transition(NO_F, UnblockM) { + j_popIncomingUnblockQueue; + } + + transition({NO_B_S, NO_B_X, NO_B_S_W, NO_B}, BLOCK){ + i_popIncomingRequestQueue; + } + } diff --git a/src/mem/protocol/MOESI_hammer-msg.sm b/src/mem/protocol/MOESI_hammer-msg.sm --- a/src/mem/protocol/MOESI_hammer-msg.sm +++ b/src/mem/protocol/MOESI_hammer-msg.sm @@ -37,6 +37,10 @@ PUT, desc="Put Ownership"; WB_ACK, desc="Writeback ack"; WB_NACK, desc="Writeback neg. ack"; + PUTF, desc="PUT on a Flush"; + GETF, desc="Issue exclusive for Flushing"; + BLOCK, desc="Issue a block for flushing"; + BLOCK_ACK, desc="Dir Block ack"; INV, desc="Invalidate"; } @@ -64,6 +68,8 @@ ALL_ACKS_OWNER_EXISTS,desc="See corresponding event"; ALL_ACKS_NO_SHARERS, desc="See corresponding event"; ALL_UNBLOCKS, desc="all unblockS received"; + COMPLETE_FLUSHING, desc="complete flush"; + START_FLUSH, desc="start over the flush"; } // TriggerMsg diff --git a/src/mem/protocol/RubySlicc_Exports.sm b/src/mem/protocol/RubySlicc_Exports.sm --- a/src/mem/protocol/RubySlicc_Exports.sm +++ b/src/mem/protocol/RubySlicc_Exports.sm @@ -113,6 +113,8 @@ REPLACEMENT, desc="Replacement"; COMMIT, desc="Commit version"; NULL, desc="Invalid request type"; + //added by Somayeh + FLUSH, desc="Flush request type"; } enumeration(SequencerRequestType, desc="...", default="SequencerRequestType_NULL") { diff --git a/src/mem/protocol/RubySlicc_Types.sm b/src/mem/protocol/RubySlicc_Types.sm --- a/src/mem/protocol/RubySlicc_Types.sm +++ b/src/mem/protocol/RubySlicc_Types.sm @@ 
-105,6 +105,9 @@ void writeCallback(Address, DataBlock); void writeCallback(Address, GenericMachineType, DataBlock); void writeCallback(Address, GenericMachineType, DataBlock, Time, Time, Time); + void flushCallback(Address, DataBlock); + void flushCallback(Address, GenericMachineType, DataBlock); + void flushCallback(Address, GenericMachineType, DataBlock, Time, Time, Time); void checkCoherence(Address); void profileNack(Address, int, int, uint64); } diff --git a/src/mem/ruby/slicc_interface/RubyRequest.hh b/src/mem/ruby/slicc_interface/RubyRequest.hh --- a/src/mem/ruby/slicc_interface/RubyRequest.hh +++ b/src/mem/ruby/slicc_interface/RubyRequest.hh @@ -50,6 +50,7 @@ RubyRequestType_RMW_Write, RubyRequestType_Locked_RMW_Read, RubyRequestType_Locked_RMW_Write, + RubyRequestType_FLUSH, RubyRequestType_NUM }; diff --git a/src/mem/ruby/slicc_interface/RubyRequest.cc b/src/mem/ruby/slicc_interface/RubyRequest.cc --- a/src/mem/ruby/slicc_interface/RubyRequest.cc +++ b/src/mem/ruby/slicc_interface/RubyRequest.cc @@ -26,6 +26,8 @@ return "Locked_RMW_Read"; case RubyRequestType_Locked_RMW_Write: return "Locked_RMW_Write"; + case RubyRequestType_FLUSH: + return "FLUSH"; case RubyRequestType_NULL: default: assert(0); @@ -42,6 +44,8 @@ return RubyRequestType_LD; else if (str == "ST") return RubyRequestType_ST; + else if (str == "FLUSH") + return RubyRequestType_FLUSH; else if (str == "Locked_Read") return RubyRequestType_Load_Linked; else if (str == "Locked_Write") diff --git a/src/mem/ruby/system/DMASequencer.cc b/src/mem/ruby/system/DMASequencer.cc --- a/src/mem/ruby/system/DMASequencer.cc +++ b/src/mem/ruby/system/DMASequencer.cc @@ -72,6 +72,7 @@ case RubyRequestType_RMW_Write: case RubyRequestType_Locked_RMW_Read: case RubyRequestType_Locked_RMW_Write: + case RubyRequestType_FLUSH: case RubyRequestType_NUM: panic("DMASequencer::makeRequest does not support RubyRequestType"); return RequestStatus_NULL; diff --git a/src/mem/ruby/system/RubyPort.cc 
b/src/mem/ruby/system/RubyPort.cc --- a/src/mem/ruby/system/RubyPort.cc +++ b/src/mem/ruby/system/RubyPort.cc @@ -244,6 +244,8 @@ // Note: M5 packets do not differentiate ST from RMW_Write // type = RubyRequestType_ST; + } else if (pkt->isFlush()) { + type = RubyRequestType_FLUSH; } else { panic("Unsupported ruby packet type\n"); } diff --git a/src/mem/ruby/system/Sequencer.hh b/src/mem/ruby/system/Sequencer.hh --- a/src/mem/ruby/system/Sequencer.hh +++ b/src/mem/ruby/system/Sequencer.hh @@ -100,6 +100,19 @@ Time forwardRequestTime, Time firstResponseTime); + void flushCallback(const Address& address, DataBlock& data); + + void flushCallback(const Address& address, + GenericMachineType mach, + DataBlock& data); + + void flushCallback(const Address& address, + GenericMachineType mach, + DataBlock& data, + Time initialRequestTime, + Time forwardRequestTime, + Time firstResponseTime); + RequestStatus makeRequest(const RubyRequest & request); RequestStatus getRequestStatus(const RubyRequest& request); bool empty() const; @@ -143,6 +156,7 @@ typedef m5::hash_map<Address, SequencerRequest*> RequestTable; RequestTable m_writeRequestTable; RequestTable m_readRequestTable; + RequestTable m_flushRequestTable; // Global outstanding request count, across all request tables int m_outstanding_count; bool m_deadlock_check_scheduled; diff --git a/src/mem/ruby/system/Sequencer.cc b/src/mem/ruby/system/Sequencer.cc --- a/src/mem/ruby/system/Sequencer.cc +++ b/src/mem/ruby/system/Sequencer.cc @@ -122,6 +122,26 @@ current_time - request->issue_time); } + + //added by Somayeh + RequestTable::iterator flush = m_flushRequestTable.begin(); + RequestTable::iterator flush_end = m_flushRequestTable.end(); + for (; flush != flush_end; ++flush) { + SequencerRequest* request = flush->second; + if (current_time - request->issue_time < m_deadlock_threshold) + continue; + + panic("Possible Deadlock detected. 
Aborting!\n" + "version: %d request.paddr: 0x%x m_flushRequestTable: %d " + "current time: %u issue_time: %d difference: %d\n", m_version, + request->ruby_request.paddr, m_flushRequestTable.size(), + current_time, request->issue_time, + current_time - request->issue_time); + + } + + total_outstanding += m_flushRequestTable.size(); + total_outstanding += m_writeRequestTable.size(); total_outstanding += m_readRequestTable.size(); @@ -191,6 +211,26 @@ } } + //added by Somayeh + + out << "proc " << m_version + << " Flush Requests = " << m_flushRequestTable.size << endl; + + // print the request table + RequestTable::iterator flush = m_flushRequestTable.begin(); + RequestTable::iterator flush_end = m_flushRequestTable.end(); + for (; flush != flush_end; ++flush) { + SequencerRequest* request = flush->second; + out << "\tRequest[ " << i << " ] = " << request.getType() + << " Address " << wkeys[i] + << " Posted " << request.getTime() + << " PF " << request.getPrefetch() << endl; + if (request.getPrefetch() == PrefetchBit_No) { + total_demand++; + } + } + + out << endl; out << "Total Number Outstanding: " << m_outstanding_count << endl @@ -216,7 +256,7 @@ Sequencer::insertRequest(SequencerRequest* request) { int total_outstanding = - m_writeRequestTable.size() + m_readRequestTable.size(); + m_writeRequestTable.size() + m_readRequestTable.size() + m_flushRequestTable.size(); assert(m_outstanding_count == total_outstanding); @@ -247,6 +287,19 @@ } i->second = request; m_outstanding_count++; + //Added by Somayeh + }else if ((request->ruby_request.type == RubyRequestType_FLUSH)){ + //insert it in flush request table + pair r = + m_flushRequestTable.insert(RequestTable::value_type(line_addr, 0)); + bool success = r.second; + RequestTable::iterator i = r.first; + if (!success) { + i->second = request; + assert(0); + } + i->second = request; + m_outstanding_count++; } else { pair r = m_readRequestTable.insert(RequestTable::value_type(line_addr, 0)); @@ -265,7 +318,8 @@ 
g_system_ptr->getProfiler()->sequencerRequests(m_outstanding_count); - total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size(); + + total_outstanding = m_writeRequestTable.size() + m_readRequestTable.size() + m_flushRequestTable.size(); assert(m_outstanding_count == total_outstanding); return false; @@ -276,14 +330,14 @@ { m_outstanding_count--; assert(m_outstanding_count == - m_writeRequestTable.size() + m_readRequestTable.size()); + m_writeRequestTable.size() + m_readRequestTable.size() + m_flushRequestTable.size()); } void Sequencer::removeRequest(SequencerRequest* srequest) { assert(m_outstanding_count == - m_writeRequestTable.size() + m_readRequestTable.size()); + m_writeRequestTable.size() + m_readRequestTable.size() + m_flushRequestTable.size()); const RubyRequest & ruby_request = srequest->ruby_request; Address line_addr(ruby_request.paddr); @@ -296,6 +350,8 @@ (ruby_request.type == RubyRequestType_Locked_RMW_Read) || (ruby_request.type == RubyRequestType_Locked_RMW_Write)) { m_writeRequestTable.erase(line_addr); + } else if ((ruby_request.type == RubyRequestType_FLUSH)) { + m_flushRequestTable.erase(line_addr); } else { m_readRequestTable.erase(line_addr); } @@ -441,6 +497,50 @@ initialRequestTime, forwardRequestTime, firstResponseTime); } + +//added by Somayeh + +void +Sequencer::flushCallback(const Address& address, DataBlock& data) +{ + flushCallback(address, GenericMachineType_NULL, data); +} + +void +Sequencer::flushCallback(const Address& address, + GenericMachineType mach, + DataBlock& data) +{ + flushCallback(address, mach, data, 0, 0, 0); +} + + +void +Sequencer::flushCallback(const Address& address, + GenericMachineType mach, + DataBlock& data, + Time initialRequestTime, + Time forwardRequestTime, + Time firstResponseTime) +{ + assert(address == line_address(address)); + assert(m_flushRequestTable.count(line_address(address))); + + RequestTable::iterator i = m_flushRequestTable.find(address); + assert(i != 
m_flushRequestTable.end()); + SequencerRequest* request = i->second; + + m_flushRequestTable.erase(i); + markRemoved(); + + assert((request->ruby_request.type == RubyRequestType_FLUSH)); + hitCallback(request, mach, data, true, + initialRequestTime, forwardRequestTime, firstResponseTime); +} + + + + void Sequencer::hitCallback(SequencerRequest* srequest, GenericMachineType mach, @@ -506,7 +606,9 @@ (type == RubyRequestType_IFETCH) || (type == RubyRequestType_RMW_Read) || (type == RubyRequestType_Locked_RMW_Read) || - (type == RubyRequestType_Load_Linked)) { + (type == RubyRequestType_Load_Linked) || + (type == RubyRequestType_FLUSH) + ) { memcpy(ruby_request.data, data.getData(request_address.getOffset(), ruby_request.len), ruby_request.len); @@ -545,6 +647,9 @@ !!m_writeRequestTable.count(line_address(Address(request.paddr))); bool is_outstanding_load = !!m_readRequestTable.count(line_address(Address(request.paddr))); + bool is_outstanding_flush = + !!m_flushRequestTable.count(line_address(Address(request.paddr))); + if (is_outstanding_store) { if ((request.type == RubyRequestType_LD) || (request.type == RubyRequestType_IFETCH) || @@ -562,6 +667,8 @@ m_load_waiting_on_load_cycles++; } return RequestStatus_Aliased; + } else if (is_outstanding_flush) { + return RequestStatus_Aliased; } if (m_outstanding_count >= m_max_outstanding_requests) { @@ -574,7 +681,7 @@ bool Sequencer::empty() const { - return m_writeRequestTable.empty() && m_readRequestTable.empty(); + return m_writeRequestTable.empty() && m_readRequestTable.empty() && m_flushRequestTable.empty(); } RequestStatus @@ -615,6 +722,9 @@ case RubyRequestType_LD: ctype = CacheRequestType_LD; break; + case RubyRequestType_FLUSH: + ctype = CacheRequestType_FLUSH; + break; case RubyRequestType_ST: case RubyRequestType_RMW_Read: case RubyRequestType_RMW_Write: @@ -718,6 +828,7 @@ << ", outstanding requests: " << m_outstanding_count << ", read request table: " << m_readRequestTable << ", write request table: " << 
m_writeRequestTable + << ", flush request table: " << m_flushRequestTable << "]"; }