cpu: Track monitor/mwait state per hardware thread

BaseCPU previously held a single AddressMonitor.  This patch turns it into a
std::vector<AddressMonitor> sized by numThreads and adds a ThreadID argument
to armMonitor(), mwait(), mwaitAtomic() and getCpuAddrMonitor().  The
atomicNotify() declaration is dropped from BaseCPU.

 * BaseDynInst passes threadNumber and SimpleExecContext passes
   thread->threadId(); the Checker and Minor execution contexts pass
   thread 0.
 * The snoop handlers of the Atomic, Timing and O3 CPUs now test every
   thread's monitor instead of the single CPU-wide one.
 * AtomicSimpleCPU and TimingSimpleCPU gain threadSnoop(pkt, sender), called
   on the write path, which tests the monitors of all threads other than
   the sender and calls TheISA::handleLockedSnoop() for them.
 * Requests are tagged with the thread context's contextId() instead of the
   CPU id.

Review notes:
 * BaseSimpleCPU::wakeup() sets gotWakeup on every thread's monitor and
   activates every suspended thread, so a hit on one thread's monitor wakes
   all of them.  NOTE(review): confirm this is intended.
 * Minor still passes thread id 0 to setThreadContext() in lsq.cc while
   looking the context up via inst->id.threadId.  NOTE(review): presumably
   fine while Minor is single-threaded; verify.

diff --git a/src/cpu/base_dyn_inst.hh b/src/cpu/base_dyn_inst.hh
--- a/src/cpu/base_dyn_inst.hh
+++ b/src/cpu/base_dyn_inst.hh
@@ -858,11 +858,12 @@
 
   public:
     // monitor/mwait funtions
-    void armMonitor(Addr address) { cpu->armMonitor(address); }
-    bool mwait(PacketPtr pkt) { return cpu->mwait(pkt); }
+    void armMonitor(Addr address) { cpu->armMonitor(threadNumber, address); }
+    bool mwait(PacketPtr pkt) { return cpu->mwait(threadNumber, pkt); }
     void mwaitAtomic(ThreadContext *tc)
-    { return cpu->mwaitAtomic(tc, cpu->dtb); }
-    AddressMonitor *getAddrMonitor() { return cpu->getCpuAddrMonitor(); }
+    { return cpu->mwaitAtomic(threadNumber, tc, cpu->dtb); }
+    AddressMonitor *getAddrMonitor()
+    { return cpu->getCpuAddrMonitor(threadNumber); }
 };
 
 template<class Impl>
diff --git a/src/cpu/checker/cpu.hh b/src/cpu/checker/cpu.hh
--- a/src/cpu/checker/cpu.hh
+++ b/src/cpu/checker/cpu.hh
@@ -350,11 +350,11 @@
     }
 
     // monitor/mwait funtions
-    virtual void armMonitor(Addr address) { BaseCPU::armMonitor(address); }
-    bool mwait(PacketPtr pkt) { return BaseCPU::mwait(pkt); }
+    virtual void armMonitor(Addr address) { BaseCPU::armMonitor(0, address); }
+    bool mwait(PacketPtr pkt) { return BaseCPU::mwait(0, pkt); }
     void mwaitAtomic(ThreadContext *tc)
-    { return BaseCPU::mwaitAtomic(tc, thread->dtb); }
-    AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(); }
+    { return BaseCPU::mwaitAtomic(0, tc, thread->dtb); }
+    AddressMonitor *getAddrMonitor() { return BaseCPU::getCpuAddrMonitor(0); }
 
     void demapInstPage(Addr vaddr, uint64_t asn)
     {
diff --git a/src/cpu/minor/exec_context.hh b/src/cpu/minor/exec_context.hh
--- a/src/cpu/minor/exec_context.hh
+++ b/src/cpu/minor/exec_context.hh
@@ -343,12 +343,12 @@
 
   public:
     // monitor/mwait funtions
-    void armMonitor(Addr address) { getCpuPtr()->armMonitor(address); }
-    bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(pkt); }
+    void armMonitor(Addr address) { getCpuPtr()->armMonitor(0, address); }
+    bool mwait(PacketPtr pkt) { return getCpuPtr()->mwait(0, pkt); }
     void mwaitAtomic(ThreadContext *tc)
-    { return getCpuPtr()->mwaitAtomic(tc, thread.dtb); }
+    { return getCpuPtr()->mwaitAtomic(0, tc, thread.dtb); }
     AddressMonitor *getAddrMonitor()
-    { return getCpuPtr()->getCpuAddrMonitor(); }
+    { return getCpuPtr()->getCpuAddrMonitor(0); }
 };
 
 }
diff --git a/src/cpu/minor/fetch1.cc b/src/cpu/minor/fetch1.cc
--- a/src/cpu/minor/fetch1.cc
+++ b/src/cpu/minor/fetch1.cc
@@ -135,7 +135,8 @@
         "%s addr: 0x%x pc: %s line_offset: %d request_size: %d\n",
         request_id, aligned_pc, pc, line_offset, request_size);
 
-    request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
+    request->request.setThreadContext(cpu.threads[0]->getTC()->contextId(),
+                                      /* thread id */ 0);
     request->request.setVirt(0 /* asid */, aligned_pc, request_size,
         Request::INST_FETCH, cpu.instMasterId(),
         /* I've no idea why we need the PC, but give it */
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -554,14 +554,17 @@
     Stats::Scalar numWorkItemsCompleted;
 
   private:
-    AddressMonitor addressMonitor;
+    std::vector<AddressMonitor> addressMonitor;
 
   public:
-    void armMonitor(Addr address);
-    bool mwait(PacketPtr pkt);
-    void mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb);
-    AddressMonitor *getCpuAddrMonitor() { return &addressMonitor; }
-    void atomicNotify(Addr address);
+    void armMonitor(ThreadID tid, Addr address);
+    bool mwait(ThreadID tid, PacketPtr pkt);
+    void mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb);
+    AddressMonitor *getCpuAddrMonitor(ThreadID tid)
+    {
+        assert(tid < numThreads);
+        return &addressMonitor[tid];
+    }
 };
 
 #endif // THE_ISA == NULL_ISA
# Node ID 3a631c76d3039d478532038cd5612010a3930eb0
# Parent b4b75a47738c821c24fe6173236245ca48f8c2fc
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -133,7 +133,7 @@
       numThreads(p->numThreads), system(p->system),
       functionTraceStream(nullptr), currentFunctionStart(0),
       currentFunctionEnd(0), functionEntryTick(0),
-      addressMonitor()
+      addressMonitor(p->numThreads)
 {
     // if Python did not provide a valid ID, do it here
     if (_cpuId == -1 ) {
@@ -271,39 +271,48 @@
 }
 
 void
-BaseCPU::armMonitor(Addr address)
+BaseCPU::armMonitor(ThreadID tid, Addr address)
 {
-    addressMonitor.armed = true;
-    addressMonitor.vAddr = address;
-    addressMonitor.pAddr = 0x0;
-    DPRINTF(Mwait,"Armed monitor (vAddr=0x%lx)\n", address);
+    assert(tid < numThreads);
+    AddressMonitor &monitor = addressMonitor[tid];
+
+    monitor.armed = true;
+    monitor.vAddr = address;
+    monitor.pAddr = 0x0;
+    DPRINTF(Mwait,"[tid:%d] Armed monitor (vAddr=0x%lx)\n", tid, address);
 }
 
 bool
-BaseCPU::mwait(PacketPtr pkt)
+BaseCPU::mwait(ThreadID tid, PacketPtr pkt)
 {
-    if(addressMonitor.gotWakeup == false) {
+    assert(tid < numThreads);
+    AddressMonitor &monitor = addressMonitor[tid];
+
+    if(monitor.gotWakeup == false) {
         int block_size = cacheLineSize();
         uint64_t mask = ~((uint64_t)(block_size - 1));
 
         assert(pkt->req->hasPaddr());
-        addressMonitor.pAddr = pkt->getAddr() & mask;
-        addressMonitor.waiting = true;
+        monitor.pAddr = pkt->getAddr() & mask;
+        monitor.waiting = true;
 
-        DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
-                addressMonitor.vAddr, addressMonitor.pAddr);
+        DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, "
+                "line's paddr=0x%lx)\n", tid, monitor.vAddr, monitor.pAddr);
         return true;
     } else {
-        addressMonitor.gotWakeup = false;
+        monitor.gotWakeup = false;
         return false;
     }
 }
 
 void
-BaseCPU::mwaitAtomic(ThreadContext *tc, TheISA::TLB *dtb)
+BaseCPU::mwaitAtomic(ThreadID tid, ThreadContext *tc, TheISA::TLB *dtb)
 {
+    assert(tid < numThreads);
+    AddressMonitor &monitor = addressMonitor[tid];
+
     Request req;
-    Addr addr = addressMonitor.vAddr;
+    Addr addr = monitor.vAddr;
     int block_size = cacheLineSize();
     uint64_t mask = ~((uint64_t)(block_size - 1));
     int size = block_size;
@@ -320,11 +329,11 @@
     Fault fault = dtb->translateAtomic(&req, tc, BaseTLB::Read);
     assert(fault == NoFault);
 
-    addressMonitor.pAddr = req.getPaddr() & mask;
-    addressMonitor.waiting = true;
+    monitor.pAddr = req.getPaddr() & mask;
+    monitor.waiting = true;
 
-    DPRINTF(Mwait,"mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
-            addressMonitor.vAddr, addressMonitor.pAddr);
+    DPRINTF(Mwait,"[tid:%d] mwait called (vAddr=0x%lx, line's paddr=0x%lx)\n",
+            tid, monitor.vAddr, monitor.pAddr);
 }
 
 void
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -418,9 +418,8 @@
 void
 BaseSimpleCPU::wakeup()
 {
-    getCpuAddrMonitor()->gotWakeup = true;
-
     for (ThreadID tid = 0; tid < numThreads; tid++) {
+        getCpuAddrMonitor(tid)->gotWakeup = true;
         if (threadInfo[tid]->thread->status() == ThreadContext::Suspended) {
             DPRINTF(Quiesce,"Suspended Processor awoke\n");
             threadInfo[tid]->thread->activate();
diff --git a/src/cpu/simple/exec_context.hh b/src/cpu/simple/exec_context.hh
--- a/src/cpu/simple/exec_context.hh
+++ b/src/cpu/simple/exec_context.hh
@@ -372,22 +372,22 @@
 
     void armMonitor(Addr address) M5_ATTR_OVERRIDE
     {
-        cpu->armMonitor(address);
+        cpu->armMonitor(thread->threadId(), address);
     }
 
     bool mwait(PacketPtr pkt) M5_ATTR_OVERRIDE
     {
-        return cpu->mwait(pkt);
+        return cpu->mwait(thread->threadId(), pkt);
     }
 
     void mwaitAtomic(ThreadContext *tc) M5_ATTR_OVERRIDE
     {
-        cpu->mwaitAtomic(tc, thread->dtb);
+        cpu->mwaitAtomic(thread->threadId(), tc, thread->dtb);
     }
 
     AddressMonitor *getAddrMonitor() M5_ATTR_OVERRIDE
     {
-        return cpu->getCpuAddrMonitor();
+        return cpu->getCpuAddrMonitor(thread->threadId());
     }
 
 #if THE_ISA == MIPS_ISA
diff --git a/src/cpu/simple/timing.hh b/src/cpu/simple/timing.hh
--- a/src/cpu/simple/timing.hh
+++ b/src/cpu/simple/timing.hh
@@ -132,6 +132,7 @@
     };
     FetchTranslation fetchTranslation;
 
+    void threadSnoop(PacketPtr pkt, ThreadID sender);
     void sendData(RequestPtr req, uint8_t *data, uint64_t *res, bool read);
     void sendSplitData(RequestPtr req1, RequestPtr req2, RequestPtr req,
                        uint8_t *data, bool read);
diff --git a/src/cpu/simple/timing.cc b/src/cpu/simple/timing.cc
--- a/src/cpu/simple/timing.cc
+++ b/src/cpu/simple/timing.cc
@@ -296,6 +296,7 @@
         if (do_access) {
             dcache_pkt = pkt;
             handleWritePacket();
+            threadSnoop(pkt, curThread);
         } else {
             _status = DcacheWaitResponse;
             completeDataAccess(pkt);
@@ -532,6 +533,19 @@
     return NoFault;
 }
 
+void
+TimingSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
+{
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (tid != sender) {
+            if (getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+                wakeup();
+            }
+            TheISA::handleLockedSnoop(threadInfo[tid]->thread, pkt,
+                                      dcachePort.cacheBlockMask);
+        }
+    }
+}
 
 void
 TimingSimpleCPU::finishTranslation(WholeTranslationState *state)
@@ -844,8 +858,10 @@
 TimingSimpleCPU::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 {
     // X86 ISA: Snooping an invalidation for monitor/mwait
-    if (cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 
     for (auto &t_info : cpu->threadInfo) {
@@ -857,8 +873,10 @@
 TimingSimpleCPU::DcachePort::recvFunctionalSnoop(PacketPtr pkt)
 {
     // X86 ISA: Snooping an invalidation for monitor/mwait
-    if (cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 }
 
diff --git a/src/cpu/simple/atomic.hh b/src/cpu/simple/atomic.hh
--- a/src/cpu/simple/atomic.hh
+++ b/src/cpu/simple/atomic.hh
@@ -186,6 +186,9 @@
     /** Return a reference to the instruction port. */
     virtual MasterPort &getInstPort() { return icachePort; }
 
+    /** Perform snoop for other cpu-local thread contexts. */
+    void threadSnoop(PacketPtr pkt, ThreadID sender);
+
   public:
 
     DrainState drain() M5_ATTR_OVERRIDE;
diff --git a/src/cpu/simple/atomic.cc b/src/cpu/simple/atomic.cc
--- a/src/cpu/simple/atomic.cc
+++ b/src/cpu/simple/atomic.cc
@@ -86,9 +86,10 @@
 {
     BaseSimpleCPU::init();
 
-    ifetch_req.setThreadContext(_cpuId, 0);
-    data_read_req.setThreadContext(_cpuId, 0);
-    data_write_req.setThreadContext(_cpuId, 0);
+    ContextID cid = threadContexts[0]->contextId();
+    ifetch_req.setThreadContext(cid, 0);
+    data_read_req.setThreadContext(cid, 0);
+    data_write_req.setThreadContext(cid, 0);
 }
 
 AtomicSimpleCPU::AtomicSimpleCPU(AtomicSimpleCPUParams *p)
@@ -130,6 +131,24 @@
 }
 
 void
+AtomicSimpleCPU::threadSnoop(PacketPtr pkt, ThreadID sender)
+{
+    DPRINTF(SimpleCPU, "received snoop pkt for addr:%#x %s\n", pkt->getAddr(),
+            pkt->cmdString());
+
+    for (ThreadID tid = 0; tid < numThreads; tid++) {
+        if (tid != sender) {
+            if (getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+                wakeup();
+            }
+
+            TheISA::handleLockedSnoop(threadInfo[tid]->thread,
+                                      pkt, dcachePort.cacheBlockMask);
+        }
+    }
+}
+
+void
 AtomicSimpleCPU::drainResume()
 {
     assert(!tickEvent.scheduled());
@@ -264,8 +283,10 @@
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
     AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
-    if (cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 
     // if snoop invalidates, release any associated locks
@@ -288,8 +309,10 @@
 
     // X86 ISA: Snooping an invalidation for monitor/mwait
     AtomicSimpleCPU *cpu = (AtomicSimpleCPU *)(&owner);
-    if (cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
 
     // if snoop invalidates, release any associated locks
@@ -458,6 +481,9 @@
                         system->getPhysMem().access(&pkt);
                     else
                         dcache_latency += dcachePort.sendAtomic(&pkt);
+
+                    // Notify other threads on this CPU of write
+                    threadSnoop(&pkt, curThread);
                 }
                 dcache_access = true;
                 assert(!pkt.isError());
@@ -512,9 +538,11 @@
 
     // Set memory request ids to current thread
     if (numThreads > 1) {
-        ifetch_req.setThreadContext(_cpuId, curThread);
-        data_read_req.setThreadContext(_cpuId, curThread);
-        data_write_req.setThreadContext(_cpuId, curThread);
+        ContextID cid = threadContexts[curThread]->contextId();
+
+        ifetch_req.setThreadContext(cid, curThread);
+        data_read_req.setThreadContext(cid, curThread);
+        data_write_req.setThreadContext(cid, curThread);
     }
 
     SimpleExecContext& t_info = *threadInfo[curThread];
diff --git a/src/cpu/minor/lsq.cc b/src/cpu/minor/lsq.cc
--- a/src/cpu/minor/lsq.cc
+++ b/src/cpu/minor/lsq.cc
@@ -1501,7 +1501,8 @@
     if (inst->traceData)
         inst->traceData->setMem(addr, size, flags);
 
-    request->request.setThreadContext(cpu.cpuId(), /* thread id */ 0);
+    ContextID cid = cpu.threads[inst->id.threadId]->getTC()->contextId();
+    request->request.setThreadContext(cid, /* thread id */ 0);
     request->request.setVirt(0 /* asid */,
         addr, size, flags, cpu.dataMasterId(),
         /* I've no idea why we need the PC, but give it */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -118,9 +118,11 @@
 void
 FullO3CPU<Impl>::DcachePort::recvTimingSnoopReq(PacketPtr pkt)
 {
     // X86 ISA: Snooping an invalidation for monitor/mwait
-    if(cpu->getCpuAddrMonitor()->doMonitor(pkt)) {
-        cpu->wakeup();
+    for (ThreadID tid = 0; tid < cpu->numThreads; tid++) {
+        if (cpu->getCpuAddrMonitor(tid)->doMonitor(pkt)) {
+            cpu->wakeup();
+        }
     }
     lsq->recvTimingSnoopReq(pkt);
 }