diff -r 24d6b6ffc9d3 -r 1b646af30186 src/arch/arm/tlb.cc --- a/src/arch/arm/tlb.cc Wed Jan 12 11:13:12 2011 -0600 +++ b/src/arch/arm/tlb.cc Wed Jan 12 11:13:16 2011 -0600 @@ -696,6 +696,8 @@ #endif if (!delay) translation->finish(fault, req, tc, mode); + else + translation->delay(); return fault; } diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/base_dyn_inst.hh --- a/src/cpu/base_dyn_inst.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/base_dyn_inst.hh Wed Jan 12 11:13:16 2011 -0600 @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * Copyright (c) 2009 The University of Edinburgh * All rights reserved. @@ -150,6 +162,26 @@ /** Finish a DTB address translation. */ void finishTranslation(WholeTranslationState *state); + /** True if the DTB address translation has started. */ + bool translationStarted; + + /** True if the DTB address translation has completed. */ + bool translationCompleted; + + /** + * True if the DTB address translation has been delayed due to a hw page + * table walk. + */ + bool translationDelayed; + + /** + * Saved memory requests (needed when the DTB address translation is + * delayed due to a hw page table walk). + */ + RequestPtr savedReq; + RequestPtr savedSreqLow; + RequestPtr savedSreqHigh; + /** @todo: Consider making this private. 
*/ public: /** The sequence number of the instruction. */ @@ -835,33 +867,42 @@ unsigned size, unsigned flags) { reqMade = true; - Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - + Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); - } - initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); + if (translationDelayed) { + req = savedReq; + sreqLow = savedSreqLow; + sreqHigh = savedSreqHigh; + } else { + req = new Request(asid, addr, size, flags, this->pc.instAddr(), + thread->contextId(), threadNumber); - if (fault == NoFault) { - effAddr = req->getVaddr(); - effAddrValid = true; - fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); - } else { - // Commit will have to clean up whatever happened. Set this - // instruction as executed. - this->setExecuted(); + // Only split the request if the ISA supports unaligned accesses. + if (TheISA::HasUnalignedMemAcc) { + splitRequest(req, sreqLow, sreqHigh); + } + initiateTranslation(req, sreqLow, sreqHigh, NULL, BaseTLB::Read); } - if (fault != NoFault) { - // Return a fixed value to keep simulation deterministic even - // along misspeculated paths. - if (data) - bzero(data, size); + if (translationCompleted) { + if (fault == NoFault) { + effAddr = req->getVaddr(); + effAddrValid = true; + fault = cpu->read(req, sreqLow, sreqHigh, data, lqIdx); + } else { + // Commit will have to clean up whatever happened. Set this + // instruction as executed. + this->setExecuted(); + } + + if (fault != NoFault) { + // Return a fixed value to keep simulation deterministic even + // along misspeculated paths. 
+ if (data) + bzero(data, size); + } } if (traceData) { @@ -897,19 +938,26 @@ } reqMade = true; - Request *req = new Request(asid, addr, size, flags, this->pc.instAddr(), - thread->contextId(), threadNumber); - + Request *req = NULL; Request *sreqLow = NULL; Request *sreqHigh = NULL; - // Only split the request if the ISA supports unaligned accesses. - if (TheISA::HasUnalignedMemAcc) { - splitRequest(req, sreqLow, sreqHigh); + if (translationDelayed) { + req = savedReq; + sreqLow = savedSreqLow; + sreqHigh = savedSreqHigh; + } else { + req = new Request(asid, addr, size, flags, this->pc.instAddr(), + thread->contextId(), threadNumber); + + // Only split the request if the ISA supports unaligned accesses. + if (TheISA::HasUnalignedMemAcc) { + splitRequest(req, sreqLow, sreqHigh); + } + initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); } - initiateTranslation(req, sreqLow, sreqHigh, res, BaseTLB::Write); - if (fault == NoFault) { + if (fault == NoFault && translationCompleted) { effAddr = req->getVaddr(); effAddrValid = true; fault = cpu->write(req, sreqLow, sreqHigh, data, sqIdx); @@ -953,6 +1001,8 @@ RequestPtr sreqHigh, uint64_t *res, BaseTLB::Mode mode) { + translationStarted = true; + if (!TheISA::HasUnalignedMemAcc || sreqLow == NULL) { WholeTranslationState *state = new WholeTranslationState(req, NULL, res, mode); @@ -961,6 +1011,13 @@ DataTranslation > *trans = new DataTranslation >(this, state); cpu->dtb->translateTiming(req, thread->getTC(), trans, mode); + if (!translationCompleted) { + translationDelayed = true; + // Save memory requests. 
+ savedReq = state->mainReq; + savedSreqLow = state->sreqLow; + savedSreqHigh = state->sreqHigh; + } } else { WholeTranslationState *state = new WholeTranslationState(req, sreqLow, sreqHigh, NULL, res, mode); @@ -973,6 +1030,13 @@ cpu->dtb->translateTiming(sreqLow, thread->getTC(), stransLow, mode); cpu->dtb->translateTiming(sreqHigh, thread->getTC(), stransHigh, mode); + if (!translationCompleted) { + translationDelayed = true; + // Save memory requests. + savedReq = state->mainReq; + savedSreqLow = state->sreqLow; + savedSreqHigh = state->sreqHigh; + } } } @@ -998,6 +1062,8 @@ state->deleteReqs(); } delete state; + + translationCompleted = true; } #endif // __CPU_BASE_DYN_INST_HH__ diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/base_dyn_inst_impl.hh --- a/src/cpu/base_dyn_inst_impl.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/base_dyn_inst_impl.hh Wed Jan 12 11:13:16 2011 -0600 @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. 
* @@ -107,6 +119,10 @@ effAddrValid = false; physEffAddr = 0; + translationStarted = false; + translationCompleted = false; + translationDelayed = false; + isUncacheable = false; reqMade = false; readyRegs = 0; diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/o3/fetch.hh --- a/src/cpu/o3/fetch.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/o3/fetch.hh Wed Jan 12 11:13:16 2011 -0600 @@ -137,6 +137,10 @@ {} void + delay() + {} + + void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode) { diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/o3/iew_impl.hh --- a/src/cpu/o3/iew_impl.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/o3/iew_impl.hh Wed Jan 12 11:13:16 2011 -0600 @@ -1240,12 +1240,33 @@ // Loads will mark themselves as executed, and their writeback // event adds the instruction to the queue to commit fault = ldstQueue.executeLoad(inst); + + if (inst->translationStarted && !inst->translationCompleted && + fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. + DPRINTF(IEW, "Execute: Delayed translation, deferring " + "load.\n"); + instQueue.deferMemInst(inst); + continue; + } + if (inst->isDataPrefetch() || inst->isInstPrefetch()) { fault = NoFault; } } else if (inst->isStore()) { fault = ldstQueue.executeStore(inst); + if (inst->translationStarted && !inst->translationCompleted && + fault == NoFault) { + // A hw page table walk is currently going on; the + // instruction must be deferred. 
+ DPRINTF(IEW, "Execute: Delayed translation, deferring " + "store.\n"); + instQueue.deferMemInst(inst); + continue; + } + // If the store had a fault then it may not have a mem req if (fault != NoFault || inst->readPredicate() == false || !inst->isStoreConditional()) { diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/o3/inst_queue.hh --- a/src/cpu/o3/inst_queue.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/o3/inst_queue.hh Wed Jan 12 11:13:16 2011 -0600 @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -180,6 +192,11 @@ */ DynInstPtr getInstToExecute(); + /** Returns a memory instruction that was deferred due to a delayed DTB + * translation if it is now ready to execute. + */ + DynInstPtr getDeferredMemInstToExecute(); + /** * Records the instruction as the producer of a register without * adding it to the rest of the IQ. @@ -223,6 +240,12 @@ /** Completes a memory operation. */ void completeMemInst(DynInstPtr &completed_inst); + /** + * Defers a memory instruction when its DTB translation incurs a hw + * page table walk. + */ + void deferMemInst(DynInstPtr &deferred_inst); + /** Indicates an ordering violation between a store and a load. */ void violation(DynInstPtr &store, DynInstPtr &faulting_load); @@ -284,6 +307,11 @@ /** List of instructions that are ready to be executed. 
*/ std::list instsToExecute; + /** List of instructions waiting for their DTB translation to + * complete (hw page table walk in progress). + */ + std::list deferredMemInsts; + /** * Struct for comparing entries to be added to the priority queue. * This gives reverse ordering to the instructions in terms of diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/o3/inst_queue_impl.hh --- a/src/cpu/o3/inst_queue_impl.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/o3/inst_queue_impl.hh Wed Jan 12 11:13:16 2011 -0600 @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2004-2006 The Regents of The University of Michigan * All rights reserved. * @@ -356,6 +368,7 @@ } nonSpecInsts.clear(); listOrder.clear(); + deferredMemInsts.clear(); } template @@ -685,6 +698,15 @@ IssueStruct *i2e_info = issueToExecuteQueue->access(0); + DynInstPtr deferred_mem_inst; + int total_deferred_mem_issued = 0; + while (total_deferred_mem_issued < totalWidth && + (deferred_mem_inst = getDeferredMemInstToExecute()) != NULL) { + issueToExecuteQueue->access(0)->size++; + instsToExecute.push_back(deferred_mem_inst); + total_deferred_mem_issued++; + } + // Have iterator to head of the list // While I haven't exceeded bandwidth or reached the end of the list, // Try to get a FU that can do what this op needs. 
@@ -697,7 +719,7 @@ ListOrderIt order_end_it = listOrder.end(); int total_issued = 0; - while (total_issued < totalWidth && + while (total_issued < (totalWidth - total_deferred_mem_issued) && iewStage->canIssue() && order_it != order_end_it) { OpClass op_class = (*order_it).queueType; @@ -808,7 +830,7 @@ iqInstsIssued+= total_issued; // If we issued any instructions, tell the CPU we had activity. - if (total_issued) { + if (total_issued || total_deferred_mem_issued) { cpu->activityThisCycle(); } else { DPRINTF(IQ, "Not able to schedule any instructions.\n"); @@ -964,6 +986,12 @@ InstructionQueue::rescheduleMemInst(DynInstPtr &resched_inst) { DPRINTF(IQ, "Rescheduling mem inst [sn:%lli]\n", resched_inst->seqNum); + + // Reset DTB translation state + resched_inst->translationStarted = false; + resched_inst->translationCompleted = false; + resched_inst->translationDelayed = false; + resched_inst->clearCanIssue(); memDepUnit[resched_inst->threadNumber].reschedule(resched_inst); } @@ -994,6 +1022,29 @@ template void +InstructionQueue::deferMemInst(DynInstPtr &deferred_inst) +{ + deferredMemInsts.push_back(deferred_inst); +} + +template +typename Impl::DynInstPtr +InstructionQueue::getDeferredMemInstToExecute() +{ + for (ListIt it = deferredMemInsts.begin(); it != deferredMemInsts.end(); + ++it) { + assert((*it)->translationStarted && (*it)->translationDelayed); + if ((*it)->translationCompleted) { + DynInstPtr ret = *it; + deferredMemInsts.erase(it); + return ret; + } + } + return NULL; +} + +template +void InstructionQueue::violation(DynInstPtr &store, DynInstPtr &faulting_load) { diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/o3/lsq_unit_impl.hh --- a/src/cpu/o3/lsq_unit_impl.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/o3/lsq_unit_impl.hh Wed Jan 12 11:13:16 2011 -0600 @@ -445,12 +445,16 @@ Fault load_fault = NoFault; DPRINTF(LSQUnit, "Executing load PC %s, [sn:%lli]\n", - inst->pcState(),inst->seqNum); + inst->pcState(), inst->seqNum); assert(!inst->isSquashed()); 
load_fault = inst->initiateAcc(); + if (inst->translationStarted && !inst->translationCompleted && + load_fault == NoFault) + return load_fault; + // If the instruction faulted or predicated false, then we need to send it // along to commit without the instruction completing. if (load_fault != NoFault || inst->readPredicate() == false) { @@ -532,6 +536,10 @@ Fault store_fault = store_inst->initiateAcc(); + if (store_inst->translationStarted && !store_inst->translationCompleted && + store_fault == NoFault) + return store_fault; + if (store_inst->readPredicate() == false) store_inst->forwardOldRegs(); diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/simple/timing.hh --- a/src/cpu/simple/timing.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/simple/timing.hh Wed Jan 12 11:13:16 2011 -0600 @@ -108,6 +108,10 @@ {} void + delay() + {} + + void finish(Fault fault, RequestPtr req, ThreadContext *tc, BaseTLB::Mode mode) { diff -r 24d6b6ffc9d3 -r 1b646af30186 src/cpu/translation.hh --- a/src/cpu/translation.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/cpu/translation.hh Wed Jan 12 11:13:16 2011 -0600 @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2002-2005 The Regents of The University of Michigan * Copyright (c) 2009 The University of Edinburgh * All rights reserved. 
@@ -53,6 +65,7 @@ Fault faults[2]; public: + bool delay; bool isSplit; RequestPtr mainReq; RequestPtr sreqLow; @@ -67,8 +80,8 @@ */ WholeTranslationState(RequestPtr _req, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) - : outstanding(1), isSplit(false), mainReq(_req), sreqLow(NULL), - sreqHigh(NULL), data(_data), res(_res), mode(_mode) + : outstanding(1), delay(false), isSplit(false), mainReq(_req), + sreqLow(NULL), sreqHigh(NULL), data(_data), res(_res), mode(_mode) { faults[0] = faults[1] = NoFault; assert(mode == BaseTLB::Read || mode == BaseTLB::Write); @@ -82,8 +95,9 @@ WholeTranslationState(RequestPtr _req, RequestPtr _sreqLow, RequestPtr _sreqHigh, uint8_t *_data, uint64_t *_res, BaseTLB::Mode _mode) - : outstanding(2), isSplit(true), mainReq(_req), sreqLow(_sreqLow), - sreqHigh(_sreqHigh), data(_data), res(_res), mode(_mode) + : outstanding(2), delay(false), isSplit(true), mainReq(_req), + sreqLow(_sreqLow), sreqHigh(_sreqHigh), data(_data), res(_res), + mode(_mode) { faults[0] = faults[1] = NoFault; assert(mode == BaseTLB::Read || mode == BaseTLB::Write); @@ -221,6 +235,16 @@ } /** + * Signal the translation state that the translation has been delayed due + * to a hw page table walk. Split requests are transparently handled. + */ + void + delay() + { + state->delay = true; + } + + /** * Finish this part of the translation and indicate that the whole * translation is complete if the state says so. */ diff -r 24d6b6ffc9d3 -r 1b646af30186 src/sim/tlb.hh --- a/src/sim/tlb.hh Wed Jan 12 11:13:12 2011 -0600 +++ b/src/sim/tlb.hh Wed Jan 12 11:13:16 2011 -0600 @@ -1,4 +1,16 @@ /* + * Copyright (c) 2011 ARM Limited + * All rights reserved. + * + * The license below extends only to copyright in the software and shall + * not be construed as granting a license to any other intellectual + * property including but not limited to intellectual property relating + * to a hardware implementation of the functionality of the software + * licensed hereunder. 
You may use the software subject to the license + * terms below provided that you ensure that this notice is replicated + * unmodified and in its entirety in all distributions of the software, + * modified or unmodified, in source code or in binary form. + * * Copyright (c) 2006 The Regents of The University of Michigan * All rights reserved. * @@ -64,6 +76,12 @@ virtual ~Translation() {} + /** + * Signal that the translation has been delayed due to a hw page table + * walk. + */ + virtual void delay() = 0; + /* * The memory for this object may be dynamically allocated, and it may * be responsible for cleaning itself up which will happen in this