Patch summary: count committed instructions and committed ops separately.

  * src/cpu/base.hh: the pure virtual totalInstructions() is replaced by
    totalOps() and totalInsts(); the static numSimulatedInstructions() is
    replaced by numSimulatedOps() and numSimulatedInsts(), each summing the
    corresponding per-CPU total over cpuList.
  * InOrder, O3 and simple CPUs: every committed op bumps the op counters;
    the instruction counters are bumped only when the op is not a micro-op
    or is the last micro-op of its instruction.
  * Stats are renamed (e.g. committedInsts -> instsExecuted) and op-count
    stats are added next to them.
  * src/sim/stat_control.cc: adds the global "sim_ops" value and the
    "host_op_rate" formula alongside "sim_insts" / "host_inst_rate".

NOTE(review): the InOrder commit hunk replaces thread[tid]->numInst++ with
    numOp++ and adds no other numInst increment; the body of
    InOrderCPU::totalInsts() is outside the visible hunk -- confirm it does
    not sum a counter that is no longer updated.
NOTE(review): comInstEventQueue is now serviced with numOp in both InOrder
    and O3, so instruction-count events fire on op counts -- confirm that
    this is intended.
NOTE(review): blank context lines were placed to satisfy each hunk's line
    counts; verify against the target tree before applying.

diff --git a/src/arch/noisa/cpu_dummy.hh b/src/arch/noisa/cpu_dummy.hh
--- a/src/arch/noisa/cpu_dummy.hh
+++ b/src/arch/noisa/cpu_dummy.hh
@@ -2,5 +2,6 @@
 class BaseCPU
 {
   public:
-    static int numSimulatedInstructions() { return 0; }
+    static int numSimulatedOps() { return 0; }
+    static int numSimulatedInsts() { return 0; }
 };
diff --git a/src/cpu/base.hh b/src/cpu/base.hh
--- a/src/cpu/base.hh
+++ b/src/cpu/base.hh
@@ -279,7 +279,9 @@
      */
     virtual BranchPred *getBranchPred() { return NULL; };
 
-    virtual Counter totalInstructions() const = 0;
+    virtual Counter totalOps() const = 0;
+
+    virtual Counter totalInsts() const = 0;
 
     // Function tracing
   private:
@@ -303,13 +305,24 @@
 
   public:
     static int numSimulatedCPUs() { return cpuList.size(); }
-    static Counter numSimulatedInstructions()
+    static Counter numSimulatedOps()
     {
         Counter total = 0;
 
         int size = cpuList.size();
         for (int i = 0; i < size; ++i)
-            total += cpuList[i]->totalInstructions();
+            total += cpuList[i]->totalOps();
+
+        return total;
+    }
+
+    static Counter numSimulatedInsts()
+    {
+        Counter total = 0;
+
+        int size = cpuList.size();
+        for (int i = 0; i < size; ++i)
+            total += cpuList[i]->totalInsts();
 
         return total;
     }
diff --git a/src/cpu/base.cc b/src/cpu/base.cc
--- a/src/cpu/base.cc
+++ b/src/cpu/base.cc
@@ -75,7 +75,7 @@
 void
 CPUProgressEvent::process()
 {
-    Counter temp = cpu->totalInstructions();
+    Counter temp = cpu->totalOps();
 #ifndef NDEBUG
     double ipc = double(temp - lastNumInst) / (_interval / cpu->ticks(1));
 
diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh
--- a/src/cpu/inorder/cpu.hh
+++ b/src/cpu/inorder/cpu.hh
@@ -774,7 +774,17 @@
     }
 
     /** Count the Total Instructions Committed in the CPU. */
-    virtual Counter totalInstructions() const
+    virtual Counter totalOps() const
+    {
+        Counter total(0);
+
+        for (ThreadID tid = 0; tid < (ThreadID)thread.size(); tid++)
+            total += thread[tid]->numOp;
+
+        return total;
+    }
+
+    virtual Counter totalInsts() const
     {
         Counter total(0);
 
@@ -864,13 +874,16 @@
     Stats::Scalar comFloats;
 
     /** Stat for the number of committed instructions per thread. */
-    Stats::Vector committedInsts;
+    Stats::Vector instsExecuted;
+
+    /** Stat for the number of committed ops (including micro ops) per thread. */
+    Stats::Vector opsExecuted;
 
     /** Stat for the number of committed instructions per thread. */
-    Stats::Vector smtCommittedInsts;
+    Stats::Vector smtInstsExecuted;
 
     /** Stat for the total number of committed instructions. */
-    Stats::Scalar totalCommittedInsts;
+    Stats::Scalar totalInstsExecuted;
 
     /** Stat for the CPI per thread. */
     Stats::Formula cpi;
diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc
--- a/src/cpu/inorder/cpu.cc
+++ b/src/cpu/inorder/cpu.cc
@@ -638,55 +638,60 @@
         .name(name() + ".smtCycles")
         .desc("Total number of cycles that the CPU was in SMT-mode");
 
-    committedInsts
+    instsExecuted
         .init(numThreads)
-        .name(name() + ".committedInsts")
-        .desc("Number of Instructions Simulated (Per-Thread)");
+        .name(name() + ".instCount")
+        .desc("Number of committed instructions, per-thread");
 
-    smtCommittedInsts
+    opsExecuted
         .init(numThreads)
-        .name(name() + ".smtCommittedInsts")
+        .name(name() + ".opCount")
+        .desc("Number of committed ops (including micro ops), per-thread");
+
+    smtInstsExecuted
+        .init(numThreads)
+        .name(name() + ".smtInstCount")
         .desc("Number of SMT Instructions Simulated (Per-Thread)");
 
-    totalCommittedInsts
-        .name(name() + ".committedInsts_total")
+    totalInstsExecuted
+        .name(name() + ".instCountTotal")
         .desc("Number of Instructions Simulated (Total)");
 
     cpi
         .name(name() + ".cpi")
         .desc("CPI: Cycles Per Instruction (Per-Thread)")
         .precision(6);
-    cpi = numCycles / committedInsts;
+    cpi = numCycles / instsExecuted;
 
     smtCpi
         .name(name() + ".smt_cpi")
         .desc("CPI: Total SMT-CPI")
         .precision(6);
-    smtCpi = smtCycles / smtCommittedInsts;
+    smtCpi = smtCycles / smtInstsExecuted;
 
     totalCpi
         .name(name() + ".cpi_total")
         .desc("CPI: Total CPI of All Threads")
         .precision(6);
-    totalCpi = numCycles / totalCommittedInsts;
+    totalCpi = numCycles / totalInstsExecuted;
 
     ipc
         .name(name() + ".ipc")
         .desc("IPC: Instructions Per Cycle (Per-Thread)")
         .precision(6);
-    ipc = committedInsts / numCycles;
+    ipc = instsExecuted / numCycles;
 
     smtIpc
         .name(name() + ".smt_ipc")
         .desc("IPC: Total SMT-IPC")
         .precision(6);
-    smtIpc = smtCommittedInsts / smtCycles;
+    smtIpc = smtInstsExecuted / smtCycles;
 
     totalIpc
         .name(name() + ".ipc_total")
         .desc("IPC: Total IPC of All Threads")
         .precision(6);
-    totalIpc = totalCommittedInsts / numCycles;
+    totalIpc = totalInstsExecuted / numCycles;
 
     BaseCPU::regStats();
 }
@@ -1457,22 +1462,27 @@
     instsPerSwitch++;
 
     // Increment thread-state's instruction count
-    thread[tid]->numInst++;
+    thread[tid]->numOp++;
 
     // Increment thread-state's instruction stats
-    thread[tid]->numInsts++;
+    thread[tid]->numOps++;
 
     // Count committed insts per thread stats
-    committedInsts[tid]++;
+    if (!inst->isMicroop() || inst->isLastMicroop()) {
+        instsExecuted[tid]++;
 
-    // Count total insts committed stat
-    totalCommittedInsts++;
+        // Count total insts committed stat
+        totalInstsExecuted++;
 
-    // Count SMT-committed insts per thread stat
-    if (numActiveThreads() > 1) {
-        smtCommittedInsts[tid]++;
+        // Count SMT-committed insts per thread stat
+        if (numActiveThreads() > 1) {
+            smtInstsExecuted[tid]++;
+        }
     }
 
+    // Count committed ops (including micro ops) per thread stats
+    opsExecuted[tid]++;
+
     // Instruction-Mix Stats
     if (inst->isLoad()) {
         comLoads++;
@@ -1491,7 +1501,7 @@
     }
 
     // Check for instruction-count-based events.
-    comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
+    comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp);
 
     // Finally, remove instruction from CPU
     removeInst(inst);
diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh
--- a/src/cpu/inorder/inorder_dyn_inst.hh
+++ b/src/cpu/inorder/inorder_dyn_inst.hh
@@ -399,6 +399,8 @@
     bool isUnverifiable() const { return staticInst->isUnverifiable(); }
     bool isSyscall() const
     { return staticInst->isSyscall(); }
+    bool isMicroop() const { return staticInst->isMicroop(); }
+    bool isLastMicroop() const { return staticInst->isLastMicroop(); }
 
 
     /////////////////////////////////////////////
diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh
--- a/src/cpu/o3/commit.hh
+++ b/src/cpu/o3/commit.hh
@@ -465,7 +465,9 @@
     Stats::Distribution numCommittedDist;
 
     /** Total number of instructions committed. */
-    Stats::Vector statComInst;
+    Stats::Vector instsCommitted;
+    /** Total number of ops (including micro ops) committed. */
+    Stats::Vector opsCommitted;
     /** Total number of software prefetches committed. */
     Stats::Vector statComSwp;
     /** Stat for the total number of committed memory references. */
diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh
--- a/src/cpu/o3/commit_impl.hh
+++ b/src/cpu/o3/commit_impl.hh
@@ -193,13 +193,20 @@
         .flags(Stats::pdf)
         ;
 
-    statComInst
+    instsCommitted
         .init(cpu->numThreads)
-        .name(name() + ".count")
+        .name(name() + ".inst_count")
         .desc("Number of instructions committed")
         .flags(total)
         ;
 
+    opsCommitted
+        .init(cpu->numThreads)
+        .name(name() + ".op_count")
+        .desc("Number of ops (including micro ops) committed")
+        .flags(total)
+        ;
+
     statComSwp
         .init(cpu->numThreads)
         .name(name() + ".swp_count")
@@ -986,7 +993,7 @@
         // To match the old model, don't count nops and instruction
         // prefetches towards the total commit count.
         if (!head_inst->isNop() && !head_inst->isInstPrefetch()) {
-            cpu->instDone(tid);
+            cpu->instDone(tid, head_inst);
         }
 
         // Updates misc. registers.
@@ -1159,7 +1166,7 @@
         if (head_inst->traceData) {
             if (DTRACE(ExecFaulting)) {
                 head_inst->traceData->setFetchSeq(head_inst->seqNum);
-                head_inst->traceData->setCPSeq(thread[tid]->numInst);
+                head_inst->traceData->setCPSeq(thread[tid]->numOp);
                 head_inst->traceData->dump();
             }
             delete head_inst->traceData;
@@ -1193,7 +1200,7 @@
             head_inst->seqNum, head_inst->pcState());
     if (head_inst->traceData) {
         head_inst->traceData->setFetchSeq(head_inst->seqNum);
-        head_inst->traceData->setCPSeq(thread[tid]->numInst);
+        head_inst->traceData->setCPSeq(thread[tid]->numOp);
         head_inst->traceData->dump();
         delete head_inst->traceData;
         head_inst->traceData = NULL;
@@ -1344,10 +1351,14 @@
     if (inst->isDataPrefetch()) {
         statComSwp[tid]++;
     } else {
-        statComInst[tid]++;
+        if (!inst->isMicroop() || inst->isLastMicroop())
+            instsCommitted[tid]++;
+        opsCommitted[tid]++;
     }
 #else
-    statComInst[tid]++;
+    if (!inst->isMicroop() || inst->isLastMicroop())
+        instsCommitted[tid]++;
+    opsCommitted[tid]++;
 #endif
 
     //
diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh
--- a/src/cpu/o3/cpu.hh
+++ b/src/cpu/o3/cpu.hh
@@ -315,8 +315,11 @@
     /** Remove all of a thread's context from CPU */
     void removeThread(ThreadID tid);
 
+    /** Count the Total Operations (including micro ops) Committed in the CPU. */
+    virtual Counter totalOps() const;
+
     /** Count the Total Instructions Committed in the CPU. */
-    virtual Counter totalInstructions() const;
+    virtual Counter totalInsts() const;
 
     /** Add Thread to Active Threads List. */
     void activateContext(ThreadID tid, int delay);
@@ -482,7 +485,7 @@
     ListIt addInst(DynInstPtr &inst);
 
     /** Function to tell the CPU that an instruction has completed. */
-    void instDone(ThreadID tid);
+    void instDone(ThreadID tid, DynInstPtr &inst);
 
     /** Remove an instruction from the front end of the list.  There's
      *  no restriction on location of the instruction.
@@ -723,10 +726,10 @@
     /** Stat for total number of cycles the CPU spends descheduled due to a
      * quiesce operation or waiting for an interrupt. */
     Stats::Scalar quiesceCycles;
-    /** Stat for the number of committed instructions per thread. */
-    Stats::Vector committedInsts;
+    /** Stats for the number of committed instructions per thread. */
+    Stats::Vector instsExecuted;
     /** Stat for the total number of committed instructions. */
-    Stats::Scalar totalCommittedInsts;
+    Stats::Scalar totalInstsExecuted;
     /** Stat for the CPI per thread. */
     Stats::Formula cpi;
     /** Stat for the total CPI. */
diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc
--- a/src/cpu/o3/cpu.cc
+++ b/src/cpu/o3/cpu.cc
@@ -452,38 +452,38 @@
     // --------------------------------
     // Should probably be in Base CPU but need templated
     // MaxThreads so put in here instead
-    committedInsts
+    instsExecuted
         .init(numThreads)
-        .name(name() + ".committedInsts")
+        .name(name() + ".inst_count")
         .desc("Number of Instructions Simulated");
 
-    totalCommittedInsts
-        .name(name() + ".committedInsts_total")
+    totalInstsExecuted
+        .name(name() + ".inst_count_total")
         .desc("Number of Instructions Simulated");
 
     cpi
         .name(name() + ".cpi")
         .desc("CPI: Cycles Per Instruction")
         .precision(6);
-    cpi = numCycles / committedInsts;
+    cpi = numCycles / instsExecuted;
 
     totalCpi
         .name(name() + ".cpi_total")
         .desc("CPI: Total CPI of All Threads")
         .precision(6);
-    totalCpi = numCycles / totalCommittedInsts;
+    totalCpi = numCycles / totalInstsExecuted;
 
     ipc
         .name(name() + ".ipc")
         .desc("IPC: Instructions Per Cycle")
         .precision(6);
-    ipc = committedInsts / numCycles;
+    ipc = instsExecuted / numCycles;
 
     totalIpc
         .name(name() + ".ipc_total")
         .desc("IPC: Total IPC of All Threads")
         .precision(6);
-    totalIpc = totalCommittedInsts / numCycles;
+    totalIpc = totalInstsExecuted / numCycles;
 
     this->fetch.regStats();
     this->decode.regStats();
@@ -662,7 +662,20 @@
 
 template <class Impl>
 Counter
-FullO3CPU<Impl>::totalInstructions() const
+FullO3CPU<Impl>::totalOps() const
+{
+    Counter total(0);
+
+    ThreadID size = thread.size();
+    for (ThreadID i = 0; i < size; i++)
+        total += thread[i]->numOp;
+
+    return total;
+}
+
+template <class Impl>
+Counter
+FullO3CPU<Impl>::totalInsts() const
 {
     Counter total(0);
 
@@ -1418,16 +1431,20 @@
 
 template <class Impl>
 void
-FullO3CPU<Impl>::instDone(ThreadID tid)
+FullO3CPU<Impl>::instDone(ThreadID tid, DynInstPtr &inst)
 {
     // Keep an instruction count.
-    thread[tid]->numInst++;
-    thread[tid]->numInsts++;
-    committedInsts[tid]++;
-    totalCommittedInsts++;
+    thread[tid]->numOp++;
+    thread[tid]->numOps++;
+    if (!inst->isMicroop() || inst->isLastMicroop()) {
+        thread[tid]->numInst++;
+        instsExecuted[tid]++;
+        totalInstsExecuted++;
+    }
 
+    system->totalNumInsts++;
     // Check for instruction-count-based events.
-    comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst);
+    comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp);
     system->instEventQueue.serviceEvents(system->totalNumInsts);
 }
 
diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh
--- a/src/cpu/simple/base.hh
+++ b/src/cpu/simple/base.hh
@@ -176,19 +176,38 @@
     virtual void resetStats();
 
     // number of simulated instructions
+    Counter numOp;
+    Counter startNumOp;
     Counter numInst;
     Counter startNumInst;
     Stats::Scalar numInsts;
 
+    // number of simulated instructions
+    Stats::Scalar instsExecuted;
+
+    // number of simulated ops (including micro ops)
+    Stats::Scalar opsExecuted;
+
     void countInst()
     {
-        numInst++;
-        numInsts++;
+        if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) {
+            instsExecuted++;
+            numInst++;
+            numInsts++;
+        }
+
+        opsExecuted++;
+        numOp++;
         system->totalNumInsts++;
         thread->funcExeInst++;
     }
 
-    virtual Counter totalInstructions() const
+    virtual Counter totalOps() const
+    {
+        return numOp - startNumOp;
+    }
+
+    virtual Counter totalInsts() const
     {
         return numInst - startNumInst;
     }
diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc
--- a/src/cpu/simple/base.cc
+++ b/src/cpu/simple/base.cc
@@ -99,6 +99,8 @@
 
     tc = thread->getTC();
 
+    numOp = 0;
+    startNumOp = 0;
     numInst = 0;
     startNumInst = 0;
     numLoad = 0;
@@ -140,6 +142,16 @@
 
     BaseCPU::regStats();
 
+    instsExecuted
+        .name(name() + ".inst_count")
+        .desc("Number of instructions executed")
+        ;
+
+    opsExecuted
+        .name(name() + ".op_count")
+        .desc("Number of ops (including micro ops) executed")
+        ;
+
     numInsts
         .name(name() + ".num_insts")
         .desc("Number of instructions executed")
diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh
--- a/src/cpu/thread_state.hh
+++ b/src/cpu/thread_state.hh
@@ -143,10 +143,13 @@
      * below the CPU. */
     void connectToMemFunc(Port *port);
 
+    /** Number of instructions committed. */
     Counter numInst;
+    /** Number of ops committed. */
+    Counter numOp;
 
     /** Stat for number instructions committed. */
-    Stats::Scalar numInsts;
+    Stats::Scalar numOps;
     /** Stat for number of memory references. */
     Stats::Scalar numMemRefs;
 
diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc
--- a/src/cpu/thread_state.cc
+++ b/src/cpu/thread_state.cc
@@ -47,7 +47,7 @@
 #else
 ThreadState::ThreadState(BaseCPU *cpu, ThreadID _tid, Process *_process)
 #endif
-    : numInst(0), numLoad(0), _status(ThreadContext::Halted),
+    : numInst(0), numOp(0), numLoad(0), _status(ThreadContext::Halted),
       baseCpu(cpu), _threadId(_tid), lastActivate(0), lastSuspend(0),
 #if FULL_SYSTEM
       profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL),
diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc
--- a/src/sim/stat_control.cc
+++ b/src/sim/stat_control.cc
@@ -89,11 +89,13 @@
 
 struct Global
 {
+    Stats::Formula hostOpRate;
     Stats::Formula hostInstRate;
     Stats::Formula hostTickRate;
     Stats::Value hostMemory;
     Stats::Value hostSeconds;
 
+    Stats::Value simOps;
     Stats::Value simInsts;
 
     Global();
@@ -101,8 +103,16 @@
 
 Global::Global()
 {
+    simOps
+        .functor(BaseCPU::numSimulatedOps)
+        .name("sim_ops")
+        .desc("Number of ops (including micro ops) simulated")
+        .precision(0)
+        .prereq(simOps)
+        ;
+
     simInsts
-        .functor(BaseCPU::numSimulatedInstructions)
+        .functor(BaseCPU::numSimulatedInsts)
         .name("sim_insts")
         .desc("Number of instructions simulated")
         .precision(0)
@@ -126,9 +136,16 @@
         .desc("Number of ticks simulated")
         ;
 
+    hostOpRate
+        .name("host_op_rate")
+        .desc("Simulator op (including micro ops) rate (ops/s)")
+        .precision(0)
+        .prereq(simOps)
+        ;
+
     hostInstRate
         .name("host_inst_rate")
-        .desc("Simulator instruction rate (inst/s)")
+        .desc("Simulator instruction rate (insts/s)")
         .precision(0)
         .prereq(simInsts)
         ;
@@ -154,6 +171,7 @@
         ;
 
     simSeconds = simTicks / simFreq;
+    hostOpRate = simOps / hostSeconds;
     hostInstRate = simInsts / hostSeconds;
     hostTickRate = simTicks / hostSeconds;
 