diff --git a/src/arch/noisa/cpu_dummy.hh b/src/arch/noisa/cpu_dummy.hh --- a/src/arch/noisa/cpu_dummy.hh +++ b/src/arch/noisa/cpu_dummy.hh @@ -2,5 +2,6 @@ class BaseCPU { public: - static int numSimulatedInstructions() { return 0; } + static int numSimulatedOperations() { return 0; } + static int numSimulatedInstructions() { return 0; } }; diff --git a/src/cpu/base.hh b/src/cpu/base.hh --- a/src/cpu/base.hh +++ b/src/cpu/base.hh @@ -279,6 +279,8 @@ */ virtual BranchPred *getBranchPred() { return NULL; }; + virtual Counter totalOperations() const = 0; + virtual Counter totalInstructions() const = 0; // Function tracing @@ -303,6 +305,17 @@ public: static int numSimulatedCPUs() { return cpuList.size(); } + static Counter numSimulatedOperations() + { + Counter total = 0; + + int size = cpuList.size(); + for (int i = 0; i < size; ++i) + total += cpuList[i]->totalOperations(); + + return total; + } + static Counter numSimulatedInstructions() { Counter total = 0; diff --git a/src/cpu/base.cc b/src/cpu/base.cc --- a/src/cpu/base.cc +++ b/src/cpu/base.cc @@ -75,7 +75,7 @@ void CPUProgressEvent::process() { - Counter temp = cpu->totalInstructions(); + Counter temp = cpu->totalOperations(); #ifndef NDEBUG double ipc = double(temp - lastNumInst) / (_interval / cpu->ticks(1)); diff --git a/src/cpu/inorder/cpu.hh b/src/cpu/inorder/cpu.hh --- a/src/cpu/inorder/cpu.hh +++ b/src/cpu/inorder/cpu.hh @@ -774,6 +774,16 @@ } /** Count the Total Instructions Committed in the CPU. */ + virtual Counter totalOperations() const + { + Counter total(0); + + for (ThreadID tid = 0; tid < (ThreadID)thread.size(); tid++) + total += thread[tid]->numOp; + + return total; + } + virtual Counter totalInstructions() const { Counter total(0); @@ -864,13 +874,16 @@ Stats::Scalar comFloats; /** Stat for the number of committed instructions per thread. 
*/ - Stats::Vector committedInsts; + Stats::Vector instructionsExecuted; + + /** Stat for the number of committed operations (including micro ops) per thread. */ + Stats::Vector operationsExecuted; /** Stat for the number of committed instructions per thread. */ - Stats::Vector smtCommittedInsts; + Stats::Vector smtInstructionsExecuted; /** Stat for the total number of committed instructions. */ - Stats::Scalar totalCommittedInsts; + Stats::Scalar totalInstructionsExecuted; /** Stat for the CPI per thread. */ Stats::Formula cpi; diff --git a/src/cpu/inorder/cpu.cc b/src/cpu/inorder/cpu.cc --- a/src/cpu/inorder/cpu.cc +++ b/src/cpu/inorder/cpu.cc @@ -638,55 +638,60 @@ .name(name() + ".smtCycles") .desc("Total number of cycles that the CPU was in SMT-mode"); - committedInsts + instructionsExecuted .init(numThreads) - .name(name() + ".committedInsts") - .desc("Number of Instructions Simulated (Per-Thread)"); + .name(name() + ".instCount") + .desc("Number of committed instructions, per-thread"); - smtCommittedInsts + operationsExecuted .init(numThreads) - .name(name() + ".smtCommittedInsts") + .name(name() + ".opCount") + .desc("Number of committed operations (including micro ops), per-thread"); + + smtInstructionsExecuted + .init(numThreads) + .name(name() + ".smtInstCount") .desc("Number of SMT Instructions Simulated (Per-Thread)"); - totalCommittedInsts - .name(name() + ".committedInsts_total") + totalInstructionsExecuted + .name(name() + ".instCountTotal") .desc("Number of Instructions Simulated (Total)"); cpi .name(name() + ".cpi") .desc("CPI: Cycles Per Instruction (Per-Thread)") .precision(6); - cpi = numCycles / committedInsts; + cpi = numCycles / instructionsExecuted; smtCpi .name(name() + ".smt_cpi") .desc("CPI: Total SMT-CPI") .precision(6); - smtCpi = smtCycles / smtCommittedInsts; + smtCpi = smtCycles / smtInstructionsExecuted; totalCpi .name(name() + ".cpi_total") .desc("CPI: Total CPI of All Threads") .precision(6); - totalCpi = numCycles / 
totalCommittedInsts; + totalCpi = numCycles / totalInstructionsExecuted; ipc .name(name() + ".ipc") .desc("IPC: Instructions Per Cycle (Per-Thread)") .precision(6); - ipc = committedInsts / numCycles; + ipc = instructionsExecuted / numCycles; smtIpc .name(name() + ".smt_ipc") .desc("IPC: Total SMT-IPC") .precision(6); - smtIpc = smtCommittedInsts / smtCycles; + smtIpc = smtInstructionsExecuted / smtCycles; totalIpc .name(name() + ".ipc_total") .desc("IPC: Total IPC of All Threads") .precision(6); - totalIpc = totalCommittedInsts / numCycles; + totalIpc = totalInstructionsExecuted / numCycles; BaseCPU::regStats(); } @@ -1457,22 +1462,29 @@ instsPerSwitch++; // Increment thread-state's instruction count - thread[tid]->numInst++; + thread[tid]->numOp++; // Increment thread-state's instruction stats - thread[tid]->numInsts++; + thread[tid]->numOps++; // Count committed insts per thread stats - committedInsts[tid]++; + if (!inst->isMicroop() || inst->isLastMicroop()) { + // Count the instruction once: non-microop, or last microop of a macro-op + thread[tid]->numInst++; + instructionsExecuted[tid]++; - // Count total insts committed stat - totalCommittedInsts++; + // Count total insts committed stat + totalInstructionsExecuted++; - // Count SMT-committed insts per thread stat - if (numActiveThreads() > 1) { - smtCommittedInsts[tid]++; + // Count SMT-committed insts per thread stat + if (numActiveThreads() > 1) { + smtInstructionsExecuted[tid]++; + } } + // Count committed operations (including micro ops) per thread stats + operationsExecuted[tid]++; + // Instruction-Mix Stats if (inst->isLoad()) { comLoads++; @@ -1491,7 +1503,7 @@ } // Check for instruction-count-based events. 
- comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); + comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp); // Finally, remove instruction from CPU removeInst(inst); diff --git a/src/cpu/inorder/inorder_dyn_inst.hh b/src/cpu/inorder/inorder_dyn_inst.hh --- a/src/cpu/inorder/inorder_dyn_inst.hh +++ b/src/cpu/inorder/inorder_dyn_inst.hh @@ -399,6 +399,8 @@ bool isUnverifiable() const { return staticInst->isUnverifiable(); } bool isSyscall() const { return staticInst->isSyscall(); } + bool isMicroop() const { return staticInst->isMicroop(); } + bool isLastMicroop() const { return staticInst->isLastMicroop(); } ///////////////////////////////////////////// diff --git a/src/cpu/o3/commit.hh b/src/cpu/o3/commit.hh --- a/src/cpu/o3/commit.hh +++ b/src/cpu/o3/commit.hh @@ -465,7 +465,9 @@ Stats::Distribution numCommittedDist; /** Total number of instructions committed. */ - Stats::Vector statComInst; + Stats::Vector instructionsCommitted; + /** Total number of operations (including micro ops) committed. */ + Stats::Vector operationsCommitted; /** Total number of software prefetches committed. */ Stats::Vector statComSwp; /** Stat for the total number of committed memory references. */ diff --git a/src/cpu/o3/commit_impl.hh b/src/cpu/o3/commit_impl.hh --- a/src/cpu/o3/commit_impl.hh +++ b/src/cpu/o3/commit_impl.hh @@ -193,13 +193,20 @@ .flags(Stats::pdf) ; - statComInst + instructionsCommitted .init(cpu->numThreads) - .name(name() + ".count") + .name(name() + ".inst_count") .desc("Number of instructions committed") .flags(total) ; + operationsCommitted + .init(cpu->numThreads) + .name(name() + ".op_count") + .desc("Number of operations (including micro ops) committed") + .flags(total) + ; + statComSwp .init(cpu->numThreads) .name(name() + ".swp_count") @@ -986,7 +993,7 @@ // To match the old model, don't count nops and instruction // prefetches towards the total commit count. 
if (!head_inst->isNop() && !head_inst->isInstPrefetch()) { - cpu->instDone(tid); + cpu->instDone(tid, head_inst); } // Updates misc. registers. @@ -1159,7 +1166,7 @@ if (head_inst->traceData) { if (DTRACE(ExecFaulting)) { head_inst->traceData->setFetchSeq(head_inst->seqNum); - head_inst->traceData->setCPSeq(thread[tid]->numInst); + head_inst->traceData->setCPSeq(thread[tid]->numOp); head_inst->traceData->dump(); } delete head_inst->traceData; @@ -1193,7 +1200,7 @@ head_inst->seqNum, head_inst->pcState()); if (head_inst->traceData) { head_inst->traceData->setFetchSeq(head_inst->seqNum); - head_inst->traceData->setCPSeq(thread[tid]->numInst); + head_inst->traceData->setCPSeq(thread[tid]->numOp); head_inst->traceData->dump(); delete head_inst->traceData; head_inst->traceData = NULL; @@ -1344,10 +1351,14 @@ if (inst->isDataPrefetch()) { statComSwp[tid]++; } else { - statComInst[tid]++; + if (!inst->isMicroop() || inst->isLastMicroop()) + instructionsCommitted[tid]++; + operationsCommitted[tid]++; } #else - statComInst[tid]++; + if (!inst->isMicroop() || inst->isLastMicroop()) + instructionsCommitted[tid]++; + operationsCommitted[tid]++; #endif // diff --git a/src/cpu/o3/cpu.hh b/src/cpu/o3/cpu.hh --- a/src/cpu/o3/cpu.hh +++ b/src/cpu/o3/cpu.hh @@ -315,6 +315,9 @@ /** Remove all of a thread's context from CPU */ void removeThread(ThreadID tid); + /** Count the Total Operations (including micro ops) Committed in the CPU. */ + virtual Counter totalOperations() const; + /** Count the Total Instructions Committed in the CPU. */ virtual Counter totalInstructions() const; @@ -482,7 +485,7 @@ ListIt addInst(DynInstPtr &inst); /** Function to tell the CPU that an instruction has completed. */ - void instDone(ThreadID tid); + void instDone(ThreadID tid, DynInstPtr &inst); /** Remove an instruction from the front end of the list. There's * no restriction on location of the instruction. 
@@ -723,10 +726,10 @@ /** Stat for total number of cycles the CPU spends descheduled due to a * quiesce operation or waiting for an interrupt. */ Stats::Scalar quiesceCycles; - /** Stat for the number of committed instructions per thread. */ - Stats::Vector committedInsts; + /** Stats for the number of committed instructions per thread. */ + Stats::Vector instructionsExecuted; /** Stat for the total number of committed instructions. */ - Stats::Scalar totalCommittedInsts; + Stats::Scalar totalInstructionsExecuted; /** Stat for the CPI per thread. */ Stats::Formula cpi; /** Stat for the total CPI. */ diff --git a/src/cpu/o3/cpu.cc b/src/cpu/o3/cpu.cc --- a/src/cpu/o3/cpu.cc +++ b/src/cpu/o3/cpu.cc @@ -452,38 +452,38 @@ // -------------------------------- // Should probably be in Base CPU but need templated // MaxThreads so put in here instead - committedInsts + instructionsExecuted .init(numThreads) - .name(name() + ".committedInsts") + .name(name() + ".inst_count") .desc("Number of Instructions Simulated"); - totalCommittedInsts - .name(name() + ".committedInsts_total") + totalInstructionsExecuted + .name(name() + ".inst_count_total") .desc("Number of Instructions Simulated"); cpi .name(name() + ".cpi") .desc("CPI: Cycles Per Instruction") .precision(6); - cpi = numCycles / committedInsts; + cpi = numCycles / instructionsExecuted; totalCpi .name(name() + ".cpi_total") .desc("CPI: Total CPI of All Threads") .precision(6); - totalCpi = numCycles / totalCommittedInsts; + totalCpi = numCycles / totalInstructionsExecuted; ipc .name(name() + ".ipc") .desc("IPC: Instructions Per Cycle") .precision(6); - ipc = committedInsts / numCycles; + ipc = instructionsExecuted / numCycles; totalIpc .name(name() + ".ipc_total") .desc("IPC: Total IPC of All Threads") .precision(6); - totalIpc = totalCommittedInsts / numCycles; + totalIpc = totalInstructionsExecuted / numCycles; this->fetch.regStats(); this->decode.regStats(); @@ -662,6 +662,19 @@ template Counter 
+FullO3CPU::totalOperations() const +{ + Counter total(0); + + ThreadID size = thread.size(); + for (ThreadID i = 0; i < size; i++) + total += thread[i]->numOp; + + return total; +} + +template +Counter FullO3CPU::totalInstructions() const { Counter total(0); @@ -1418,16 +1431,20 @@ template void -FullO3CPU::instDone(ThreadID tid) +FullO3CPU::instDone(ThreadID tid, DynInstPtr &inst) { // Keep an instruction count. - thread[tid]->numInst++; - thread[tid]->numInsts++; - committedInsts[tid]++; - totalCommittedInsts++; + thread[tid]->numOp++; + thread[tid]->numOps++; + if (!inst->isMicroop() || inst->isLastMicroop()) { + thread[tid]->numInst++; + instructionsExecuted[tid]++; + totalInstructionsExecuted++; + } + system->totalNumInsts++; // Check for instruction-count-based events. - comInstEventQueue[tid]->serviceEvents(thread[tid]->numInst); + comInstEventQueue[tid]->serviceEvents(thread[tid]->numOp); system->instEventQueue.serviceEvents(system->totalNumInsts); } diff --git a/src/cpu/simple/base.hh b/src/cpu/simple/base.hh --- a/src/cpu/simple/base.hh +++ b/src/cpu/simple/base.hh @@ -176,18 +176,37 @@ virtual void resetStats(); // number of simulated instructions + Counter numOp; + Counter startNumOp; Counter numInst; Counter startNumInst; Stats::Scalar numInsts; + // number of simulated instructions + Stats::Scalar instructionsExecuted; + + // number of simulated operations (including micro ops) + Stats::Scalar operationsExecuted; + void countInst() { - numInst++; - numInsts++; + if (!curStaticInst->isMicroop() || curStaticInst->isLastMicroop()) { + instructionsExecuted++; + numInst++; + numInsts++; + } + + operationsExecuted++; + numOp++; system->totalNumInsts++; thread->funcExeInst++; } + virtual Counter totalOperations() const + { + return numOp - startNumOp; + } + virtual Counter totalInstructions() const { return numInst - startNumInst; diff --git a/src/cpu/simple/base.cc b/src/cpu/simple/base.cc --- a/src/cpu/simple/base.cc +++ b/src/cpu/simple/base.cc @@ -99,6 
+99,8 @@ tc = thread->getTC(); + numOp = 0; + startNumOp = 0; numInst = 0; startNumInst = 0; numLoad = 0; @@ -140,6 +142,16 @@ BaseCPU::regStats(); + instructionsExecuted + .name(name() + ".inst_count") + .desc("Number of instructions executed") + ; + + operationsExecuted + .name(name() + ".op_count") + .desc("Number of operations (including micro ops) executed") + ; + numInsts .name(name() + ".num_insts") .desc("Number of instructions executed") diff --git a/src/cpu/thread_state.hh b/src/cpu/thread_state.hh --- a/src/cpu/thread_state.hh +++ b/src/cpu/thread_state.hh @@ -143,10 +143,13 @@ * below the CPU. */ void connectToMemFunc(Port *port); + /** Number of instructions committed. */ Counter numInst; + /** Number of operations committed. */ + Counter numOp; /** Stat for number instructions committed. */ - Stats::Scalar numInsts; + Stats::Scalar numOps; /** Stat for number of memory references. */ Stats::Scalar numMemRefs; diff --git a/src/cpu/thread_state.cc b/src/cpu/thread_state.cc --- a/src/cpu/thread_state.cc +++ b/src/cpu/thread_state.cc @@ -47,7 +47,7 @@ #else ThreadState::ThreadState(BaseCPU *cpu, ThreadID _tid, Process *_process) #endif - : numInst(0), numLoad(0), _status(ThreadContext::Halted), + : numInst(0), numOp(0), numLoad(0), _status(ThreadContext::Halted), baseCpu(cpu), _threadId(_tid), lastActivate(0), lastSuspend(0), #if FULL_SYSTEM profile(NULL), profileNode(NULL), profilePC(0), quiesceEvent(NULL), diff --git a/src/sim/stat_control.cc b/src/sim/stat_control.cc --- a/src/sim/stat_control.cc +++ b/src/sim/stat_control.cc @@ -89,11 +89,13 @@ struct Global { + Stats::Formula hostOpRate; Stats::Formula hostInstRate; Stats::Formula hostTickRate; Stats::Value hostMemory; Stats::Value hostSeconds; + Stats::Value simOps; Stats::Value simInsts; Global(); @@ -101,6 +103,14 @@ Global::Global() { + simOps + .functor(BaseCPU::numSimulatedOperations) + .name("sim_ops") + .desc("Number of operations (including micro ops) simulated") + .precision(0) + 
.prereq(simOps) + ; + simInsts .functor(BaseCPU::numSimulatedInstructions) .name("sim_insts") @@ -126,9 +136,16 @@ .desc("Number of ticks simulated") ; + hostOpRate + .name("host_op_rate") + .desc("Simulator operation (including micro ops) rate (ops/s)") + .precision(0) + .prereq(simOps) + ; + hostInstRate .name("host_inst_rate") - .desc("Simulator instruction rate (inst/s)") + .desc("Simulator instruction rate (insts/s)") .precision(0) .prereq(simInsts) ; @@ -154,6 +171,7 @@ ; simSeconds = simTicks / simFreq; + hostOpRate = simOps / hostSeconds; hostInstRate = simInsts / hostSeconds; hostTickRate = simTicks / hostSeconds;