diff -r 64f7c05a4944 -r d444c341c00c src/cpu/trace/trace_cpu.hh --- a/src/cpu/trace/trace_cpu.hh Fri Aug 12 17:43:57 2016 +0100 +++ b/src/cpu/trace/trace_cpu.hh Fri Aug 12 17:44:06 2016 +0100 @@ -136,10 +136,9 @@ * Strictly-ordered requests are skipped and the dependencies on such requests * are handled by simply marking them complete immediately. * - * The simulated seconds can be calculated as the difference between the - * final_tick stat and the tickOffset stat. A CountedExitEvent that contains a - * static int belonging to the Trace CPU class as a down counter is used to - * implement multi Trace CPU simulation exit. + * A CountedExitEvent that contains a static int belonging to the Trace CPU + * class as a down counter is used to implement multi Trace CPU simulation + * exit. */ class TraceCPU : public BaseCPU @@ -171,9 +170,15 @@ */ Counter totalOps() const { - return dcacheGen.getMicroOpCount(); + return numOps.value(); } + /* + * Set the no. of ops when elastic data generator completes executing a + * node. + */ + void updateNumOps(uint64_t rob_num) { numOps = rob_num; } + /* Pure virtual function in BaseCPU. Do nothing. */ void wakeup(ThreadID tid = 0) { @@ -876,6 +881,14 @@ */ Tick init(); + /** + * Adjust traceOffset based on what TraceCPU init() determines on + * comparing the offsets in the fetch request and elastic traces. + * + * @param offset trace offset set by comparing both traces + */ + void adjustInitTraceOffset(Tick& offset); + /** Returns name of the ElasticDataGen instance. */ const std::string& name() const { return genName; } @@ -1081,10 +1094,12 @@ bool oneTraceComplete; /** - * This is stores the tick of the first instruction fetch request - * which is later used for dumping the tickOffset stat. + * This stores the time offset in the trace, which is taken away from + * the ready times of requests. This is especially useful because the time + * offset can be very large if the traces are generated from the middle of + * a program. 
*/ - Tick firstFetchTick; + Tick traceOffset; /** * Number of Trace CPUs in the system used as a shared variable and passed @@ -1109,13 +1124,6 @@ /** Stat for the CPI. This is really cycles per micro-op and not inst. */ Stats::Formula cpi; - /** - * The first execution tick is dumped as a stat so that the simulated - * seconds for a trace replay can be calculated as a difference between the - * final_tick stat and the tickOffset stat - */ - Stats::Scalar tickOffset; - public: /** Used to get a reference to the icache port. */ diff -r 64f7c05a4944 -r d444c341c00c src/cpu/trace/trace_cpu.cc --- a/src/cpu/trace/trace_cpu.cc Fri Aug 12 17:43:57 2016 +0100 +++ b/src/cpu/trace/trace_cpu.cc Fri Aug 12 17:44:06 2016 +0100 @@ -60,7 +60,7 @@ icacheNextEvent(this), dcacheNextEvent(this), oneTraceComplete(false), - firstFetchTick(0), + traceOffset(0), execCompleteEvent(nullptr) { // Increment static counter for number of Trace CPUs. @@ -116,22 +116,31 @@ BaseCPU::init(); - // Get the send tick of the first instruction read request and schedule - // icacheNextEvent at that tick. + // Get the send tick of the first instruction read request Tick first_icache_tick = icacheGen.init(); - schedule(icacheNextEvent, first_icache_tick); - // Get the send tick of the first data read/write request and schedule - // dcacheNextEvent at that tick. 
+ // Get the send tick of the first data read/write request Tick first_dcache_tick = dcacheGen.init(); - schedule(dcacheNextEvent, first_dcache_tick); + + // Set the trace offset as the minimum of that in both traces + traceOffset = std::min(first_icache_tick, first_dcache_tick); + inform("%s: Time offset (tick) found as min of both traces is %lli.\n", + name(), traceOffset); + + // Schedule next icache and dcache event by subtracting the offset + schedule(icacheNextEvent, first_icache_tick - traceOffset); + schedule(dcacheNextEvent, first_dcache_tick - traceOffset); + + // Adjust the trace offset for the dcache generator's ready nodes + // We don't need to do this for the icache generator as it will + // send its first request at the first event and schedule subsequent + // events using a relative tick delta + dcacheGen.adjustInitTraceOffset(traceOffset); // The static counter for number of Trace CPUs is correctly set at this // point so create an event and pass it. execCompleteEvent = new CountedExitEvent("end of all traces reached.", numTraceCPUs); - // Save the first fetch request tick to dump it as tickOffset - firstFetchTick = first_icache_tick; } void @@ -164,6 +173,9 @@ { DPRINTF(TraceCPUData, "DcacheGen event.\n"); + // Update stat for numCycles + numCycles = clockEdge() / clockPeriod(); + dcacheGen.execute(); if (dcacheGen.isExecComplete()) { checkAndSchedExitEvent(); @@ -179,11 +191,6 @@ // Schedule event to indicate execution is complete as both // instruction and data access traces have been played back. 
inform("%s: Execution complete.\n", name()); - - // Record stats which are computed at the end of simulation - tickOffset = firstFetchTick; - numCycles = (clockEdge() - firstFetchTick) / clockPeriod(); - numOps = dcacheGen.getMicroOpCount(); schedule(*execCompleteEvent, curTick()); } } @@ -216,11 +223,6 @@ ; cpi = numCycles/numOps; - tickOffset - .name(name() + ".tickOffset") - .desc("The first execution tick for the root node of elastic traces") - ; - icacheGen.regStats(); dcacheGen.regStats(); } @@ -312,6 +314,13 @@ } void +TraceCPU::ElasticDataGen::adjustInitTraceOffset(Tick& offset) { + for (auto& free_node : readyList) { + free_node.execTick -= offset; + } +} + +void TraceCPU::ElasticDataGen::exit() { trace.reset(); @@ -534,6 +543,8 @@ hwResource.release(node_ptr); // clear the dynamically allocated set of dependents (node_ptr->dependents).clear(); + // Update the stat for numOps simulated + owner.updateNumOps(node_ptr->robNum); // delete node delete node_ptr; // remove from graph @@ -736,6 +747,8 @@ // clear the dynamically allocated set of dependents (node_ptr->dependents).clear(); + // Update the stat for numOps completed + owner.updateNumOps(node_ptr->robNum); // delete node delete node_ptr; // remove from graph